{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 34251, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.9196227847362122e-05, "grad_norm": 1.8228937007168902, "learning_rate": 1.4594279042615296e-09, "loss": 1.1658, "step": 1 }, { "epoch": 5.8392455694724244e-05, "grad_norm": 1.9016710047704566, "learning_rate": 2.9188558085230592e-09, "loss": 1.0618, "step": 2 }, { "epoch": 8.758868354208637e-05, "grad_norm": 1.814878309993498, "learning_rate": 4.378283712784589e-09, "loss": 1.1539, "step": 3 }, { "epoch": 0.00011678491138944849, "grad_norm": 1.9244587775888444, "learning_rate": 5.8377116170461185e-09, "loss": 1.2509, "step": 4 }, { "epoch": 0.0001459811392368106, "grad_norm": 1.8139895088972, "learning_rate": 7.297139521307648e-09, "loss": 1.2492, "step": 5 }, { "epoch": 0.00017517736708417273, "grad_norm": 1.8531900994800066, "learning_rate": 8.756567425569178e-09, "loss": 1.3522, "step": 6 }, { "epoch": 0.00020437359493153485, "grad_norm": 2.0925291872679797, "learning_rate": 1.0215995329830708e-08, "loss": 1.3484, "step": 7 }, { "epoch": 0.00023356982277889697, "grad_norm": 1.6900173198201776, "learning_rate": 1.1675423234092237e-08, "loss": 1.1201, "step": 8 }, { "epoch": 0.0002627660506262591, "grad_norm": 1.8087717924775721, "learning_rate": 1.3134851138353766e-08, "loss": 1.1899, "step": 9 }, { "epoch": 0.0002919622784736212, "grad_norm": 2.145936003549506, "learning_rate": 1.4594279042615296e-08, "loss": 1.2931, "step": 10 }, { "epoch": 0.00032115850632098334, "grad_norm": 1.921602603681621, "learning_rate": 1.6053706946876825e-08, "loss": 1.0623, "step": 11 }, { "epoch": 0.00035035473416834546, "grad_norm": 1.7110940077622523, "learning_rate": 1.7513134851138356e-08, "loss": 1.1181, "step": 12 }, { "epoch": 0.0003795509620157076, "grad_norm": 2.139612591697966, "learning_rate": 1.8972562755399884e-08, "loss": 1.2314, "step": 13 }, { "epoch": 0.0004087471898630697, "grad_norm": 2.247054726655143, "learning_rate": 2.0431990659661415e-08, "loss": 1.2128, "step": 14 }, { "epoch": 0.00043794341771043183, "grad_norm": 1.9155506178998558, "learning_rate": 2.1891418563922943e-08, "loss": 1.1209, "step": 15 }, { "epoch": 0.00046713964555779395, "grad_norm": 1.732013265511219, "learning_rate": 2.3350846468184474e-08, "loss": 1.2032, "step": 16 }, { "epoch": 0.000496335873405156, "grad_norm": 1.6668026542753926, "learning_rate": 2.4810274372446005e-08, "loss": 1.0674, "step": 17 }, { "epoch": 0.0005255321012525182, "grad_norm": 1.889124029366496, "learning_rate": 2.6269702276707533e-08, "loss": 1.1875, "step": 18 }, { "epoch": 0.0005547283290998803, "grad_norm": 1.8937221186198312, "learning_rate": 2.7729130180969064e-08, "loss": 1.2766, "step": 19 }, { "epoch": 0.0005839245569472424, "grad_norm": 1.8891047860424008, "learning_rate": 2.918855808523059e-08, "loss": 1.2393, "step": 20 }, { "epoch": 0.0006131207847946045, "grad_norm": 1.8366528303899932, "learning_rate": 3.064798598949212e-08, "loss": 1.2225, "step": 21 }, { "epoch": 0.0006423170126419667, "grad_norm": 2.00822558408225, "learning_rate": 3.210741389375365e-08, "loss": 1.27, "step": 22 }, { "epoch": 0.0006715132404893287, "grad_norm": 1.8623544055986743, "learning_rate": 3.356684179801518e-08, "loss": 1.2416, "step": 23 }, { "epoch": 0.0007007094683366909, "grad_norm": 1.7347657300478707, "learning_rate": 3.502626970227671e-08, "loss": 1.1254, "step": 24 }, { "epoch": 0.000729905696184053, "grad_norm": 1.9979392503650237, "learning_rate": 3.6485697606538244e-08, "loss": 1.1457, "step": 25 }, { "epoch": 0.0007591019240314152, "grad_norm": 2.104735316240025, "learning_rate": 3.794512551079977e-08, "loss": 1.3405, "step": 26 }, { "epoch": 0.0007882981518787772, "grad_norm": 2.2933540802391676, "learning_rate": 3.94045534150613e-08, "loss": 1.3655, "step": 27 }, { "epoch": 0.0008174943797261394, "grad_norm": 1.7775501135030372, "learning_rate": 4.086398131932283e-08, "loss": 1.1558, "step": 28 }, { "epoch": 0.0008466906075735015, "grad_norm": 1.9844618090089563, "learning_rate": 4.232340922358436e-08, "loss": 1.1716, "step": 29 }, { "epoch": 0.0008758868354208637, "grad_norm": 1.8553354182242179, "learning_rate": 4.3782837127845886e-08, "loss": 1.2205, "step": 30 }, { "epoch": 0.0009050830632682257, "grad_norm": 1.746474986339343, "learning_rate": 4.5242265032107417e-08, "loss": 1.1395, "step": 31 }, { "epoch": 0.0009342792911155879, "grad_norm": 1.80409802122569, "learning_rate": 4.670169293636895e-08, "loss": 1.2039, "step": 32 }, { "epoch": 0.00096347551896295, "grad_norm": 1.915942259545171, "learning_rate": 4.816112084063048e-08, "loss": 1.3288, "step": 33 }, { "epoch": 0.000992671746810312, "grad_norm": 1.8990281251365397, "learning_rate": 4.962054874489201e-08, "loss": 1.1914, "step": 34 }, { "epoch": 0.0010218679746576743, "grad_norm": 2.048501015844779, "learning_rate": 5.1079976649153534e-08, "loss": 1.3561, "step": 35 }, { "epoch": 0.0010510642025050364, "grad_norm": 1.8117002459833338, "learning_rate": 5.2539404553415065e-08, "loss": 1.1978, "step": 36 }, { "epoch": 0.0010802604303523985, "grad_norm": 2.1679407133910087, "learning_rate": 5.3998832457676596e-08, "loss": 1.4075, "step": 37 }, { "epoch": 0.0011094566581997605, "grad_norm": 1.8288538783863846, "learning_rate": 5.545826036193813e-08, "loss": 1.1017, "step": 38 }, { "epoch": 0.0011386528860471228, "grad_norm": 1.7925259254619377, "learning_rate": 5.691768826619965e-08, "loss": 1.2208, "step": 39 }, { "epoch": 0.0011678491138944849, "grad_norm": 1.8027576309012383, "learning_rate": 5.837711617046118e-08, "loss": 1.0399, "step": 40 }, { "epoch": 0.001197045341741847, "grad_norm": 2.025779491587184, "learning_rate": 5.983654407472271e-08, "loss": 1.2353, "step": 41 }, { "epoch": 0.001226241569589209, "grad_norm": 1.762532246396644, "learning_rate": 6.129597197898424e-08, "loss": 1.1793, "step": 42 }, { "epoch": 0.0012554377974365713, "grad_norm": 1.6252993031072844, "learning_rate": 6.275539988324578e-08, "loss": 1.1728, "step": 43 }, { "epoch": 0.0012846340252839334, "grad_norm": 1.8925229526084895, "learning_rate": 6.42148277875073e-08, "loss": 1.2091, "step": 44 }, { "epoch": 0.0013138302531312954, "grad_norm": 2.1760296867781808, "learning_rate": 6.567425569176884e-08, "loss": 1.3169, "step": 45 }, { "epoch": 0.0013430264809786575, "grad_norm": 2.065895318399194, "learning_rate": 6.713368359603036e-08, "loss": 1.1769, "step": 46 }, { "epoch": 0.0013722227088260198, "grad_norm": 1.817002890783188, "learning_rate": 6.859311150029189e-08, "loss": 1.096, "step": 47 }, { "epoch": 0.0014014189366733818, "grad_norm": 1.9304853338473635, "learning_rate": 7.005253940455342e-08, "loss": 1.2665, "step": 48 }, { "epoch": 0.001430615164520744, "grad_norm": 1.7078528862685147, "learning_rate": 7.151196730881495e-08, "loss": 1.1364, "step": 49 }, { "epoch": 0.001459811392368106, "grad_norm": 1.869623853140318, "learning_rate": 7.297139521307649e-08, "loss": 1.2008, "step": 50 }, { "epoch": 0.0014890076202154683, "grad_norm": 2.0812385950723384, "learning_rate": 7.443082311733801e-08, "loss": 1.3668, "step": 51 }, { "epoch": 0.0015182038480628303, "grad_norm": 2.0015589071341586, "learning_rate": 7.589025102159954e-08, "loss": 1.2693, "step": 52 }, { "epoch": 0.0015474000759101924, "grad_norm": 1.8436615252511008, "learning_rate": 7.734967892586107e-08, "loss": 1.2004, "step": 53 }, { "epoch": 0.0015765963037575545, "grad_norm": 4.894789532601757, "learning_rate": 7.88091068301226e-08, "loss": 1.2079, "step": 54 }, { "epoch": 0.0016057925316049165, "grad_norm": 1.9502709218525864, "learning_rate": 8.026853473438412e-08, "loss": 1.1784, "step": 55 }, { "epoch": 0.0016349887594522788, "grad_norm": 1.8820425902414597, "learning_rate": 8.172796263864566e-08, "loss": 1.2374, "step": 56 }, { "epoch": 0.0016641849872996409, "grad_norm": 1.966200631796313, "learning_rate": 8.318739054290718e-08, "loss": 1.2868, "step": 57 }, { "epoch": 0.001693381215147003, "grad_norm": 1.9426057311854459, "learning_rate": 8.464681844716872e-08, "loss": 1.1858, "step": 58 }, { "epoch": 0.001722577442994365, "grad_norm": 1.7248013090719212, "learning_rate": 8.610624635143025e-08, "loss": 1.1579, "step": 59 }, { "epoch": 0.0017517736708417273, "grad_norm": 1.8392664649568278, "learning_rate": 8.756567425569177e-08, "loss": 1.2081, "step": 60 }, { "epoch": 0.0017809698986890894, "grad_norm": 1.7666046887605238, "learning_rate": 8.902510215995331e-08, "loss": 1.2319, "step": 61 }, { "epoch": 0.0018101661265364514, "grad_norm": 1.7910078534699576, "learning_rate": 9.048453006421483e-08, "loss": 1.2267, "step": 62 }, { "epoch": 0.0018393623543838135, "grad_norm": 2.01465950393011, "learning_rate": 9.194395796847637e-08, "loss": 1.2925, "step": 63 }, { "epoch": 0.0018685585822311758, "grad_norm": 2.0406669828728115, "learning_rate": 9.34033858727379e-08, "loss": 1.2455, "step": 64 }, { "epoch": 0.0018977548100785379, "grad_norm": 1.9822583434286631, "learning_rate": 9.486281377699942e-08, "loss": 1.3059, "step": 65 }, { "epoch": 0.0019269510379259, "grad_norm": 2.0569068211175883, "learning_rate": 9.632224168126096e-08, "loss": 1.1668, "step": 66 }, { "epoch": 0.001956147265773262, "grad_norm": 1.6483683097283344, "learning_rate": 9.778166958552248e-08, "loss": 1.0333, "step": 67 }, { "epoch": 0.001985343493620624, "grad_norm": 1.8502150205434755, "learning_rate": 9.924109748978402e-08, "loss": 1.2295, "step": 68 }, { "epoch": 0.002014539721467986, "grad_norm": 2.0258141873566045, "learning_rate": 1.0070052539404554e-07, "loss": 1.3349, "step": 69 }, { "epoch": 0.0020437359493153486, "grad_norm": 1.886104107611061, "learning_rate": 1.0215995329830707e-07, "loss": 1.1798, "step": 70 }, { "epoch": 0.0020729321771627107, "grad_norm": 1.8379019089934094, "learning_rate": 1.036193812025686e-07, "loss": 1.1411, "step": 71 }, { "epoch": 0.0021021284050100728, "grad_norm": 1.7408541036622467, "learning_rate": 1.0507880910683013e-07, "loss": 1.2103, "step": 72 }, { "epoch": 0.002131324632857435, "grad_norm": 1.8097953423939064, "learning_rate": 1.0653823701109166e-07, "loss": 1.1399, "step": 73 }, { "epoch": 0.002160520860704797, "grad_norm": 1.7739161611471337, "learning_rate": 1.0799766491535319e-07, "loss": 1.242, "step": 74 }, { "epoch": 0.002189717088552159, "grad_norm": 1.757090818203994, "learning_rate": 1.0945709281961472e-07, "loss": 1.0182, "step": 75 }, { "epoch": 0.002218913316399521, "grad_norm": 1.75164561379449, "learning_rate": 1.1091652072387625e-07, "loss": 1.1644, "step": 76 }, { "epoch": 0.002248109544246883, "grad_norm": 1.8543948158686046, "learning_rate": 1.1237594862813778e-07, "loss": 1.2115, "step": 77 }, { "epoch": 0.0022773057720942456, "grad_norm": 1.8538155498424531, "learning_rate": 1.138353765323993e-07, "loss": 1.2556, "step": 78 }, { "epoch": 0.0023065019999416077, "grad_norm": 1.8478920929432194, "learning_rate": 1.1529480443666084e-07, "loss": 1.2425, "step": 79 }, { "epoch": 0.0023356982277889697, "grad_norm": 1.7681192377367072, "learning_rate": 1.1675423234092237e-07, "loss": 1.113, "step": 80 }, { "epoch": 0.002364894455636332, "grad_norm": 1.8392202029123048, "learning_rate": 1.182136602451839e-07, "loss": 1.2544, "step": 81 }, { "epoch": 0.002394090683483694, "grad_norm": 1.7057092795895912, "learning_rate": 1.1967308814944543e-07, "loss": 1.083, "step": 82 }, { "epoch": 0.002423286911331056, "grad_norm": 1.7849947340014902, "learning_rate": 1.2113251605370697e-07, "loss": 1.1347, "step": 83 }, { "epoch": 0.002452483139178418, "grad_norm": 1.7688544388597331, "learning_rate": 1.2259194395796848e-07, "loss": 1.2108, "step": 84 }, { "epoch": 0.00248167936702578, "grad_norm": 1.9149703899578265, "learning_rate": 1.2405137186223001e-07, "loss": 1.1186, "step": 85 }, { "epoch": 0.0025108755948731426, "grad_norm": 2.0276863271836403, "learning_rate": 1.2551079976649155e-07, "loss": 1.1657, "step": 86 }, { "epoch": 0.0025400718227205047, "grad_norm": 1.8555974379529225, "learning_rate": 1.2697022767075306e-07, "loss": 1.2478, "step": 87 }, { "epoch": 0.0025692680505678667, "grad_norm": 1.7444394498703681, "learning_rate": 1.284296555750146e-07, "loss": 1.1504, "step": 88 }, { "epoch": 0.002598464278415229, "grad_norm": 1.8977404618199865, "learning_rate": 1.2988908347927614e-07, "loss": 1.0925, "step": 89 }, { "epoch": 0.002627660506262591, "grad_norm": 1.8129431737781814, "learning_rate": 1.3134851138353768e-07, "loss": 1.191, "step": 90 }, { "epoch": 0.002656856734109953, "grad_norm": 1.9882450153552693, "learning_rate": 1.328079392877992e-07, "loss": 1.2783, "step": 91 }, { "epoch": 0.002686052961957315, "grad_norm": 2.179004080701424, "learning_rate": 1.3426736719206073e-07, "loss": 1.1493, "step": 92 }, { "epoch": 0.002715249189804677, "grad_norm": 1.9389230918000595, "learning_rate": 1.3572679509632226e-07, "loss": 1.2555, "step": 93 }, { "epoch": 0.0027444454176520396, "grad_norm": 1.733145120686487, "learning_rate": 1.3718622300058377e-07, "loss": 1.1564, "step": 94 }, { "epoch": 0.0027736416454994016, "grad_norm": 1.6689581131625708, "learning_rate": 1.386456509048453e-07, "loss": 1.0777, "step": 95 }, { "epoch": 0.0028028378733467637, "grad_norm": 1.9703936599650689, "learning_rate": 1.4010507880910685e-07, "loss": 1.1705, "step": 96 }, { "epoch": 0.0028320341011941258, "grad_norm": 1.8720903919947582, "learning_rate": 1.4156450671336836e-07, "loss": 1.1476, "step": 97 }, { "epoch": 0.002861230329041488, "grad_norm": 1.6453594303094905, "learning_rate": 1.430239346176299e-07, "loss": 1.0488, "step": 98 }, { "epoch": 0.00289042655688885, "grad_norm": 1.670052347651608, "learning_rate": 1.4448336252189144e-07, "loss": 1.0569, "step": 99 }, { "epoch": 0.002919622784736212, "grad_norm": 1.8230543791348222, "learning_rate": 1.4594279042615297e-07, "loss": 1.1152, "step": 100 }, { "epoch": 0.002948819012583574, "grad_norm": 2.2568658804249657, "learning_rate": 1.4740221833041449e-07, "loss": 1.2699, "step": 101 }, { "epoch": 0.0029780152404309365, "grad_norm": 2.5321047188071066, "learning_rate": 1.4886164623467602e-07, "loss": 1.223, "step": 102 }, { "epoch": 0.0030072114682782986, "grad_norm": 1.651102708922236, "learning_rate": 1.5032107413893756e-07, "loss": 1.0322, "step": 103 }, { "epoch": 0.0030364076961256607, "grad_norm": 2.0499981287498117, "learning_rate": 1.5178050204319907e-07, "loss": 1.3364, "step": 104 }, { "epoch": 0.0030656039239730227, "grad_norm": 1.9270688974421215, "learning_rate": 1.532399299474606e-07, "loss": 1.2219, "step": 105 }, { "epoch": 0.003094800151820385, "grad_norm": 1.8904981640604148, "learning_rate": 1.5469935785172215e-07, "loss": 1.2264, "step": 106 }, { "epoch": 0.003123996379667747, "grad_norm": 1.9512051080015003, "learning_rate": 1.5615878575598366e-07, "loss": 1.2017, "step": 107 }, { "epoch": 0.003153192607515109, "grad_norm": 2.185388594753351, "learning_rate": 1.576182136602452e-07, "loss": 1.2228, "step": 108 }, { "epoch": 0.003182388835362471, "grad_norm": 1.7838890880574156, "learning_rate": 1.590776415645067e-07, "loss": 1.187, "step": 109 }, { "epoch": 0.003211585063209833, "grad_norm": 1.8688416681474476, "learning_rate": 1.6053706946876824e-07, "loss": 1.1238, "step": 110 }, { "epoch": 0.0032407812910571956, "grad_norm": 1.9727768522087845, "learning_rate": 1.6199649737302976e-07, "loss": 1.1808, "step": 111 }, { "epoch": 0.0032699775189045576, "grad_norm": 1.764155410857833, "learning_rate": 1.6345592527729132e-07, "loss": 1.1723, "step": 112 }, { "epoch": 0.0032991737467519197, "grad_norm": 1.8486104412270137, "learning_rate": 1.6491535318155283e-07, "loss": 1.2411, "step": 113 }, { "epoch": 0.0033283699745992818, "grad_norm": 1.8112455981090374, "learning_rate": 1.6637478108581437e-07, "loss": 1.1274, "step": 114 }, { "epoch": 0.003357566202446644, "grad_norm": 1.811227658886351, "learning_rate": 1.6783420899007588e-07, "loss": 1.1671, "step": 115 }, { "epoch": 0.003386762430294006, "grad_norm": 1.8038482379729184, "learning_rate": 1.6929363689433744e-07, "loss": 1.1664, "step": 116 }, { "epoch": 0.003415958658141368, "grad_norm": 2.010493518848997, "learning_rate": 1.7075306479859896e-07, "loss": 1.2085, "step": 117 }, { "epoch": 0.00344515488598873, "grad_norm": 1.686593419836746, "learning_rate": 1.722124927028605e-07, "loss": 1.2151, "step": 118 }, { "epoch": 0.0034743511138360926, "grad_norm": 1.8184311796463606, "learning_rate": 1.73671920607122e-07, "loss": 1.2117, "step": 119 }, { "epoch": 0.0035035473416834546, "grad_norm": 1.7703202251266665, "learning_rate": 1.7513134851138354e-07, "loss": 1.1233, "step": 120 }, { "epoch": 0.0035327435695308167, "grad_norm": 2.355109084385224, "learning_rate": 1.7659077641564505e-07, "loss": 1.1383, "step": 121 }, { "epoch": 0.0035619397973781788, "grad_norm": 2.0021384871337466, "learning_rate": 1.7805020431990662e-07, "loss": 1.2931, "step": 122 }, { "epoch": 0.003591136025225541, "grad_norm": 1.9664711798027312, "learning_rate": 1.7950963222416813e-07, "loss": 1.2515, "step": 123 }, { "epoch": 0.003620332253072903, "grad_norm": 2.0336971391940306, "learning_rate": 1.8096906012842967e-07, "loss": 1.2573, "step": 124 }, { "epoch": 0.003649528480920265, "grad_norm": 1.828522911082273, "learning_rate": 1.8242848803269118e-07, "loss": 1.2134, "step": 125 }, { "epoch": 0.003678724708767627, "grad_norm": 2.023481420860015, "learning_rate": 1.8388791593695274e-07, "loss": 1.3051, "step": 126 }, { "epoch": 0.0037079209366149895, "grad_norm": 1.8852228612724806, "learning_rate": 1.8534734384121425e-07, "loss": 1.217, "step": 127 }, { "epoch": 0.0037371171644623516, "grad_norm": 1.7639325686184681, "learning_rate": 1.868067717454758e-07, "loss": 1.1457, "step": 128 }, { "epoch": 0.0037663133923097137, "grad_norm": 1.9237954516060474, "learning_rate": 1.882661996497373e-07, "loss": 1.191, "step": 129 }, { "epoch": 0.0037955096201570757, "grad_norm": 1.7470762901936103, "learning_rate": 1.8972562755399884e-07, "loss": 1.1733, "step": 130 }, { "epoch": 0.003824705848004438, "grad_norm": 1.8085930168186406, "learning_rate": 1.9118505545826035e-07, "loss": 1.0618, "step": 131 }, { "epoch": 0.0038539020758518, "grad_norm": 2.3996847588478554, "learning_rate": 1.9264448336252192e-07, "loss": 1.2006, "step": 132 }, { "epoch": 0.003883098303699162, "grad_norm": 1.589080398333637, "learning_rate": 1.9410391126678343e-07, "loss": 1.103, "step": 133 }, { "epoch": 0.003912294531546524, "grad_norm": 1.8098738504775072, "learning_rate": 1.9556333917104496e-07, "loss": 1.1608, "step": 134 }, { "epoch": 0.003941490759393886, "grad_norm": 1.7104464437096472, "learning_rate": 1.9702276707530648e-07, "loss": 1.189, "step": 135 }, { "epoch": 0.003970686987241248, "grad_norm": 2.0287049608339696, "learning_rate": 1.9848219497956804e-07, "loss": 1.2826, "step": 136 }, { "epoch": 0.00399988321508861, "grad_norm": 1.6569047701435826, "learning_rate": 1.9994162288382955e-07, "loss": 1.187, "step": 137 }, { "epoch": 0.004029079442935972, "grad_norm": 1.9202520268059444, "learning_rate": 2.014010507880911e-07, "loss": 1.134, "step": 138 }, { "epoch": 0.004058275670783335, "grad_norm": 1.8136639974612851, "learning_rate": 2.028604786923526e-07, "loss": 1.1607, "step": 139 }, { "epoch": 0.004087471898630697, "grad_norm": 1.9303572853688034, "learning_rate": 2.0431990659661414e-07, "loss": 1.26, "step": 140 }, { "epoch": 0.004116668126478059, "grad_norm": 1.8338618188385234, "learning_rate": 2.0577933450087565e-07, "loss": 1.2063, "step": 141 }, { "epoch": 0.004145864354325421, "grad_norm": 1.7369180843507923, "learning_rate": 2.072387624051372e-07, "loss": 1.1914, "step": 142 }, { "epoch": 0.0041750605821727835, "grad_norm": 1.7602263596716725, "learning_rate": 2.0869819030939872e-07, "loss": 1.18, "step": 143 }, { "epoch": 0.0042042568100201455, "grad_norm": 1.901810321223264, "learning_rate": 2.1015761821366026e-07, "loss": 1.215, "step": 144 }, { "epoch": 0.004233453037867508, "grad_norm": 2.0099123466871878, "learning_rate": 2.1161704611792177e-07, "loss": 1.2183, "step": 145 }, { "epoch": 0.00426264926571487, "grad_norm": 1.7113397135216992, "learning_rate": 2.130764740221833e-07, "loss": 1.1279, "step": 146 }, { "epoch": 0.004291845493562232, "grad_norm": 1.8634086926956812, "learning_rate": 2.1453590192644485e-07, "loss": 1.0638, "step": 147 }, { "epoch": 0.004321041721409594, "grad_norm": 1.6709347912440924, "learning_rate": 2.1599532983070639e-07, "loss": 1.1744, "step": 148 }, { "epoch": 0.004350237949256956, "grad_norm": 1.9120356685312079, "learning_rate": 2.174547577349679e-07, "loss": 1.193, "step": 149 }, { "epoch": 0.004379434177104318, "grad_norm": 1.8696421766732298, "learning_rate": 2.1891418563922943e-07, "loss": 1.1593, "step": 150 }, { "epoch": 0.00440863040495168, "grad_norm": 1.5847116626797104, "learning_rate": 2.2037361354349095e-07, "loss": 1.0482, "step": 151 }, { "epoch": 0.004437826632799042, "grad_norm": 1.8283011312730928, "learning_rate": 2.218330414477525e-07, "loss": 1.2841, "step": 152 }, { "epoch": 0.004467022860646404, "grad_norm": 1.6628914821690541, "learning_rate": 2.2329246935201402e-07, "loss": 1.1896, "step": 153 }, { "epoch": 0.004496219088493766, "grad_norm": 1.7806913142152734, "learning_rate": 2.2475189725627556e-07, "loss": 1.2436, "step": 154 }, { "epoch": 0.004525415316341129, "grad_norm": 1.7508426392680774, "learning_rate": 2.2621132516053707e-07, "loss": 1.1526, "step": 155 }, { "epoch": 0.004554611544188491, "grad_norm": 2.0879209707456954, "learning_rate": 2.276707530647986e-07, "loss": 1.2773, "step": 156 }, { "epoch": 0.004583807772035853, "grad_norm": 2.030025160549916, "learning_rate": 2.2913018096906012e-07, "loss": 1.1074, "step": 157 }, { "epoch": 0.004613003999883215, "grad_norm": 1.684777570167562, "learning_rate": 2.3058960887332168e-07, "loss": 1.1095, "step": 158 }, { "epoch": 0.004642200227730577, "grad_norm": 2.094790609509077, "learning_rate": 2.320490367775832e-07, "loss": 1.2323, "step": 159 }, { "epoch": 0.0046713964555779395, "grad_norm": 1.6202620010672013, "learning_rate": 2.3350846468184473e-07, "loss": 1.039, "step": 160 }, { "epoch": 0.0047005926834253016, "grad_norm": 1.701392028715677, "learning_rate": 2.3496789258610624e-07, "loss": 1.1656, "step": 161 }, { "epoch": 0.004729788911272664, "grad_norm": 1.7211164999563722, "learning_rate": 2.364273204903678e-07, "loss": 1.296, "step": 162 }, { "epoch": 0.004758985139120026, "grad_norm": 2.031191644750636, "learning_rate": 2.3788674839462932e-07, "loss": 1.2916, "step": 163 }, { "epoch": 0.004788181366967388, "grad_norm": 1.7261164801485471, "learning_rate": 2.3934617629889086e-07, "loss": 1.156, "step": 164 }, { "epoch": 0.00481737759481475, "grad_norm": 1.7434209576654613, "learning_rate": 2.4080560420315237e-07, "loss": 1.1853, "step": 165 }, { "epoch": 0.004846573822662112, "grad_norm": 1.8729449555483424, "learning_rate": 2.4226503210741393e-07, "loss": 1.2275, "step": 166 }, { "epoch": 0.004875770050509474, "grad_norm": 1.493365198467357, "learning_rate": 2.4372446001167544e-07, "loss": 0.985, "step": 167 }, { "epoch": 0.004904966278356836, "grad_norm": 2.1233298675392436, "learning_rate": 2.4518388791593695e-07, "loss": 1.3081, "step": 168 }, { "epoch": 0.004934162506204198, "grad_norm": 1.7436713744579337, "learning_rate": 2.4664331582019847e-07, "loss": 1.2004, "step": 169 }, { "epoch": 0.00496335873405156, "grad_norm": 1.7067759522979125, "learning_rate": 2.4810274372446003e-07, "loss": 1.0535, "step": 170 }, { "epoch": 0.004992554961898922, "grad_norm": 1.7321899952825164, "learning_rate": 2.4956217162872154e-07, "loss": 1.2517, "step": 171 }, { "epoch": 0.005021751189746285, "grad_norm": 1.7826703250388047, "learning_rate": 2.510215995329831e-07, "loss": 1.1923, "step": 172 }, { "epoch": 0.005050947417593647, "grad_norm": 1.8241442300718662, "learning_rate": 2.524810274372446e-07, "loss": 1.1872, "step": 173 }, { "epoch": 0.005080143645441009, "grad_norm": 1.7417637635876229, "learning_rate": 2.5394045534150613e-07, "loss": 1.1913, "step": 174 }, { "epoch": 0.005109339873288371, "grad_norm": 1.7352193047404822, "learning_rate": 2.5539988324576764e-07, "loss": 1.1362, "step": 175 }, { "epoch": 0.0051385361011357334, "grad_norm": 1.7751765658946619, "learning_rate": 2.568593111500292e-07, "loss": 1.1766, "step": 176 }, { "epoch": 0.0051677323289830955, "grad_norm": 1.5505744231963325, "learning_rate": 2.583187390542907e-07, "loss": 1.1143, "step": 177 }, { "epoch": 0.005196928556830458, "grad_norm": 1.8581987178721313, "learning_rate": 2.597781669585523e-07, "loss": 1.1439, "step": 178 }, { "epoch": 0.00522612478467782, "grad_norm": 1.6238586747318047, "learning_rate": 2.612375948628138e-07, "loss": 1.1543, "step": 179 }, { "epoch": 0.005255321012525182, "grad_norm": 1.7096483637615085, "learning_rate": 2.6269702276707535e-07, "loss": 1.0247, "step": 180 }, { "epoch": 0.005284517240372544, "grad_norm": 1.5576703982944688, "learning_rate": 2.6415645067133686e-07, "loss": 1.019, "step": 181 }, { "epoch": 0.005313713468219906, "grad_norm": 1.7950376381925204, "learning_rate": 2.656158785755984e-07, "loss": 1.1757, "step": 182 }, { "epoch": 0.005342909696067268, "grad_norm": 1.662781319705304, "learning_rate": 2.670753064798599e-07, "loss": 1.2792, "step": 183 }, { "epoch": 0.00537210592391463, "grad_norm": 1.503629446657202, "learning_rate": 2.6853473438412145e-07, "loss": 1.0445, "step": 184 }, { "epoch": 0.005401302151761992, "grad_norm": 1.6870970662907327, "learning_rate": 2.6999416228838296e-07, "loss": 1.1885, "step": 185 }, { "epoch": 0.005430498379609354, "grad_norm": 1.8100949927700638, "learning_rate": 2.714535901926445e-07, "loss": 1.1947, "step": 186 }, { "epoch": 0.005459694607456716, "grad_norm": 1.475841069074069, "learning_rate": 2.7291301809690604e-07, "loss": 1.069, "step": 187 }, { "epoch": 0.005488890835304079, "grad_norm": 1.6530931193663538, "learning_rate": 2.7437244600116755e-07, "loss": 1.1978, "step": 188 }, { "epoch": 0.005518087063151441, "grad_norm": 1.5783396213691485, "learning_rate": 2.7583187390542906e-07, "loss": 1.0853, "step": 189 }, { "epoch": 0.005547283290998803, "grad_norm": 2.2303089548696704, "learning_rate": 2.772913018096906e-07, "loss": 1.3111, "step": 190 }, { "epoch": 0.005576479518846165, "grad_norm": 1.6525144576852384, "learning_rate": 2.7875072971395214e-07, "loss": 1.1074, "step": 191 }, { "epoch": 0.005605675746693527, "grad_norm": 1.7242603789541466, "learning_rate": 2.802101576182137e-07, "loss": 1.1916, "step": 192 }, { "epoch": 0.0056348719745408895, "grad_norm": 1.742944830075046, "learning_rate": 2.816695855224752e-07, "loss": 1.2065, "step": 193 }, { "epoch": 0.0056640682023882515, "grad_norm": 1.7050816550042398, "learning_rate": 2.831290134267367e-07, "loss": 1.183, "step": 194 }, { "epoch": 0.005693264430235614, "grad_norm": 2.2002140565800756, "learning_rate": 2.8458844133099823e-07, "loss": 1.1578, "step": 195 }, { "epoch": 0.005722460658082976, "grad_norm": 1.5692089165307062, "learning_rate": 2.860478692352598e-07, "loss": 1.0594, "step": 196 }, { "epoch": 0.005751656885930338, "grad_norm": 1.640073188962811, "learning_rate": 2.875072971395213e-07, "loss": 1.0918, "step": 197 }, { "epoch": 0.0057808531137777, "grad_norm": 1.7040828176994178, "learning_rate": 2.8896672504378287e-07, "loss": 1.1708, "step": 198 }, { "epoch": 0.005810049341625062, "grad_norm": 1.8658157220392162, "learning_rate": 2.904261529480444e-07, "loss": 1.1214, "step": 199 }, { "epoch": 0.005839245569472424, "grad_norm": 1.9081349083473886, "learning_rate": 2.9188558085230595e-07, "loss": 1.1565, "step": 200 }, { "epoch": 0.005868441797319786, "grad_norm": 1.7733315125777545, "learning_rate": 2.9334500875656746e-07, "loss": 1.3049, "step": 201 }, { "epoch": 0.005897638025167148, "grad_norm": 1.7624688639181043, "learning_rate": 2.9480443666082897e-07, "loss": 1.2144, "step": 202 }, { "epoch": 0.00592683425301451, "grad_norm": 2.1998184677160446, "learning_rate": 2.962638645650905e-07, "loss": 1.2714, "step": 203 }, { "epoch": 0.005956030480861873, "grad_norm": 1.6942891330134466, "learning_rate": 2.9772329246935205e-07, "loss": 1.1557, "step": 204 }, { "epoch": 0.005985226708709235, "grad_norm": 1.7231021372307216, "learning_rate": 2.9918272037361356e-07, "loss": 1.3078, "step": 205 }, { "epoch": 0.006014422936556597, "grad_norm": 1.5177937337263416, "learning_rate": 3.006421482778751e-07, "loss": 1.1239, "step": 206 }, { "epoch": 0.006043619164403959, "grad_norm": 1.855418051647185, "learning_rate": 3.0210157618213663e-07, "loss": 1.3592, "step": 207 }, { "epoch": 0.006072815392251321, "grad_norm": 1.6766844024985006, "learning_rate": 3.0356100408639814e-07, "loss": 1.1693, "step": 208 }, { "epoch": 0.006102011620098683, "grad_norm": 1.9769987574851824, "learning_rate": 3.0502043199065965e-07, "loss": 1.2803, "step": 209 }, { "epoch": 0.0061312078479460455, "grad_norm": 1.6663220497155573, "learning_rate": 3.064798598949212e-07, "loss": 1.0541, "step": 210 }, { "epoch": 0.0061604040757934075, "grad_norm": 1.3861525421125627, "learning_rate": 3.0793928779918273e-07, "loss": 1.0435, "step": 211 }, { "epoch": 0.00618960030364077, "grad_norm": 1.9844877398815621, "learning_rate": 3.093987157034443e-07, "loss": 1.1635, "step": 212 }, { "epoch": 0.006218796531488132, "grad_norm": 1.6673323618570393, "learning_rate": 3.108581436077058e-07, "loss": 1.1811, "step": 213 }, { "epoch": 0.006247992759335494, "grad_norm": 1.6858144951031955, "learning_rate": 3.123175715119673e-07, "loss": 1.237, "step": 214 }, { "epoch": 0.006277188987182856, "grad_norm": 1.7674669007737875, "learning_rate": 3.137769994162289e-07, "loss": 1.2218, "step": 215 }, { "epoch": 0.006306385215030218, "grad_norm": 1.7837515413513383, "learning_rate": 3.152364273204904e-07, "loss": 1.3375, "step": 216 }, { "epoch": 0.00633558144287758, "grad_norm": 1.4262467630217504, "learning_rate": 3.166958552247519e-07, "loss": 1.0527, "step": 217 }, { "epoch": 0.006364777670724942, "grad_norm": 1.4618866906216905, "learning_rate": 3.181552831290134e-07, "loss": 1.0389, "step": 218 }, { "epoch": 0.006393973898572304, "grad_norm": 1.3394330877289036, "learning_rate": 3.19614711033275e-07, "loss": 0.9764, "step": 219 }, { "epoch": 0.006423170126419666, "grad_norm": 1.6774164988590967, "learning_rate": 3.210741389375365e-07, "loss": 1.2785, "step": 220 }, { "epoch": 0.006452366354267029, "grad_norm": 1.5581749282159407, "learning_rate": 3.22533566841798e-07, "loss": 1.1824, "step": 221 }, { "epoch": 0.006481562582114391, "grad_norm": 1.6467105923438146, "learning_rate": 3.239929947460595e-07, "loss": 1.1097, "step": 222 }, { "epoch": 0.006510758809961753, "grad_norm": 1.5632112914194025, "learning_rate": 3.2545242265032113e-07, "loss": 1.212, "step": 223 }, { "epoch": 0.006539955037809115, "grad_norm": 1.481280148528584, "learning_rate": 3.2691185055458264e-07, "loss": 1.0791, "step": 224 }, { "epoch": 0.006569151265656477, "grad_norm": 1.4931273752238368, "learning_rate": 3.2837127845884415e-07, "loss": 1.105, "step": 225 }, { "epoch": 0.006598347493503839, "grad_norm": 1.476087766896513, "learning_rate": 3.2983070636310566e-07, "loss": 1.1216, "step": 226 }, { "epoch": 0.0066275437213512015, "grad_norm": 1.4602526284142057, "learning_rate": 3.3129013426736723e-07, "loss": 1.0948, "step": 227 }, { "epoch": 0.0066567399491985636, "grad_norm": 1.573678486748269, "learning_rate": 3.3274956217162874e-07, "loss": 1.1682, "step": 228 }, { "epoch": 0.006685936177045926, "grad_norm": 1.5281149539095111, "learning_rate": 3.3420899007589025e-07, "loss": 1.2333, "step": 229 }, { "epoch": 0.006715132404893288, "grad_norm": 1.601551254979704, "learning_rate": 3.3566841798015176e-07, "loss": 1.1712, "step": 230 }, { "epoch": 0.00674432863274065, "grad_norm": 1.7515566638348197, "learning_rate": 3.371278458844134e-07, "loss": 1.2537, "step": 231 }, { "epoch": 0.006773524860588012, "grad_norm": 1.4202209400761336, "learning_rate": 3.385872737886749e-07, "loss": 1.116, "step": 232 }, { "epoch": 0.006802721088435374, "grad_norm": 1.335001822421316, "learning_rate": 3.400467016929364e-07, "loss": 1.0523, "step": 233 }, { "epoch": 0.006831917316282736, "grad_norm": 1.373508516289454, "learning_rate": 3.415061295971979e-07, "loss": 1.066, "step": 234 }, { "epoch": 0.006861113544130098, "grad_norm": 1.3969138071328, "learning_rate": 3.429655575014595e-07, "loss": 1.0148, "step": 235 }, { "epoch": 0.00689030977197746, "grad_norm": 1.4826320315064567, "learning_rate": 3.44424985405721e-07, "loss": 1.0412, "step": 236 }, { "epoch": 0.006919505999824823, "grad_norm": 1.4386816848717532, "learning_rate": 3.458844133099825e-07, "loss": 1.1875, "step": 237 }, { "epoch": 0.006948702227672185, "grad_norm": 1.4177099556518549, "learning_rate": 3.47343841214244e-07, "loss": 1.1442, "step": 238 }, { "epoch": 0.006977898455519547, "grad_norm": 1.4394986396779483, "learning_rate": 3.488032691185056e-07, "loss": 1.0717, "step": 239 }, { "epoch": 0.007007094683366909, "grad_norm": 1.8932996842681515, "learning_rate": 3.502626970227671e-07, "loss": 1.2587, "step": 240 }, { "epoch": 0.007036290911214271, "grad_norm": 1.748061455031705, "learning_rate": 3.517221249270286e-07, "loss": 1.2315, "step": 241 }, { "epoch": 0.007065487139061633, "grad_norm": 1.305329253517568, "learning_rate": 3.531815528312901e-07, "loss": 1.0242, "step": 242 }, { "epoch": 0.0070946833669089954, "grad_norm": 1.5118046998173078, "learning_rate": 3.546409807355517e-07, "loss": 1.2161, "step": 243 }, { "epoch": 0.0071238795947563575, "grad_norm": 1.6226041113060667, "learning_rate": 3.5610040863981324e-07, "loss": 1.1041, "step": 244 }, { "epoch": 0.00715307582260372, "grad_norm": 1.8428695453685005, "learning_rate": 3.5755983654407475e-07, "loss": 1.2837, "step": 245 }, { "epoch": 0.007182272050451082, "grad_norm": 1.4889807233823857, "learning_rate": 3.5901926444833626e-07, "loss": 1.2083, "step": 246 }, { "epoch": 0.007211468278298444, "grad_norm": 1.54903818828253, "learning_rate": 3.604786923525978e-07, "loss": 1.209, "step": 247 }, { "epoch": 0.007240664506145806, "grad_norm": 1.5238626660139298, "learning_rate": 3.6193812025685933e-07, "loss": 1.0416, "step": 248 }, { "epoch": 0.007269860733993168, "grad_norm": 1.3193840609610026, "learning_rate": 3.6339754816112084e-07, "loss": 1.1058, "step": 249 }, { "epoch": 0.00729905696184053, "grad_norm": 1.4629312768882559, "learning_rate": 3.6485697606538236e-07, "loss": 1.083, "step": 250 }, { "epoch": 0.007328253189687892, "grad_norm": 1.617370340703401, "learning_rate": 3.6631640396964397e-07, "loss": 1.2828, "step": 251 }, { "epoch": 0.007357449417535254, "grad_norm": 1.521621074978622, "learning_rate": 3.677758318739055e-07, "loss": 1.2676, "step": 252 }, { "epoch": 0.007386645645382617, "grad_norm": 1.393300639037085, "learning_rate": 3.69235259778167e-07, "loss": 1.1481, "step": 253 }, { "epoch": 0.007415841873229979, "grad_norm": 1.7609562713142446, "learning_rate": 3.706946876824285e-07, "loss": 1.2207, "step": 254 }, { "epoch": 0.007445038101077341, "grad_norm": 1.3075584005265195, "learning_rate": 3.7215411558669007e-07, "loss": 0.972, "step": 255 }, { "epoch": 0.007474234328924703, "grad_norm": 1.3147864040230735, "learning_rate": 3.736135434909516e-07, "loss": 0.9764, "step": 256 }, { "epoch": 0.007503430556772065, "grad_norm": 1.5807491371968154, "learning_rate": 3.750729713952131e-07, "loss": 1.218, "step": 257 }, { "epoch": 0.007532626784619427, "grad_norm": 1.40433246647741, "learning_rate": 3.765323992994746e-07, "loss": 1.0536, "step": 258 }, { "epoch": 0.007561823012466789, "grad_norm": 1.4382675437679635, "learning_rate": 3.7799182720373617e-07, "loss": 1.0978, "step": 259 }, { "epoch": 0.0075910192403141515, "grad_norm": 1.446099840994693, "learning_rate": 3.794512551079977e-07, "loss": 1.1534, "step": 260 }, { "epoch": 0.0076202154681615135, "grad_norm": 1.4329597868565547, "learning_rate": 3.809106830122592e-07, "loss": 1.1974, "step": 261 }, { "epoch": 0.007649411696008876, "grad_norm": 1.3540040767768409, "learning_rate": 3.823701109165207e-07, "loss": 1.1287, "step": 262 }, { "epoch": 0.007678607923856238, "grad_norm": 1.4477238977667568, "learning_rate": 3.838295388207823e-07, "loss": 1.2924, "step": 263 }, { "epoch": 0.0077078041517036, "grad_norm": 1.344368135909357, "learning_rate": 3.8528896672504383e-07, "loss": 0.9874, "step": 264 }, { "epoch": 0.007737000379550962, "grad_norm": 1.500420202381967, "learning_rate": 3.8674839462930534e-07, "loss": 1.0222, "step": 265 }, { "epoch": 0.007766196607398324, "grad_norm": 1.4915059202690877, "learning_rate": 3.8820782253356685e-07, "loss": 1.2965, "step": 266 }, { "epoch": 0.007795392835245686, "grad_norm": 1.3847312014756215, "learning_rate": 3.896672504378284e-07, "loss": 1.0873, "step": 267 }, { "epoch": 0.007824589063093048, "grad_norm": 1.5673294234729223, "learning_rate": 3.9112667834208993e-07, "loss": 1.0186, "step": 268 }, { "epoch": 0.007853785290940411, "grad_norm": 1.3050289286473888, "learning_rate": 3.9258610624635144e-07, "loss": 1.0452, "step": 269 }, { "epoch": 0.007882981518787772, "grad_norm": 1.3863211832592595, "learning_rate": 3.9404553415061295e-07, "loss": 1.1313, "step": 270 }, { "epoch": 0.007912177746635135, "grad_norm": 1.3438778793079598, "learning_rate": 3.9550496205487457e-07, "loss": 1.0121, "step": 271 }, { "epoch": 0.007941373974482496, "grad_norm": 1.3660634835572238, "learning_rate": 3.969643899591361e-07, "loss": 1.0633, "step": 272 }, { "epoch": 0.00797057020232986, "grad_norm": 1.350942169629318, "learning_rate": 3.984238178633976e-07, "loss": 1.0773, "step": 273 }, { "epoch": 0.00799976643017722, "grad_norm": 1.3647428675744222, "learning_rate": 3.998832457676591e-07, "loss": 1.0456, "step": 274 }, { "epoch": 0.008028962658024583, "grad_norm": 1.439104989206572, "learning_rate": 4.0134267367192067e-07, "loss": 1.1595, "step": 275 }, { "epoch": 0.008058158885871945, "grad_norm": 1.4568381153064818, "learning_rate": 4.028021015761822e-07, "loss": 1.1871, "step": 276 }, { "epoch": 0.008087355113719307, "grad_norm": 1.4235398715343701, "learning_rate": 4.042615294804437e-07, "loss": 1.136, "step": 277 }, { "epoch": 0.00811655134156667, "grad_norm": 1.4874931038199892, "learning_rate": 4.057209573847052e-07, "loss": 1.0997, "step": 278 }, { "epoch": 0.008145747569414032, "grad_norm": 1.7533757881370833, "learning_rate": 4.0718038528896676e-07, "loss": 1.1569, "step": 279 }, { "epoch": 0.008174943797261395, "grad_norm": 1.3569107188061147, "learning_rate": 4.086398131932283e-07, "loss": 1.1137, "step": 280 }, { "epoch": 0.008204140025108756, "grad_norm": 1.4793721194313005, "learning_rate": 4.100992410974898e-07, "loss": 1.1897, "step": 281 }, { "epoch": 0.008233336252956119, "grad_norm": 1.4218939659815846, "learning_rate": 4.115586690017513e-07, "loss": 1.2444, "step": 282 }, { "epoch": 0.00826253248080348, "grad_norm": 1.4144616496345588, "learning_rate": 4.130180969060129e-07, "loss": 1.283, "step": 283 }, { "epoch": 0.008291728708650843, "grad_norm": 1.384058194737617, "learning_rate": 4.144775248102744e-07, "loss": 1.0661, "step": 284 }, { "epoch": 0.008320924936498204, "grad_norm": 1.3462879021242626, "learning_rate": 4.1593695271453594e-07, "loss": 1.1742, "step": 285 }, { "epoch": 0.008350121164345567, "grad_norm": 1.4052709815971918, "learning_rate": 4.1739638061879745e-07, "loss": 1.01, "step": 286 }, { "epoch": 0.008379317392192928, "grad_norm": 1.3714098741445195, "learning_rate": 4.18855808523059e-07, "loss": 1.0451, "step": 287 }, { "epoch": 0.008408513620040291, "grad_norm": 1.4814293214915553, "learning_rate": 4.203152364273205e-07, "loss": 1.1564, "step": 288 }, { "epoch": 0.008437709847887652, "grad_norm": 1.6624294892732954, "learning_rate": 4.2177466433158203e-07, "loss": 1.1063, "step": 289 }, { "epoch": 0.008466906075735015, "grad_norm": 1.3139156299179613, "learning_rate": 4.2323409223584355e-07, "loss": 1.1585, "step": 290 }, { "epoch": 0.008496102303582376, "grad_norm": 1.2587871802721118, "learning_rate": 4.246935201401051e-07, "loss": 0.991, "step": 291 }, { "epoch": 0.00852529853142974, "grad_norm": 1.4383669824579983, "learning_rate": 4.261529480443666e-07, "loss": 1.1576, "step": 292 }, { "epoch": 0.0085544947592771, "grad_norm": 1.3187246613162715, "learning_rate": 4.276123759486282e-07, "loss": 1.0672, "step": 293 }, { "epoch": 0.008583690987124463, "grad_norm": 1.1953980183009751, "learning_rate": 4.290718038528897e-07, "loss": 0.9673, "step": 294 }, { "epoch": 0.008612887214971826, "grad_norm": 1.2977512545042114, "learning_rate": 4.3053123175715126e-07, "loss": 1.0374, "step": 295 }, { "epoch": 0.008642083442819188, "grad_norm": 1.457418305379164, "learning_rate": 4.3199065966141277e-07, "loss": 1.1179, "step": 296 }, { "epoch": 0.00867127967066655, "grad_norm": 1.3912824254106113, "learning_rate": 4.334500875656743e-07, "loss": 1.0564, "step": 297 }, { "epoch": 0.008700475898513912, "grad_norm": 1.2887433304579643, "learning_rate": 4.349095154699358e-07, "loss": 1.0796, "step": 298 }, { "epoch": 0.008729672126361275, "grad_norm": 1.3740866094687831, "learning_rate": 4.3636894337419736e-07, "loss": 1.108, "step": 299 }, { "epoch": 0.008758868354208636, "grad_norm": 1.3989351209210823, "learning_rate": 4.3782837127845887e-07, "loss": 1.1428, "step": 300 }, { "epoch": 0.008788064582055999, "grad_norm": 1.3926606224945928, "learning_rate": 4.392877991827204e-07, "loss": 1.2316, "step": 301 }, { "epoch": 0.00881726080990336, "grad_norm": 1.445043285551444, "learning_rate": 4.407472270869819e-07, "loss": 1.1033, "step": 302 }, { "epoch": 0.008846457037750723, "grad_norm": 1.390252057356193, "learning_rate": 4.422066549912435e-07, "loss": 1.2356, "step": 303 }, { "epoch": 0.008875653265598084, "grad_norm": 1.3071342503733188, "learning_rate": 4.43666082895505e-07, "loss": 1.1669, "step": 304 }, { "epoch": 0.008904849493445447, "grad_norm": 1.3747476927929396, "learning_rate": 4.4512551079976653e-07, "loss": 1.1492, "step": 305 }, { "epoch": 0.008934045721292808, "grad_norm": 1.3525506779172978, "learning_rate": 4.4658493870402804e-07, "loss": 1.1223, "step": 306 }, { "epoch": 0.008963241949140171, "grad_norm": 1.2690539453501433, "learning_rate": 4.480443666082896e-07, "loss": 1.1412, "step": 307 }, { "epoch": 0.008992438176987532, "grad_norm": 1.2992321868330092, "learning_rate": 4.495037945125511e-07, "loss": 1.1253, "step": 308 }, { "epoch": 0.009021634404834895, "grad_norm": 1.1316492405980718, "learning_rate": 4.5096322241681263e-07, "loss": 0.9895, "step": 309 }, { "epoch": 0.009050830632682258, "grad_norm": 1.3668962506077857, "learning_rate": 4.5242265032107414e-07, "loss": 1.1739, "step": 310 }, { "epoch": 0.00908002686052962, "grad_norm": 1.3537192660998063, "learning_rate": 4.538820782253357e-07, "loss": 1.1143, "step": 311 }, { "epoch": 0.009109223088376982, "grad_norm": 1.2602696615950546, "learning_rate": 4.553415061295972e-07, "loss": 1.0093, "step": 312 }, { "epoch": 0.009138419316224344, "grad_norm": 1.430561472893561, "learning_rate": 4.5680093403385873e-07, "loss": 1.1785, "step": 313 }, { "epoch": 0.009167615544071707, "grad_norm": 1.3152263062241398, "learning_rate": 4.5826036193812024e-07, "loss": 1.0177, "step": 314 }, { "epoch": 0.009196811771919068, "grad_norm": 1.2734964363438042, "learning_rate": 4.5971978984238186e-07, "loss": 1.1785, "step": 315 }, { "epoch": 0.00922600799976643, "grad_norm": 1.164180069009724, "learning_rate": 4.6117921774664337e-07, "loss": 1.0263, "step": 316 }, { "epoch": 0.009255204227613792, "grad_norm": 1.1410663845115447, "learning_rate": 4.626386456509049e-07, "loss": 1.0262, "step": 317 }, { "epoch": 0.009284400455461155, "grad_norm": 1.1770080713189115, "learning_rate": 4.640980735551664e-07, "loss": 1.0503, "step": 318 }, { "epoch": 0.009313596683308516, "grad_norm": 1.2215507460820874, "learning_rate": 4.6555750145942795e-07, "loss": 1.1418, "step": 319 }, { "epoch": 0.009342792911155879, "grad_norm": 1.1911356078088722, "learning_rate": 4.6701692936368946e-07, "loss": 1.1092, "step": 320 }, { "epoch": 0.00937198913900324, "grad_norm": 2.0470917724612616, "learning_rate": 4.68476357267951e-07, "loss": 0.98, "step": 321 }, { "epoch": 0.009401185366850603, "grad_norm": 1.2914241627852383, "learning_rate": 4.699357851722125e-07, "loss": 1.0326, "step": 322 }, { "epoch": 0.009430381594697964, "grad_norm": 1.5903836873899633, "learning_rate": 4.713952130764741e-07, "loss": 1.087, "step": 323 }, { "epoch": 0.009459577822545327, "grad_norm": 1.2631832151822162, "learning_rate": 4.728546409807356e-07, "loss": 0.9675, "step": 324 }, { "epoch": 0.009488774050392688, "grad_norm": 1.21605516208114, "learning_rate": 4.743140688849971e-07, "loss": 1.1234, "step": 325 }, { "epoch": 0.009517970278240051, "grad_norm": 1.1577518376267983, "learning_rate": 4.7577349678925864e-07, "loss": 0.9374, "step": 326 }, { "epoch": 0.009547166506087414, "grad_norm": 1.1510906716344194, "learning_rate": 4.772329246935201e-07, "loss": 1.0803, "step": 327 }, { "epoch": 0.009576362733934776, "grad_norm": 1.2513495075498278, "learning_rate": 4.786923525977817e-07, "loss": 1.0897, "step": 328 }, { "epoch": 0.009605558961782138, "grad_norm": 1.4708769615273305, "learning_rate": 4.801517805020433e-07, "loss": 1.1173, "step": 329 }, { "epoch": 0.0096347551896295, "grad_norm": 1.2971662286011412, "learning_rate": 4.816112084063047e-07, "loss": 1.0598, "step": 330 }, { "epoch": 0.009663951417476863, "grad_norm": 1.115949391539953, "learning_rate": 4.830706363105663e-07, "loss": 0.9309, "step": 331 }, { "epoch": 0.009693147645324224, "grad_norm": 1.1785315681074524, "learning_rate": 4.845300642148279e-07, "loss": 1.1108, "step": 332 }, { "epoch": 0.009722343873171587, "grad_norm": 1.114103069175744, "learning_rate": 4.859894921190893e-07, "loss": 1.0472, "step": 333 }, { "epoch": 0.009751540101018948, "grad_norm": 1.2397361345072904, "learning_rate": 4.874489200233509e-07, "loss": 1.1339, "step": 334 }, { "epoch": 0.00978073632886631, "grad_norm": 1.296652369775321, "learning_rate": 4.889083479276124e-07, "loss": 1.0502, "step": 335 }, { "epoch": 0.009809932556713672, "grad_norm": 1.1846307492975476, "learning_rate": 4.903677758318739e-07, "loss": 1.025, "step": 336 }, { "epoch": 0.009839128784561035, "grad_norm": 1.1903954667871255, "learning_rate": 4.918272037361355e-07, "loss": 1.0045, "step": 337 }, { "epoch": 0.009868325012408396, "grad_norm": 1.0586189941565125, "learning_rate": 4.932866316403969e-07, "loss": 1.0718, "step": 338 }, { "epoch": 0.00989752124025576, "grad_norm": 1.1007161450906187, "learning_rate": 4.947460595446586e-07, "loss": 1.0987, "step": 339 }, { "epoch": 0.00992671746810312, "grad_norm": 1.1321359601034164, "learning_rate": 4.962054874489201e-07, "loss": 1.062, "step": 340 }, { "epoch": 0.009955913695950483, "grad_norm": 1.231249031790005, "learning_rate": 4.976649153531816e-07, "loss": 1.1096, "step": 341 }, { "epoch": 0.009985109923797844, "grad_norm": 1.097954499563067, "learning_rate": 4.991243432574431e-07, "loss": 0.9896, "step": 342 }, { "epoch": 0.010014306151645207, "grad_norm": 1.0725299707969598, "learning_rate": 5.005837711617046e-07, "loss": 0.9875, "step": 343 }, { "epoch": 0.01004350237949257, "grad_norm": 1.132668992279777, "learning_rate": 5.020431990659662e-07, "loss": 1.0353, "step": 344 }, { "epoch": 0.010072698607339932, "grad_norm": 1.179265408805325, "learning_rate": 5.035026269702277e-07, "loss": 1.0246, "step": 345 }, { "epoch": 0.010101894835187294, "grad_norm": 1.0391967046523094, "learning_rate": 5.049620548744892e-07, "loss": 0.8741, "step": 346 }, { "epoch": 0.010131091063034656, "grad_norm": 1.1072352648918433, "learning_rate": 5.064214827787508e-07, "loss": 1.1298, "step": 347 }, { "epoch": 0.010160287290882019, "grad_norm": 1.1462445095083564, "learning_rate": 5.078809106830123e-07, "loss": 1.0559, "step": 348 }, { "epoch": 0.01018948351872938, "grad_norm": 1.1696320487535323, "learning_rate": 5.093403385872738e-07, "loss": 1.0159, "step": 349 }, { "epoch": 0.010218679746576743, "grad_norm": 1.146562188384641, "learning_rate": 5.107997664915353e-07, "loss": 1.1, "step": 350 }, { "epoch": 0.010247875974424104, "grad_norm": 1.3424369468170074, "learning_rate": 5.122591943957969e-07, "loss": 1.1298, "step": 351 }, { "epoch": 0.010277072202271467, "grad_norm": 1.1760578986299477, "learning_rate": 5.137186223000584e-07, "loss": 1.0544, "step": 352 }, { "epoch": 0.010306268430118828, "grad_norm": 1.2238622529865966, "learning_rate": 5.1517805020432e-07, "loss": 1.1941, "step": 353 }, { "epoch": 0.010335464657966191, "grad_norm": 1.2262441034291307, "learning_rate": 5.166374781085814e-07, "loss": 1.0916, "step": 354 }, { "epoch": 0.010364660885813552, "grad_norm": 1.122067762895734, "learning_rate": 5.18096906012843e-07, "loss": 1.053, "step": 355 }, { "epoch": 0.010393857113660915, "grad_norm": 1.1747961741806996, "learning_rate": 5.195563339171046e-07, "loss": 1.0417, "step": 356 }, { "epoch": 0.010423053341508276, "grad_norm": 1.1091767566882875, "learning_rate": 5.21015761821366e-07, "loss": 1.0006, "step": 357 }, { "epoch": 0.01045224956935564, "grad_norm": 1.1191818340723114, "learning_rate": 5.224751897256276e-07, "loss": 1.0433, "step": 358 }, { "epoch": 0.010481445797203002, "grad_norm": 1.5610723137913405, "learning_rate": 5.239346176298891e-07, "loss": 1.0977, "step": 359 }, { "epoch": 0.010510642025050363, "grad_norm": 1.1849086209968651, "learning_rate": 5.253940455341507e-07, "loss": 0.9642, "step": 360 }, { "epoch": 0.010539838252897726, "grad_norm": 1.0428335033264795, "learning_rate": 5.268534734384122e-07, "loss": 1.0247, "step": 361 }, { "epoch": 0.010569034480745088, "grad_norm": 1.0797839731836922, "learning_rate": 5.283129013426737e-07, "loss": 1.1204, "step": 362 }, { "epoch": 0.01059823070859245, "grad_norm": 1.237012109389058, "learning_rate": 5.297723292469353e-07, "loss": 1.1889, "step": 363 }, { "epoch": 0.010627426936439812, "grad_norm": 1.1440266435534234, "learning_rate": 5.312317571511968e-07, "loss": 1.1309, "step": 364 }, { "epoch": 0.010656623164287175, "grad_norm": 1.0262184837012047, "learning_rate": 5.326911850554583e-07, "loss": 0.8717, "step": 365 }, { "epoch": 0.010685819392134536, "grad_norm": 1.238272919081407, "learning_rate": 5.341506129597198e-07, "loss": 1.0471, "step": 366 }, { "epoch": 0.010715015619981899, "grad_norm": 1.0394195766102565, "learning_rate": 5.356100408639813e-07, "loss": 0.8392, "step": 367 }, { "epoch": 0.01074421184782926, "grad_norm": 1.1916843050789512, "learning_rate": 5.370694687682429e-07, "loss": 1.0875, "step": 368 }, { "epoch": 0.010773408075676623, "grad_norm": 1.078799340178351, "learning_rate": 5.385288966725044e-07, "loss": 1.0231, "step": 369 }, { "epoch": 0.010802604303523984, "grad_norm": 1.0381661381653924, "learning_rate": 5.399883245767659e-07, "loss": 0.8606, "step": 370 }, { "epoch": 0.010831800531371347, "grad_norm": 1.4669633481182243, "learning_rate": 5.414477524810275e-07, "loss": 1.0166, "step": 371 }, { "epoch": 0.010860996759218708, "grad_norm": 1.1224075718229627, "learning_rate": 5.42907180385289e-07, "loss": 1.0795, "step": 372 }, { "epoch": 0.010890192987066071, "grad_norm": 1.084772296956568, "learning_rate": 5.443666082895505e-07, "loss": 0.9368, "step": 373 }, { "epoch": 0.010919389214913432, "grad_norm": 1.0401251502055062, "learning_rate": 5.458260361938121e-07, "loss": 1.0674, "step": 374 }, { "epoch": 0.010948585442760795, "grad_norm": 1.0758540642537062, "learning_rate": 5.472854640980736e-07, "loss": 1.0755, "step": 375 }, { "epoch": 0.010977781670608158, "grad_norm": 1.0759592186937272, "learning_rate": 5.487448920023351e-07, "loss": 1.161, "step": 376 }, { "epoch": 0.01100697789845552, "grad_norm": 1.2291358592367578, "learning_rate": 5.502043199065967e-07, "loss": 1.0944, "step": 377 }, { "epoch": 0.011036174126302882, "grad_norm": 1.0525029240508612, "learning_rate": 5.516637478108581e-07, "loss": 1.0181, "step": 378 }, { "epoch": 0.011065370354150244, "grad_norm": 1.165804521725008, "learning_rate": 5.531231757151198e-07, "loss": 1.007, "step": 379 }, { "epoch": 0.011094566581997607, "grad_norm": 1.0078946420920483, "learning_rate": 5.545826036193812e-07, "loss": 0.9416, "step": 380 }, { "epoch": 0.011123762809844968, "grad_norm": 2.1859743036981243, "learning_rate": 5.560420315236428e-07, "loss": 1.1098, "step": 381 }, { "epoch": 0.01115295903769233, "grad_norm": 1.0577519228178007, "learning_rate": 5.575014594279043e-07, "loss": 0.9046, "step": 382 }, { "epoch": 0.011182155265539692, "grad_norm": 2.3274436813483947, "learning_rate": 5.589608873321658e-07, "loss": 1.1497, "step": 383 }, { "epoch": 0.011211351493387055, "grad_norm": 1.1303234117193932, "learning_rate": 5.604203152364274e-07, "loss": 1.0067, "step": 384 }, { "epoch": 0.011240547721234416, "grad_norm": 1.2823352591257542, "learning_rate": 5.618797431406889e-07, "loss": 1.0761, "step": 385 }, { "epoch": 0.011269743949081779, "grad_norm": 1.2919407980050417, "learning_rate": 5.633391710449504e-07, "loss": 0.955, "step": 386 }, { "epoch": 0.01129894017692914, "grad_norm": 1.2454707206999776, "learning_rate": 5.64798598949212e-07, "loss": 1.0801, "step": 387 }, { "epoch": 0.011328136404776503, "grad_norm": 1.1617079111841915, "learning_rate": 5.662580268534734e-07, "loss": 1.0018, "step": 388 }, { "epoch": 0.011357332632623864, "grad_norm": 1.1021527987294277, "learning_rate": 5.67717454757735e-07, "loss": 0.9061, "step": 389 }, { "epoch": 0.011386528860471227, "grad_norm": 1.0774639090997704, "learning_rate": 5.691768826619965e-07, "loss": 1.0561, "step": 390 }, { "epoch": 0.011415725088318588, "grad_norm": 1.071725656015677, "learning_rate": 5.706363105662581e-07, "loss": 0.9941, "step": 391 }, { "epoch": 0.011444921316165951, "grad_norm": 1.0389090346800365, "learning_rate": 5.720957384705196e-07, "loss": 0.9623, "step": 392 }, { "epoch": 0.011474117544013314, "grad_norm": 1.1949774146583967, "learning_rate": 5.735551663747812e-07, "loss": 1.0297, "step": 393 }, { "epoch": 0.011503313771860675, "grad_norm": 1.1422226061474021, "learning_rate": 5.750145942790426e-07, "loss": 1.0958, "step": 394 }, { "epoch": 0.011532509999708038, "grad_norm": 1.2006298828506425, "learning_rate": 5.764740221833042e-07, "loss": 1.0784, "step": 395 }, { "epoch": 0.0115617062275554, "grad_norm": 1.073020111949378, "learning_rate": 5.779334500875657e-07, "loss": 0.9613, "step": 396 }, { "epoch": 0.011590902455402763, "grad_norm": 1.0017146479330017, "learning_rate": 5.793928779918272e-07, "loss": 0.9219, "step": 397 }, { "epoch": 0.011620098683250124, "grad_norm": 1.0959072142695574, "learning_rate": 5.808523058960888e-07, "loss": 1.0461, "step": 398 }, { "epoch": 0.011649294911097487, "grad_norm": 1.056131231546455, "learning_rate": 5.823117338003503e-07, "loss": 1.1115, "step": 399 }, { "epoch": 0.011678491138944848, "grad_norm": 1.0929357849750831, "learning_rate": 5.837711617046119e-07, "loss": 1.0074, "step": 400 }, { "epoch": 0.01170768736679221, "grad_norm": 1.1548890612207519, "learning_rate": 5.852305896088734e-07, "loss": 0.9606, "step": 401 }, { "epoch": 0.011736883594639572, "grad_norm": 1.0942904565887972, "learning_rate": 5.866900175131349e-07, "loss": 1.0558, "step": 402 }, { "epoch": 0.011766079822486935, "grad_norm": 1.0183456508887074, "learning_rate": 5.881494454173965e-07, "loss": 0.9991, "step": 403 }, { "epoch": 0.011795276050334296, "grad_norm": 0.9419988423339117, "learning_rate": 5.896088733216579e-07, "loss": 0.8772, "step": 404 }, { "epoch": 0.011824472278181659, "grad_norm": 0.9464939014488419, "learning_rate": 5.910683012259195e-07, "loss": 0.9408, "step": 405 }, { "epoch": 0.01185366850602902, "grad_norm": 1.4074834089493373, "learning_rate": 5.92527729130181e-07, "loss": 1.0632, "step": 406 }, { "epoch": 0.011882864733876383, "grad_norm": 1.16972833129566, "learning_rate": 5.939871570344425e-07, "loss": 1.0606, "step": 407 }, { "epoch": 0.011912060961723746, "grad_norm": 0.9474824787662015, "learning_rate": 5.954465849387041e-07, "loss": 0.8878, "step": 408 }, { "epoch": 0.011941257189571107, "grad_norm": 1.1183998523745582, "learning_rate": 5.969060128429656e-07, "loss": 1.0947, "step": 409 }, { "epoch": 0.01197045341741847, "grad_norm": 1.9684938466070077, "learning_rate": 5.983654407472271e-07, "loss": 1.0781, "step": 410 }, { "epoch": 0.011999649645265831, "grad_norm": 1.1179371101504902, "learning_rate": 5.998248686514887e-07, "loss": 1.1253, "step": 411 }, { "epoch": 0.012028845873113194, "grad_norm": 1.0191261250015278, "learning_rate": 6.012842965557502e-07, "loss": 0.9767, "step": 412 }, { "epoch": 0.012058042100960556, "grad_norm": 0.971650473010985, "learning_rate": 6.027437244600117e-07, "loss": 0.9367, "step": 413 }, { "epoch": 0.012087238328807919, "grad_norm": 1.0074422536734369, "learning_rate": 6.042031523642733e-07, "loss": 0.9886, "step": 414 }, { "epoch": 0.01211643455665528, "grad_norm": 1.2529614797047117, "learning_rate": 6.056625802685348e-07, "loss": 1.0412, "step": 415 }, { "epoch": 0.012145630784502643, "grad_norm": 1.1004315973051655, "learning_rate": 6.071220081727963e-07, "loss": 1.0452, "step": 416 }, { "epoch": 0.012174827012350004, "grad_norm": 1.0283988391711432, "learning_rate": 6.085814360770579e-07, "loss": 1.0039, "step": 417 }, { "epoch": 0.012204023240197367, "grad_norm": 1.0072338869130961, "learning_rate": 6.100408639813193e-07, "loss": 0.9097, "step": 418 }, { "epoch": 0.012233219468044728, "grad_norm": 1.0318740244535547, "learning_rate": 6.115002918855809e-07, "loss": 1.0481, "step": 419 }, { "epoch": 0.012262415695892091, "grad_norm": 1.071473444821434, "learning_rate": 6.129597197898424e-07, "loss": 1.1232, "step": 420 }, { "epoch": 0.012291611923739452, "grad_norm": 1.1105977347374845, "learning_rate": 6.144191476941039e-07, "loss": 0.9668, "step": 421 }, { "epoch": 0.012320808151586815, "grad_norm": 1.060483048469006, "learning_rate": 6.158785755983655e-07, "loss": 0.9432, "step": 422 }, { "epoch": 0.012350004379434176, "grad_norm": 1.2159645531270156, "learning_rate": 6.17338003502627e-07, "loss": 1.0102, "step": 423 }, { "epoch": 0.01237920060728154, "grad_norm": 1.0351930290862394, "learning_rate": 6.187974314068886e-07, "loss": 0.9059, "step": 424 }, { "epoch": 0.012408396835128902, "grad_norm": 1.2232173981726773, "learning_rate": 6.2025685931115e-07, "loss": 1.0173, "step": 425 }, { "epoch": 0.012437593062976263, "grad_norm": 0.9525523944457099, "learning_rate": 6.217162872154116e-07, "loss": 0.8737, "step": 426 }, { "epoch": 0.012466789290823626, "grad_norm": 0.9674589152120592, "learning_rate": 6.231757151196732e-07, "loss": 0.917, "step": 427 }, { "epoch": 0.012495985518670987, "grad_norm": 1.011675580611335, "learning_rate": 6.246351430239346e-07, "loss": 0.975, "step": 428 }, { "epoch": 0.01252518174651835, "grad_norm": 1.0781563298522503, "learning_rate": 6.260945709281963e-07, "loss": 1.0181, "step": 429 }, { "epoch": 0.012554377974365712, "grad_norm": 1.283101507736084, "learning_rate": 6.275539988324578e-07, "loss": 0.9459, "step": 430 }, { "epoch": 0.012583574202213075, "grad_norm": 1.0729069968324891, "learning_rate": 6.290134267367193e-07, "loss": 1.1414, "step": 431 }, { "epoch": 0.012612770430060436, "grad_norm": 0.9619145732141406, "learning_rate": 6.304728546409808e-07, "loss": 0.9113, "step": 432 }, { "epoch": 0.012641966657907799, "grad_norm": 1.0332662229658207, "learning_rate": 6.319322825452423e-07, "loss": 0.9665, "step": 433 }, { "epoch": 0.01267116288575516, "grad_norm": 1.0429428320279523, "learning_rate": 6.333917104495038e-07, "loss": 0.9454, "step": 434 }, { "epoch": 0.012700359113602523, "grad_norm": 0.9448967270751972, "learning_rate": 6.348511383537654e-07, "loss": 0.939, "step": 435 }, { "epoch": 0.012729555341449884, "grad_norm": 0.9736327663186927, "learning_rate": 6.363105662580268e-07, "loss": 0.8606, "step": 436 }, { "epoch": 0.012758751569297247, "grad_norm": 1.0803151050489725, "learning_rate": 6.377699941622885e-07, "loss": 0.8617, "step": 437 }, { "epoch": 0.012787947797144608, "grad_norm": 0.9255215594559869, "learning_rate": 6.3922942206655e-07, "loss": 0.9292, "step": 438 }, { "epoch": 0.012817144024991971, "grad_norm": 1.282087410084423, "learning_rate": 6.406888499708115e-07, "loss": 1.04, "step": 439 }, { "epoch": 0.012846340252839332, "grad_norm": 1.04987319994478, "learning_rate": 6.42148277875073e-07, "loss": 0.9162, "step": 440 }, { "epoch": 0.012875536480686695, "grad_norm": 0.9670280694256389, "learning_rate": 6.436077057793345e-07, "loss": 0.8877, "step": 441 }, { "epoch": 0.012904732708534058, "grad_norm": 0.9758770097207145, "learning_rate": 6.45067133683596e-07, "loss": 1.0419, "step": 442 }, { "epoch": 0.01293392893638142, "grad_norm": 1.1490210919160961, "learning_rate": 6.465265615878576e-07, "loss": 1.0546, "step": 443 }, { "epoch": 0.012963125164228782, "grad_norm": 1.0817216884387928, "learning_rate": 6.47985989492119e-07, "loss": 0.8676, "step": 444 }, { "epoch": 0.012992321392076144, "grad_norm": 1.0631717018931848, "learning_rate": 6.494454173963807e-07, "loss": 1.1333, "step": 445 }, { "epoch": 0.013021517619923506, "grad_norm": 1.0985022239229683, "learning_rate": 6.509048453006423e-07, "loss": 1.0301, "step": 446 }, { "epoch": 0.013050713847770868, "grad_norm": 1.2056037073914143, "learning_rate": 6.523642732049037e-07, "loss": 0.8903, "step": 447 }, { "epoch": 0.01307991007561823, "grad_norm": 1.020691630568236, "learning_rate": 6.538237011091653e-07, "loss": 0.9908, "step": 448 }, { "epoch": 0.013109106303465592, "grad_norm": 1.02166670040501, "learning_rate": 6.552831290134267e-07, "loss": 0.9807, "step": 449 }, { "epoch": 0.013138302531312955, "grad_norm": 1.0234445832354666, "learning_rate": 6.567425569176883e-07, "loss": 1.0313, "step": 450 }, { "epoch": 0.013167498759160316, "grad_norm": 1.0616762786306224, "learning_rate": 6.582019848219498e-07, "loss": 0.9333, "step": 451 }, { "epoch": 0.013196694987007679, "grad_norm": 1.0872896526062756, "learning_rate": 6.596614127262113e-07, "loss": 0.9228, "step": 452 }, { "epoch": 0.01322589121485504, "grad_norm": 0.9471484787455453, "learning_rate": 6.61120840630473e-07, "loss": 1.0288, "step": 453 }, { "epoch": 0.013255087442702403, "grad_norm": 1.0682789970223885, "learning_rate": 6.625802685347345e-07, "loss": 1.0403, "step": 454 }, { "epoch": 0.013284283670549764, "grad_norm": 1.0084025716426321, "learning_rate": 6.64039696438996e-07, "loss": 0.9768, "step": 455 }, { "epoch": 0.013313479898397127, "grad_norm": 1.0051858122158743, "learning_rate": 6.654991243432575e-07, "loss": 0.9783, "step": 456 }, { "epoch": 0.01334267612624449, "grad_norm": 1.3551442857416829, "learning_rate": 6.66958552247519e-07, "loss": 1.1458, "step": 457 }, { "epoch": 0.013371872354091851, "grad_norm": 1.0508192923145816, "learning_rate": 6.684179801517805e-07, "loss": 0.9415, "step": 458 }, { "epoch": 0.013401068581939214, "grad_norm": 1.00826958255661, "learning_rate": 6.698774080560421e-07, "loss": 0.9635, "step": 459 }, { "epoch": 0.013430264809786575, "grad_norm": 1.1361197206822928, "learning_rate": 6.713368359603035e-07, "loss": 1.0513, "step": 460 }, { "epoch": 0.013459461037633938, "grad_norm": 1.1524619836648498, "learning_rate": 6.727962638645652e-07, "loss": 0.9866, "step": 461 }, { "epoch": 0.0134886572654813, "grad_norm": 0.9834620156342211, "learning_rate": 6.742556917688268e-07, "loss": 0.9328, "step": 462 }, { "epoch": 0.013517853493328662, "grad_norm": 2.1541761346010615, "learning_rate": 6.757151196730882e-07, "loss": 1.0398, "step": 463 }, { "epoch": 0.013547049721176024, "grad_norm": 1.0011346726580213, "learning_rate": 6.771745475773498e-07, "loss": 0.9271, "step": 464 }, { "epoch": 0.013576245949023387, "grad_norm": 0.9257032137333902, "learning_rate": 6.786339754816112e-07, "loss": 0.9164, "step": 465 }, { "epoch": 0.013605442176870748, "grad_norm": 1.0519626355519078, "learning_rate": 6.800934033858728e-07, "loss": 0.9493, "step": 466 }, { "epoch": 0.01363463840471811, "grad_norm": 1.0142645665044219, "learning_rate": 6.815528312901343e-07, "loss": 0.9909, "step": 467 }, { "epoch": 0.013663834632565472, "grad_norm": 1.0457169612495973, "learning_rate": 6.830122591943958e-07, "loss": 0.9969, "step": 468 }, { "epoch": 0.013693030860412835, "grad_norm": 1.29825675034561, "learning_rate": 6.844716870986574e-07, "loss": 1.1017, "step": 469 }, { "epoch": 0.013722227088260196, "grad_norm": 0.944600605018516, "learning_rate": 6.85931115002919e-07, "loss": 1.0192, "step": 470 }, { "epoch": 0.013751423316107559, "grad_norm": 0.921678709115802, "learning_rate": 6.873905429071805e-07, "loss": 0.9745, "step": 471 }, { "epoch": 0.01378061954395492, "grad_norm": 1.6900390175482543, "learning_rate": 6.88849970811442e-07, "loss": 0.9189, "step": 472 }, { "epoch": 0.013809815771802283, "grad_norm": 0.953796926761886, "learning_rate": 6.903093987157035e-07, "loss": 0.979, "step": 473 }, { "epoch": 0.013839011999649646, "grad_norm": 1.0372569426143665, "learning_rate": 6.91768826619965e-07, "loss": 1.0536, "step": 474 }, { "epoch": 0.013868208227497007, "grad_norm": 1.4653671676252302, "learning_rate": 6.932282545242266e-07, "loss": 0.9393, "step": 475 }, { "epoch": 0.01389740445534437, "grad_norm": 1.0248928820797114, "learning_rate": 6.94687682428488e-07, "loss": 1.0236, "step": 476 }, { "epoch": 0.013926600683191731, "grad_norm": 1.0101765312421553, "learning_rate": 6.961471103327497e-07, "loss": 0.9684, "step": 477 }, { "epoch": 0.013955796911039094, "grad_norm": 1.0223922289584237, "learning_rate": 6.976065382370111e-07, "loss": 1.062, "step": 478 }, { "epoch": 0.013984993138886456, "grad_norm": 1.8976873663278442, "learning_rate": 6.990659661412727e-07, "loss": 0.9123, "step": 479 }, { "epoch": 0.014014189366733818, "grad_norm": 0.9261047087965948, "learning_rate": 7.005253940455342e-07, "loss": 0.9203, "step": 480 }, { "epoch": 0.01404338559458118, "grad_norm": 1.0298465368518877, "learning_rate": 7.019848219497957e-07, "loss": 0.959, "step": 481 }, { "epoch": 0.014072581822428543, "grad_norm": 1.0900045402730112, "learning_rate": 7.034442498540572e-07, "loss": 0.8711, "step": 482 }, { "epoch": 0.014101778050275904, "grad_norm": 1.1572843443569334, "learning_rate": 7.049036777583188e-07, "loss": 1.0247, "step": 483 }, { "epoch": 0.014130974278123267, "grad_norm": 1.8464765523408528, "learning_rate": 7.063631056625802e-07, "loss": 1.0076, "step": 484 }, { "epoch": 0.014160170505970628, "grad_norm": 1.0044316046936415, "learning_rate": 7.078225335668419e-07, "loss": 0.9289, "step": 485 }, { "epoch": 0.014189366733817991, "grad_norm": 0.8952563762138968, "learning_rate": 7.092819614711034e-07, "loss": 0.8947, "step": 486 }, { "epoch": 0.014218562961665352, "grad_norm": 1.0312519016287984, "learning_rate": 7.107413893753649e-07, "loss": 0.9574, "step": 487 }, { "epoch": 0.014247759189512715, "grad_norm": 1.0151260258531285, "learning_rate": 7.122008172796265e-07, "loss": 0.9327, "step": 488 }, { "epoch": 0.014276955417360076, "grad_norm": 0.9681963608668297, "learning_rate": 7.136602451838879e-07, "loss": 0.9798, "step": 489 }, { "epoch": 0.01430615164520744, "grad_norm": 0.8842940714965357, "learning_rate": 7.151196730881495e-07, "loss": 0.907, "step": 490 }, { "epoch": 0.014335347873054802, "grad_norm": 1.1762101869946755, "learning_rate": 7.16579100992411e-07, "loss": 1.019, "step": 491 }, { "epoch": 0.014364544100902163, "grad_norm": 1.1029338483898585, "learning_rate": 7.180385288966725e-07, "loss": 1.0134, "step": 492 }, { "epoch": 0.014393740328749526, "grad_norm": 1.0735868255821717, "learning_rate": 7.194979568009342e-07, "loss": 1.0023, "step": 493 }, { "epoch": 0.014422936556596887, "grad_norm": 1.6677531693413523, "learning_rate": 7.209573847051956e-07, "loss": 1.0372, "step": 494 }, { "epoch": 0.01445213278444425, "grad_norm": 0.9126866107393075, "learning_rate": 7.224168126094572e-07, "loss": 1.0056, "step": 495 }, { "epoch": 0.014481329012291612, "grad_norm": 0.9474054242341506, "learning_rate": 7.238762405137187e-07, "loss": 0.9796, "step": 496 }, { "epoch": 0.014510525240138974, "grad_norm": 1.232654202876696, "learning_rate": 7.253356684179802e-07, "loss": 0.8615, "step": 497 }, { "epoch": 0.014539721467986336, "grad_norm": 1.0879609729018762, "learning_rate": 7.267950963222417e-07, "loss": 0.9412, "step": 498 }, { "epoch": 0.014568917695833699, "grad_norm": 0.9439081410620792, "learning_rate": 7.282545242265033e-07, "loss": 0.9913, "step": 499 }, { "epoch": 0.01459811392368106, "grad_norm": 0.9235001462443507, "learning_rate": 7.297139521307647e-07, "loss": 0.9013, "step": 500 }, { "epoch": 0.014627310151528423, "grad_norm": 1.0870861821377533, "learning_rate": 7.311733800350264e-07, "loss": 1.1125, "step": 501 }, { "epoch": 0.014656506379375784, "grad_norm": 0.9001382138971026, "learning_rate": 7.326328079392879e-07, "loss": 0.9608, "step": 502 }, { "epoch": 0.014685702607223147, "grad_norm": 1.3360981002989405, "learning_rate": 7.340922358435494e-07, "loss": 1.0434, "step": 503 }, { "epoch": 0.014714898835070508, "grad_norm": 0.9387919312875368, "learning_rate": 7.35551663747811e-07, "loss": 0.9963, "step": 504 }, { "epoch": 0.014744095062917871, "grad_norm": 1.001375937213386, "learning_rate": 7.370110916520724e-07, "loss": 1.0491, "step": 505 }, { "epoch": 0.014773291290765234, "grad_norm": 1.2969708218988332, "learning_rate": 7.38470519556334e-07, "loss": 0.9137, "step": 506 }, { "epoch": 0.014802487518612595, "grad_norm": 1.2443345558748256, "learning_rate": 7.399299474605954e-07, "loss": 0.9744, "step": 507 }, { "epoch": 0.014831683746459958, "grad_norm": 0.9632419620468704, "learning_rate": 7.41389375364857e-07, "loss": 0.9577, "step": 508 }, { "epoch": 0.01486087997430732, "grad_norm": 0.9597369558563201, "learning_rate": 7.428488032691186e-07, "loss": 1.0195, "step": 509 }, { "epoch": 0.014890076202154682, "grad_norm": 0.9214323302573877, "learning_rate": 7.443082311733801e-07, "loss": 0.9666, "step": 510 }, { "epoch": 0.014919272430002043, "grad_norm": 0.9059005661404252, "learning_rate": 7.457676590776416e-07, "loss": 0.863, "step": 511 }, { "epoch": 0.014948468657849406, "grad_norm": 0.9343251213743639, "learning_rate": 7.472270869819032e-07, "loss": 0.94, "step": 512 }, { "epoch": 0.014977664885696768, "grad_norm": 0.9651295727864517, "learning_rate": 7.486865148861646e-07, "loss": 0.9649, "step": 513 }, { "epoch": 0.01500686111354413, "grad_norm": 1.1382711417903295, "learning_rate": 7.501459427904262e-07, "loss": 0.9739, "step": 514 }, { "epoch": 0.015036057341391492, "grad_norm": 1.2569190847167204, "learning_rate": 7.516053706946876e-07, "loss": 1.0345, "step": 515 }, { "epoch": 0.015065253569238855, "grad_norm": 1.0158430872470274, "learning_rate": 7.530647985989492e-07, "loss": 0.8834, "step": 516 }, { "epoch": 0.015094449797086216, "grad_norm": 1.0404241929714637, "learning_rate": 7.545242265032109e-07, "loss": 0.9791, "step": 517 }, { "epoch": 0.015123646024933579, "grad_norm": 0.8615533775908952, "learning_rate": 7.559836544074723e-07, "loss": 0.9323, "step": 518 }, { "epoch": 0.01515284225278094, "grad_norm": 1.0075190050084157, "learning_rate": 7.574430823117339e-07, "loss": 1.0392, "step": 519 }, { "epoch": 0.015182038480628303, "grad_norm": 1.1768693427174137, "learning_rate": 7.589025102159954e-07, "loss": 0.9424, "step": 520 }, { "epoch": 0.015211234708475664, "grad_norm": 1.1229365641030378, "learning_rate": 7.603619381202569e-07, "loss": 0.8955, "step": 521 }, { "epoch": 0.015240430936323027, "grad_norm": 1.048896677493397, "learning_rate": 7.618213660245184e-07, "loss": 0.8515, "step": 522 }, { "epoch": 0.01526962716417039, "grad_norm": 1.01250177678379, "learning_rate": 7.632807939287799e-07, "loss": 0.9758, "step": 523 }, { "epoch": 0.015298823392017751, "grad_norm": 0.900716719044489, "learning_rate": 7.647402218330414e-07, "loss": 0.8718, "step": 524 }, { "epoch": 0.015328019619865114, "grad_norm": 0.9067618749940094, "learning_rate": 7.661996497373031e-07, "loss": 0.8606, "step": 525 }, { "epoch": 0.015357215847712475, "grad_norm": 0.9484817357220738, "learning_rate": 7.676590776415646e-07, "loss": 0.9367, "step": 526 }, { "epoch": 0.015386412075559838, "grad_norm": 0.9974046935458257, "learning_rate": 7.691185055458261e-07, "loss": 0.9683, "step": 527 }, { "epoch": 0.0154156083034072, "grad_norm": 0.8988120439202582, "learning_rate": 7.705779334500877e-07, "loss": 0.9134, "step": 528 }, { "epoch": 0.015444804531254562, "grad_norm": 1.2886366148657435, "learning_rate": 7.720373613543491e-07, "loss": 0.9782, "step": 529 }, { "epoch": 0.015474000759101924, "grad_norm": 0.9175631593785107, "learning_rate": 7.734967892586107e-07, "loss": 0.9131, "step": 530 }, { "epoch": 0.015503196986949287, "grad_norm": 1.1725680453470684, "learning_rate": 7.749562171628721e-07, "loss": 0.9038, "step": 531 }, { "epoch": 0.015532393214796648, "grad_norm": 1.0134285069341502, "learning_rate": 7.764156450671337e-07, "loss": 1.0802, "step": 532 }, { "epoch": 0.01556158944264401, "grad_norm": 0.8749702332370677, "learning_rate": 7.778750729713954e-07, "loss": 0.8956, "step": 533 }, { "epoch": 0.015590785670491372, "grad_norm": 1.0201815864120387, "learning_rate": 7.793345008756568e-07, "loss": 1.0313, "step": 534 }, { "epoch": 0.015619981898338735, "grad_norm": 1.032349565800869, "learning_rate": 7.807939287799184e-07, "loss": 0.9836, "step": 535 }, { "epoch": 0.015649178126186096, "grad_norm": 0.9656324995919591, "learning_rate": 7.822533566841799e-07, "loss": 0.9004, "step": 536 }, { "epoch": 0.015678374354033457, "grad_norm": 0.9316242999908765, "learning_rate": 7.837127845884414e-07, "loss": 0.9727, "step": 537 }, { "epoch": 0.015707570581880822, "grad_norm": 0.910806392343796, "learning_rate": 7.851722124927029e-07, "loss": 0.8605, "step": 538 }, { "epoch": 0.015736766809728183, "grad_norm": 0.8697613609224009, "learning_rate": 7.866316403969644e-07, "loss": 0.9191, "step": 539 }, { "epoch": 0.015765963037575544, "grad_norm": 1.2360606501646492, "learning_rate": 7.880910683012259e-07, "loss": 1.0475, "step": 540 }, { "epoch": 0.01579515926542291, "grad_norm": 0.9066771089706684, "learning_rate": 7.895504962054876e-07, "loss": 0.88, "step": 541 }, { "epoch": 0.01582435549327027, "grad_norm": 0.9287128395953661, "learning_rate": 7.910099241097491e-07, "loss": 0.8989, "step": 542 }, { "epoch": 0.01585355172111763, "grad_norm": 0.8364653219660029, "learning_rate": 7.924693520140106e-07, "loss": 0.7989, "step": 543 }, { "epoch": 0.015882747948964993, "grad_norm": 1.870719535360869, "learning_rate": 7.939287799182722e-07, "loss": 0.8999, "step": 544 }, { "epoch": 0.015911944176812357, "grad_norm": 0.961920369905821, "learning_rate": 7.953882078225336e-07, "loss": 0.9642, "step": 545 }, { "epoch": 0.01594114040465972, "grad_norm": 0.9641343632850389, "learning_rate": 7.968476357267952e-07, "loss": 0.9703, "step": 546 }, { "epoch": 0.01597033663250708, "grad_norm": 0.9073973527629078, "learning_rate": 7.983070636310566e-07, "loss": 0.9145, "step": 547 }, { "epoch": 0.01599953286035444, "grad_norm": 1.0307446084872047, "learning_rate": 7.997664915353182e-07, "loss": 0.9725, "step": 548 }, { "epoch": 0.016028729088201805, "grad_norm": 1.0403460812620637, "learning_rate": 8.012259194395798e-07, "loss": 0.8975, "step": 549 }, { "epoch": 0.016057925316049167, "grad_norm": 0.9017486512669334, "learning_rate": 8.026853473438413e-07, "loss": 0.9138, "step": 550 }, { "epoch": 0.016087121543896528, "grad_norm": 1.0275504640963373, "learning_rate": 8.041447752481028e-07, "loss": 0.9061, "step": 551 }, { "epoch": 0.01611631777174389, "grad_norm": 0.8984118395101998, "learning_rate": 8.056042031523644e-07, "loss": 0.9585, "step": 552 }, { "epoch": 0.016145513999591254, "grad_norm": 0.9744880348666961, "learning_rate": 8.070636310566258e-07, "loss": 0.9589, "step": 553 }, { "epoch": 0.016174710227438615, "grad_norm": 1.0147261961738399, "learning_rate": 8.085230589608874e-07, "loss": 0.8668, "step": 554 }, { "epoch": 0.016203906455285976, "grad_norm": 0.9024245303539123, "learning_rate": 8.099824868651488e-07, "loss": 0.9551, "step": 555 }, { "epoch": 0.01623310268313334, "grad_norm": 1.6914987492973785, "learning_rate": 8.114419147694104e-07, "loss": 0.9766, "step": 556 }, { "epoch": 0.016262298910980702, "grad_norm": 1.04871407477542, "learning_rate": 8.129013426736721e-07, "loss": 1.0921, "step": 557 }, { "epoch": 0.016291495138828063, "grad_norm": 0.9107813991069876, "learning_rate": 8.143607705779335e-07, "loss": 0.9042, "step": 558 }, { "epoch": 0.016320691366675424, "grad_norm": 0.9827418326576515, "learning_rate": 8.158201984821951e-07, "loss": 0.9328, "step": 559 }, { "epoch": 0.01634988759452279, "grad_norm": 0.9105904427518391, "learning_rate": 8.172796263864565e-07, "loss": 0.9983, "step": 560 }, { "epoch": 0.01637908382237015, "grad_norm": 1.0240955159790681, "learning_rate": 8.187390542907181e-07, "loss": 0.9887, "step": 561 }, { "epoch": 0.01640828005021751, "grad_norm": 1.2269792377177715, "learning_rate": 8.201984821949796e-07, "loss": 0.9602, "step": 562 }, { "epoch": 0.016437476278064873, "grad_norm": 2.182804831980973, "learning_rate": 8.216579100992411e-07, "loss": 0.9377, "step": 563 }, { "epoch": 0.016466672505912237, "grad_norm": 0.9301861038160334, "learning_rate": 8.231173380035026e-07, "loss": 1.0411, "step": 564 }, { "epoch": 0.0164958687337596, "grad_norm": 1.076196658643254, "learning_rate": 8.245767659077643e-07, "loss": 0.8877, "step": 565 }, { "epoch": 0.01652506496160696, "grad_norm": 1.043765560330972, "learning_rate": 8.260361938120258e-07, "loss": 0.9645, "step": 566 }, { "epoch": 0.01655426118945432, "grad_norm": 0.829585926937152, "learning_rate": 8.274956217162873e-07, "loss": 0.8451, "step": 567 }, { "epoch": 0.016583457417301686, "grad_norm": 0.9409252576554217, "learning_rate": 8.289550496205489e-07, "loss": 0.9504, "step": 568 }, { "epoch": 0.016612653645149047, "grad_norm": 1.0392651886851578, "learning_rate": 8.304144775248103e-07, "loss": 0.9475, "step": 569 }, { "epoch": 0.016641849872996408, "grad_norm": 0.956844949017766, "learning_rate": 8.318739054290719e-07, "loss": 0.8543, "step": 570 }, { "epoch": 0.016671046100843773, "grad_norm": 0.8968958549699447, "learning_rate": 8.333333333333333e-07, "loss": 0.9085, "step": 571 }, { "epoch": 0.016700242328691134, "grad_norm": 1.1075812698670415, "learning_rate": 8.347927612375949e-07, "loss": 0.9988, "step": 572 }, { "epoch": 0.016729438556538495, "grad_norm": 0.9550737140436142, "learning_rate": 8.362521891418566e-07, "loss": 0.9412, "step": 573 }, { "epoch": 0.016758634784385856, "grad_norm": 0.8190792289686893, "learning_rate": 8.37711617046118e-07, "loss": 0.8885, "step": 574 }, { "epoch": 0.01678783101223322, "grad_norm": 1.0744823967549342, "learning_rate": 8.391710449503796e-07, "loss": 1.0655, "step": 575 }, { "epoch": 0.016817027240080582, "grad_norm": 1.2605669377428164, "learning_rate": 8.40630472854641e-07, "loss": 0.8736, "step": 576 }, { "epoch": 0.016846223467927943, "grad_norm": 0.9816942920628786, "learning_rate": 8.420899007589026e-07, "loss": 0.9501, "step": 577 }, { "epoch": 0.016875419695775305, "grad_norm": 0.8647556755723881, "learning_rate": 8.435493286631641e-07, "loss": 0.8464, "step": 578 }, { "epoch": 0.01690461592362267, "grad_norm": 1.5223984817743756, "learning_rate": 8.450087565674256e-07, "loss": 0.8537, "step": 579 }, { "epoch": 0.01693381215147003, "grad_norm": 0.9148591984156357, "learning_rate": 8.464681844716871e-07, "loss": 0.9864, "step": 580 }, { "epoch": 0.01696300837931739, "grad_norm": 0.9434675731133492, "learning_rate": 8.479276123759488e-07, "loss": 0.9035, "step": 581 }, { "epoch": 0.016992204607164753, "grad_norm": 0.9983702395277725, "learning_rate": 8.493870402802102e-07, "loss": 1.0158, "step": 582 }, { "epoch": 0.017021400835012118, "grad_norm": 0.9621927687983093, "learning_rate": 8.508464681844718e-07, "loss": 0.9385, "step": 583 }, { "epoch": 0.01705059706285948, "grad_norm": 0.9532167289563561, "learning_rate": 8.523058960887332e-07, "loss": 0.9771, "step": 584 }, { "epoch": 0.01707979329070684, "grad_norm": 0.8774852591349754, "learning_rate": 8.537653239929948e-07, "loss": 0.9059, "step": 585 }, { "epoch": 0.0171089895185542, "grad_norm": 0.9560965179944739, "learning_rate": 8.552247518972564e-07, "loss": 0.9291, "step": 586 }, { "epoch": 0.017138185746401566, "grad_norm": 0.9883460536838788, "learning_rate": 8.566841798015178e-07, "loss": 0.8676, "step": 587 }, { "epoch": 0.017167381974248927, "grad_norm": 1.0959820102175328, "learning_rate": 8.581436077057794e-07, "loss": 1.0552, "step": 588 }, { "epoch": 0.017196578202096288, "grad_norm": 1.0343941651071815, "learning_rate": 8.59603035610041e-07, "loss": 0.8711, "step": 589 }, { "epoch": 0.017225774429943653, "grad_norm": 1.0680829458751375, "learning_rate": 8.610624635143025e-07, "loss": 0.9277, "step": 590 }, { "epoch": 0.017254970657791014, "grad_norm": 1.087654711686053, "learning_rate": 8.62521891418564e-07, "loss": 0.94, "step": 591 }, { "epoch": 0.017284166885638375, "grad_norm": 1.2138422981323944, "learning_rate": 8.639813193228255e-07, "loss": 0.8268, "step": 592 }, { "epoch": 0.017313363113485736, "grad_norm": 0.9303392942214562, "learning_rate": 8.65440747227087e-07, "loss": 0.9778, "step": 593 }, { "epoch": 0.0173425593413331, "grad_norm": 1.1369229797855993, "learning_rate": 8.669001751313486e-07, "loss": 1.0121, "step": 594 }, { "epoch": 0.017371755569180462, "grad_norm": 0.958197680597147, "learning_rate": 8.6835960303561e-07, "loss": 0.8604, "step": 595 }, { "epoch": 0.017400951797027824, "grad_norm": 0.8297376441058147, "learning_rate": 8.698190309398716e-07, "loss": 0.8345, "step": 596 }, { "epoch": 0.017430148024875185, "grad_norm": 0.9375460842588993, "learning_rate": 8.712784588441333e-07, "loss": 0.8382, "step": 597 }, { "epoch": 0.01745934425272255, "grad_norm": 0.9638320505336396, "learning_rate": 8.727378867483947e-07, "loss": 0.8561, "step": 598 }, { "epoch": 0.01748854048056991, "grad_norm": 0.8833501850081352, "learning_rate": 8.741973146526563e-07, "loss": 0.8465, "step": 599 }, { "epoch": 0.017517736708417272, "grad_norm": 1.0028123947766991, "learning_rate": 8.756567425569177e-07, "loss": 0.8999, "step": 600 }, { "epoch": 0.017546932936264633, "grad_norm": 1.0032275019732546, "learning_rate": 8.771161704611793e-07, "loss": 0.9453, "step": 601 }, { "epoch": 0.017576129164111998, "grad_norm": 0.860334155853473, "learning_rate": 8.785755983654408e-07, "loss": 0.8141, "step": 602 }, { "epoch": 0.01760532539195936, "grad_norm": 1.0903618702776883, "learning_rate": 8.800350262697023e-07, "loss": 0.7793, "step": 603 }, { "epoch": 0.01763452161980672, "grad_norm": 1.0402516080659123, "learning_rate": 8.814944541739638e-07, "loss": 0.9744, "step": 604 }, { "epoch": 0.017663717847654085, "grad_norm": 1.0662444448231398, "learning_rate": 8.829538820782255e-07, "loss": 1.0521, "step": 605 }, { "epoch": 0.017692914075501446, "grad_norm": 0.8620161454045691, "learning_rate": 8.84413309982487e-07, "loss": 0.913, "step": 606 }, { "epoch": 0.017722110303348807, "grad_norm": 0.9482700796714791, "learning_rate": 8.858727378867485e-07, "loss": 0.9248, "step": 607 }, { "epoch": 0.01775130653119617, "grad_norm": 0.8119599420193505, "learning_rate": 8.8733216579101e-07, "loss": 0.7626, "step": 608 }, { "epoch": 0.017780502759043533, "grad_norm": 1.1684033650737007, "learning_rate": 8.887915936952715e-07, "loss": 0.8083, "step": 609 }, { "epoch": 0.017809698986890894, "grad_norm": 0.9550238142996531, "learning_rate": 8.902510215995331e-07, "loss": 0.9046, "step": 610 }, { "epoch": 0.017838895214738255, "grad_norm": 0.9796049948502098, "learning_rate": 8.917104495037945e-07, "loss": 0.9308, "step": 611 }, { "epoch": 0.017868091442585617, "grad_norm": 1.0618003531898432, "learning_rate": 8.931698774080561e-07, "loss": 0.8906, "step": 612 }, { "epoch": 0.01789728767043298, "grad_norm": 0.9161539195671564, "learning_rate": 8.946293053123178e-07, "loss": 0.921, "step": 613 }, { "epoch": 0.017926483898280342, "grad_norm": 0.8735940934644517, "learning_rate": 8.960887332165792e-07, "loss": 0.9085, "step": 614 }, { "epoch": 0.017955680126127704, "grad_norm": 0.9670567057159766, "learning_rate": 8.975481611208408e-07, "loss": 0.8878, "step": 615 }, { "epoch": 0.017984876353975065, "grad_norm": 0.9354301938340586, "learning_rate": 8.990075890251022e-07, "loss": 0.9447, "step": 616 }, { "epoch": 0.01801407258182243, "grad_norm": 1.2970041592130626, "learning_rate": 9.004670169293638e-07, "loss": 0.9618, "step": 617 }, { "epoch": 0.01804326880966979, "grad_norm": 0.7858249785076592, "learning_rate": 9.019264448336253e-07, "loss": 0.7955, "step": 618 }, { "epoch": 0.018072465037517152, "grad_norm": 0.9558702567226153, "learning_rate": 9.033858727378868e-07, "loss": 0.8988, "step": 619 }, { "epoch": 0.018101661265364517, "grad_norm": 1.065970536616119, "learning_rate": 9.048453006421483e-07, "loss": 0.9491, "step": 620 }, { "epoch": 0.018130857493211878, "grad_norm": 0.8534588006223499, "learning_rate": 9.0630472854641e-07, "loss": 0.9157, "step": 621 }, { "epoch": 0.01816005372105924, "grad_norm": 0.8332195669404772, "learning_rate": 9.077641564506714e-07, "loss": 0.8372, "step": 622 }, { "epoch": 0.0181892499489066, "grad_norm": 1.2011587887128556, "learning_rate": 9.09223584354933e-07, "loss": 0.9944, "step": 623 }, { "epoch": 0.018218446176753965, "grad_norm": 1.1666115179345415, "learning_rate": 9.106830122591944e-07, "loss": 0.9438, "step": 624 }, { "epoch": 0.018247642404601326, "grad_norm": 1.0460675651340938, "learning_rate": 9.12142440163456e-07, "loss": 0.9232, "step": 625 }, { "epoch": 0.018276838632448687, "grad_norm": 0.8223921247887873, "learning_rate": 9.136018680677175e-07, "loss": 0.7982, "step": 626 }, { "epoch": 0.01830603486029605, "grad_norm": 0.8744546033930448, "learning_rate": 9.15061295971979e-07, "loss": 0.8724, "step": 627 }, { "epoch": 0.018335231088143413, "grad_norm": 1.087002600868294, "learning_rate": 9.165207238762405e-07, "loss": 0.993, "step": 628 }, { "epoch": 0.018364427315990774, "grad_norm": 1.305762774841242, "learning_rate": 9.179801517805021e-07, "loss": 0.9179, "step": 629 }, { "epoch": 0.018393623543838136, "grad_norm": 0.9428344451200443, "learning_rate": 9.194395796847637e-07, "loss": 0.872, "step": 630 }, { "epoch": 0.018422819771685497, "grad_norm": 0.9316110471682494, "learning_rate": 9.208990075890252e-07, "loss": 0.8458, "step": 631 }, { "epoch": 0.01845201599953286, "grad_norm": 1.1342399214661403, "learning_rate": 9.223584354932867e-07, "loss": 0.9533, "step": 632 }, { "epoch": 0.018481212227380223, "grad_norm": 0.9537973574332489, "learning_rate": 9.238178633975482e-07, "loss": 0.9454, "step": 633 }, { "epoch": 0.018510408455227584, "grad_norm": 0.9897946319971036, "learning_rate": 9.252772913018098e-07, "loss": 0.8845, "step": 634 }, { "epoch": 0.018539604683074945, "grad_norm": 9.293583666389997, "learning_rate": 9.267367192060712e-07, "loss": 0.8968, "step": 635 }, { "epoch": 0.01856880091092231, "grad_norm": 0.9339724331635584, "learning_rate": 9.281961471103328e-07, "loss": 0.9103, "step": 636 }, { "epoch": 0.01859799713876967, "grad_norm": 0.8914935360402999, "learning_rate": 9.296555750145944e-07, "loss": 0.8679, "step": 637 }, { "epoch": 0.018627193366617032, "grad_norm": 0.8853860018760604, "learning_rate": 9.311150029188559e-07, "loss": 0.8466, "step": 638 }, { "epoch": 0.018656389594464397, "grad_norm": 1.3854220810568065, "learning_rate": 9.325744308231175e-07, "loss": 0.9202, "step": 639 }, { "epoch": 0.018685585822311758, "grad_norm": 0.8950896663701681, "learning_rate": 9.340338587273789e-07, "loss": 0.9423, "step": 640 }, { "epoch": 0.01871478205015912, "grad_norm": 0.9472417298091228, "learning_rate": 9.354932866316405e-07, "loss": 0.8969, "step": 641 }, { "epoch": 0.01874397827800648, "grad_norm": 0.8926299780557725, "learning_rate": 9.36952714535902e-07, "loss": 0.7964, "step": 642 }, { "epoch": 0.018773174505853845, "grad_norm": 0.8439323201075695, "learning_rate": 9.384121424401635e-07, "loss": 0.8703, "step": 643 }, { "epoch": 0.018802370733701206, "grad_norm": 0.8082419544975662, "learning_rate": 9.39871570344425e-07, "loss": 0.7659, "step": 644 }, { "epoch": 0.018831566961548567, "grad_norm": 0.8560653157430064, "learning_rate": 9.413309982486866e-07, "loss": 0.8498, "step": 645 }, { "epoch": 0.01886076318939593, "grad_norm": 1.5749484404076282, "learning_rate": 9.427904261529482e-07, "loss": 0.8922, "step": 646 }, { "epoch": 0.018889959417243293, "grad_norm": 0.9984221569497784, "learning_rate": 9.442498540572097e-07, "loss": 0.9289, "step": 647 }, { "epoch": 0.018919155645090655, "grad_norm": 1.8943295200189987, "learning_rate": 9.457092819614712e-07, "loss": 0.9763, "step": 648 }, { "epoch": 0.018948351872938016, "grad_norm": 0.9404308232479431, "learning_rate": 9.471687098657327e-07, "loss": 0.8866, "step": 649 }, { "epoch": 0.018977548100785377, "grad_norm": 1.1937513510968478, "learning_rate": 9.486281377699943e-07, "loss": 1.1044, "step": 650 }, { "epoch": 0.01900674432863274, "grad_norm": 1.2245927480089513, "learning_rate": 9.500875656742557e-07, "loss": 0.9413, "step": 651 }, { "epoch": 0.019035940556480103, "grad_norm": 0.8621664093122444, "learning_rate": 9.515469935785173e-07, "loss": 0.8826, "step": 652 }, { "epoch": 0.019065136784327464, "grad_norm": 0.8738512478006545, "learning_rate": 9.530064214827788e-07, "loss": 0.7934, "step": 653 }, { "epoch": 0.01909433301217483, "grad_norm": 0.9242565646753126, "learning_rate": 9.544658493870403e-07, "loss": 0.9035, "step": 654 }, { "epoch": 0.01912352924002219, "grad_norm": 0.8780387989274147, "learning_rate": 9.55925277291302e-07, "loss": 0.8808, "step": 655 }, { "epoch": 0.01915272546786955, "grad_norm": 1.3591480746945088, "learning_rate": 9.573847051955634e-07, "loss": 0.8663, "step": 656 }, { "epoch": 0.019181921695716912, "grad_norm": 1.0433618703518301, "learning_rate": 9.588441330998249e-07, "loss": 0.9716, "step": 657 }, { "epoch": 0.019211117923564277, "grad_norm": 1.0345104984430602, "learning_rate": 9.603035610040866e-07, "loss": 0.959, "step": 658 }, { "epoch": 0.019240314151411638, "grad_norm": 1.0459197931875046, "learning_rate": 9.61762988908348e-07, "loss": 0.8961, "step": 659 }, { "epoch": 0.019269510379259, "grad_norm": 0.9777128091550398, "learning_rate": 9.632224168126095e-07, "loss": 0.9295, "step": 660 }, { "epoch": 0.01929870660710636, "grad_norm": 1.0559422480235425, "learning_rate": 9.646818447168711e-07, "loss": 0.9108, "step": 661 }, { "epoch": 0.019327902834953725, "grad_norm": 0.8797607125800575, "learning_rate": 9.661412726211326e-07, "loss": 0.8381, "step": 662 }, { "epoch": 0.019357099062801086, "grad_norm": 0.9813179891138509, "learning_rate": 9.67600700525394e-07, "loss": 0.8612, "step": 663 }, { "epoch": 0.019386295290648448, "grad_norm": 0.8947877804112876, "learning_rate": 9.690601284296557e-07, "loss": 0.9917, "step": 664 }, { "epoch": 0.01941549151849581, "grad_norm": 0.8667706401281686, "learning_rate": 9.705195563339172e-07, "loss": 0.8049, "step": 665 }, { "epoch": 0.019444687746343173, "grad_norm": 0.9837187331770749, "learning_rate": 9.719789842381786e-07, "loss": 0.8566, "step": 666 }, { "epoch": 0.019473883974190535, "grad_norm": 0.8891155066244657, "learning_rate": 9.7343841214244e-07, "loss": 0.8963, "step": 667 }, { "epoch": 0.019503080202037896, "grad_norm": 0.901866598697317, "learning_rate": 9.748978400467018e-07, "loss": 0.8842, "step": 668 }, { "epoch": 0.01953227642988526, "grad_norm": 0.8413334461956423, "learning_rate": 9.763572679509634e-07, "loss": 0.787, "step": 669 }, { "epoch": 0.01956147265773262, "grad_norm": 0.9358982877583891, "learning_rate": 9.77816695855225e-07, "loss": 0.8891, "step": 670 }, { "epoch": 0.019590668885579983, "grad_norm": 0.8980149731171189, "learning_rate": 9.792761237594864e-07, "loss": 0.8476, "step": 671 }, { "epoch": 0.019619865113427344, "grad_norm": 0.9494135932948222, "learning_rate": 9.807355516637478e-07, "loss": 0.91, "step": 672 }, { "epoch": 0.01964906134127471, "grad_norm": 0.9596341596021996, "learning_rate": 9.821949795680095e-07, "loss": 0.9044, "step": 673 }, { "epoch": 0.01967825756912207, "grad_norm": 1.012658653595513, "learning_rate": 9.83654407472271e-07, "loss": 0.9416, "step": 674 }, { "epoch": 0.01970745379696943, "grad_norm": 0.930787520913448, "learning_rate": 9.851138353765324e-07, "loss": 0.8694, "step": 675 }, { "epoch": 0.019736650024816792, "grad_norm": 0.8710779689588801, "learning_rate": 9.865732632807939e-07, "loss": 0.8317, "step": 676 }, { "epoch": 0.019765846252664157, "grad_norm": 0.7783445688925814, "learning_rate": 9.880326911850555e-07, "loss": 0.7269, "step": 677 }, { "epoch": 0.01979504248051152, "grad_norm": 1.8446917886055494, "learning_rate": 9.894921190893172e-07, "loss": 0.9439, "step": 678 }, { "epoch": 0.01982423870835888, "grad_norm": 0.8738417108186906, "learning_rate": 9.909515469935787e-07, "loss": 0.8854, "step": 679 }, { "epoch": 0.01985343493620624, "grad_norm": 0.8543214205315645, "learning_rate": 9.924109748978401e-07, "loss": 0.8043, "step": 680 }, { "epoch": 0.019882631164053605, "grad_norm": 0.8707210862760199, "learning_rate": 9.938704028021016e-07, "loss": 0.8421, "step": 681 }, { "epoch": 0.019911827391900967, "grad_norm": 1.0520340739992933, "learning_rate": 9.953298307063632e-07, "loss": 0.8621, "step": 682 }, { "epoch": 0.019941023619748328, "grad_norm": 0.8469986903913697, "learning_rate": 9.967892586106247e-07, "loss": 0.8416, "step": 683 }, { "epoch": 0.01997021984759569, "grad_norm": 1.022493187288475, "learning_rate": 9.982486865148862e-07, "loss": 0.9275, "step": 684 }, { "epoch": 0.019999416075443054, "grad_norm": 0.8435547220465823, "learning_rate": 9.997081144191478e-07, "loss": 0.8627, "step": 685 }, { "epoch": 0.020028612303290415, "grad_norm": 1.3593936143809517, "learning_rate": 1.0011675423234093e-06, "loss": 1.0536, "step": 686 }, { "epoch": 0.020057808531137776, "grad_norm": 0.9471157972898758, "learning_rate": 1.002626970227671e-06, "loss": 0.8866, "step": 687 }, { "epoch": 0.02008700475898514, "grad_norm": 0.882937660086946, "learning_rate": 1.0040863981319324e-06, "loss": 0.8797, "step": 688 }, { "epoch": 0.020116200986832502, "grad_norm": 0.9820551639734099, "learning_rate": 1.0055458260361939e-06, "loss": 0.9439, "step": 689 }, { "epoch": 0.020145397214679863, "grad_norm": 0.8218345251710526, "learning_rate": 1.0070052539404553e-06, "loss": 0.8167, "step": 690 }, { "epoch": 0.020174593442527224, "grad_norm": 0.8273763759578052, "learning_rate": 1.008464681844717e-06, "loss": 0.842, "step": 691 }, { "epoch": 0.02020378967037459, "grad_norm": 1.4987887847740755, "learning_rate": 1.0099241097489785e-06, "loss": 0.9822, "step": 692 }, { "epoch": 0.02023298589822195, "grad_norm": 0.9278901587709424, "learning_rate": 1.0113835376532401e-06, "loss": 0.9681, "step": 693 }, { "epoch": 0.02026218212606931, "grad_norm": 0.8957971944122517, "learning_rate": 1.0128429655575016e-06, "loss": 0.8405, "step": 694 }, { "epoch": 0.020291378353916673, "grad_norm": 0.853169489148278, "learning_rate": 1.014302393461763e-06, "loss": 0.8673, "step": 695 }, { "epoch": 0.020320574581764037, "grad_norm": 0.918597456024913, "learning_rate": 1.0157618213660245e-06, "loss": 0.9792, "step": 696 }, { "epoch": 0.0203497708096114, "grad_norm": 0.8944541411557917, "learning_rate": 1.0172212492702862e-06, "loss": 0.8696, "step": 697 }, { "epoch": 0.02037896703745876, "grad_norm": 1.4952138502123131, "learning_rate": 1.0186806771745476e-06, "loss": 0.8155, "step": 698 }, { "epoch": 0.02040816326530612, "grad_norm": 0.9768725696214506, "learning_rate": 1.020140105078809e-06, "loss": 0.923, "step": 699 }, { "epoch": 0.020437359493153485, "grad_norm": 0.9495314182959596, "learning_rate": 1.0215995329830706e-06, "loss": 0.8193, "step": 700 }, { "epoch": 0.020466555721000847, "grad_norm": 1.0683851501972232, "learning_rate": 1.0230589608873322e-06, "loss": 0.9156, "step": 701 }, { "epoch": 0.020495751948848208, "grad_norm": 0.8804196547030669, "learning_rate": 1.0245183887915939e-06, "loss": 0.8363, "step": 702 }, { "epoch": 0.020524948176695573, "grad_norm": 0.8478217446959757, "learning_rate": 1.0259778166958554e-06, "loss": 0.8727, "step": 703 }, { "epoch": 0.020554144404542934, "grad_norm": 0.9389611294284755, "learning_rate": 1.0274372446001168e-06, "loss": 0.9172, "step": 704 }, { "epoch": 0.020583340632390295, "grad_norm": 0.952299263519299, "learning_rate": 1.0288966725043783e-06, "loss": 0.8919, "step": 705 }, { "epoch": 0.020612536860237656, "grad_norm": 0.8455310717544973, "learning_rate": 1.03035610040864e-06, "loss": 0.8995, "step": 706 }, { "epoch": 0.02064173308808502, "grad_norm": 0.9241845532558315, "learning_rate": 1.0318155283129014e-06, "loss": 0.9291, "step": 707 }, { "epoch": 0.020670929315932382, "grad_norm": 0.8765281440014949, "learning_rate": 1.0332749562171629e-06, "loss": 0.9257, "step": 708 }, { "epoch": 0.020700125543779743, "grad_norm": 0.955145971300181, "learning_rate": 1.0347343841214245e-06, "loss": 0.8588, "step": 709 }, { "epoch": 0.020729321771627104, "grad_norm": 0.9348977058535806, "learning_rate": 1.036193812025686e-06, "loss": 0.9091, "step": 710 }, { "epoch": 0.02075851799947447, "grad_norm": 0.8489223431597809, "learning_rate": 1.0376532399299477e-06, "loss": 0.8974, "step": 711 }, { "epoch": 0.02078771422732183, "grad_norm": 0.8090501606367461, "learning_rate": 1.0391126678342091e-06, "loss": 0.8578, "step": 712 }, { "epoch": 0.02081691045516919, "grad_norm": 0.8011086934156915, "learning_rate": 1.0405720957384706e-06, "loss": 0.806, "step": 713 }, { "epoch": 0.020846106683016553, "grad_norm": 0.8137772796097364, "learning_rate": 1.042031523642732e-06, "loss": 0.8268, "step": 714 }, { "epoch": 0.020875302910863917, "grad_norm": 0.7644786355980335, "learning_rate": 1.0434909515469937e-06, "loss": 0.811, "step": 715 }, { "epoch": 0.02090449913871128, "grad_norm": 1.046617420001382, "learning_rate": 1.0449503794512552e-06, "loss": 0.9542, "step": 716 }, { "epoch": 0.02093369536655864, "grad_norm": 0.9362580607792192, "learning_rate": 1.0464098073555168e-06, "loss": 0.8956, "step": 717 }, { "epoch": 0.020962891594406004, "grad_norm": 0.8753224021284735, "learning_rate": 1.0478692352597783e-06, "loss": 0.8421, "step": 718 }, { "epoch": 0.020992087822253366, "grad_norm": 1.5814484755724139, "learning_rate": 1.0493286631640397e-06, "loss": 0.771, "step": 719 }, { "epoch": 0.021021284050100727, "grad_norm": 0.7597337651318385, "learning_rate": 1.0507880910683014e-06, "loss": 0.7597, "step": 720 }, { "epoch": 0.021050480277948088, "grad_norm": 0.9504003580372961, "learning_rate": 1.0522475189725629e-06, "loss": 1.0001, "step": 721 }, { "epoch": 0.021079676505795453, "grad_norm": 0.8340999738165629, "learning_rate": 1.0537069468768243e-06, "loss": 0.8012, "step": 722 }, { "epoch": 0.021108872733642814, "grad_norm": 0.8188225644859948, "learning_rate": 1.0551663747810858e-06, "loss": 0.8672, "step": 723 }, { "epoch": 0.021138068961490175, "grad_norm": 0.8122718147296435, "learning_rate": 1.0566258026853475e-06, "loss": 0.8268, "step": 724 }, { "epoch": 0.021167265189337536, "grad_norm": 0.9159267904356725, "learning_rate": 1.058085230589609e-06, "loss": 0.876, "step": 725 }, { "epoch": 0.0211964614171849, "grad_norm": 0.9436371294873437, "learning_rate": 1.0595446584938706e-06, "loss": 0.9242, "step": 726 }, { "epoch": 0.021225657645032262, "grad_norm": 0.9769264910494749, "learning_rate": 1.061004086398132e-06, "loss": 0.8628, "step": 727 }, { "epoch": 0.021254853872879623, "grad_norm": 1.0240044633176166, "learning_rate": 1.0624635143023935e-06, "loss": 0.886, "step": 728 }, { "epoch": 0.021284050100726985, "grad_norm": 1.0613392966245092, "learning_rate": 1.0639229422066552e-06, "loss": 0.9173, "step": 729 }, { "epoch": 0.02131324632857435, "grad_norm": 0.8686016290021031, "learning_rate": 1.0653823701109166e-06, "loss": 0.8592, "step": 730 }, { "epoch": 0.02134244255642171, "grad_norm": 0.9032287319719722, "learning_rate": 1.066841798015178e-06, "loss": 0.8536, "step": 731 }, { "epoch": 0.02137163878426907, "grad_norm": 0.8188159048124635, "learning_rate": 1.0683012259194395e-06, "loss": 0.7981, "step": 732 }, { "epoch": 0.021400835012116433, "grad_norm": 0.9631880803325652, "learning_rate": 1.0697606538237012e-06, "loss": 0.9092, "step": 733 }, { "epoch": 0.021430031239963798, "grad_norm": 1.23683526116637, "learning_rate": 1.0712200817279627e-06, "loss": 0.9255, "step": 734 }, { "epoch": 0.02145922746781116, "grad_norm": 1.1043967887860193, "learning_rate": 1.0726795096322243e-06, "loss": 0.9002, "step": 735 }, { "epoch": 0.02148842369565852, "grad_norm": 0.9933117928234659, "learning_rate": 1.0741389375364858e-06, "loss": 0.8963, "step": 736 }, { "epoch": 0.021517619923505885, "grad_norm": 0.9629825003018686, "learning_rate": 1.0755983654407473e-06, "loss": 0.8624, "step": 737 }, { "epoch": 0.021546816151353246, "grad_norm": 0.8499928464480829, "learning_rate": 1.0770577933450087e-06, "loss": 0.8285, "step": 738 }, { "epoch": 0.021576012379200607, "grad_norm": 0.8141509352940354, "learning_rate": 1.0785172212492704e-06, "loss": 0.8533, "step": 739 }, { "epoch": 0.021605208607047968, "grad_norm": 0.8683780611896097, "learning_rate": 1.0799766491535318e-06, "loss": 0.8825, "step": 740 }, { "epoch": 0.021634404834895333, "grad_norm": 0.8162439531523372, "learning_rate": 1.0814360770577935e-06, "loss": 0.8299, "step": 741 }, { "epoch": 0.021663601062742694, "grad_norm": 0.8476241741673205, "learning_rate": 1.082895504962055e-06, "loss": 0.838, "step": 742 }, { "epoch": 0.021692797290590055, "grad_norm": 0.8679461015364843, "learning_rate": 1.0843549328663164e-06, "loss": 0.8524, "step": 743 }, { "epoch": 0.021721993518437416, "grad_norm": 0.9480746713880507, "learning_rate": 1.085814360770578e-06, "loss": 0.9099, "step": 744 }, { "epoch": 0.02175118974628478, "grad_norm": 0.9790321558684866, "learning_rate": 1.0872737886748396e-06, "loss": 0.8706, "step": 745 }, { "epoch": 0.021780385974132142, "grad_norm": 1.2076690052572505, "learning_rate": 1.088733216579101e-06, "loss": 0.8012, "step": 746 }, { "epoch": 0.021809582201979504, "grad_norm": 1.2636246476379667, "learning_rate": 1.0901926444833625e-06, "loss": 1.0114, "step": 747 }, { "epoch": 0.021838778429826865, "grad_norm": 0.7884158585327645, "learning_rate": 1.0916520723876242e-06, "loss": 0.7677, "step": 748 }, { "epoch": 0.02186797465767423, "grad_norm": 0.8779058602377598, "learning_rate": 1.0931115002918858e-06, "loss": 0.91, "step": 749 }, { "epoch": 0.02189717088552159, "grad_norm": 0.851529728094876, "learning_rate": 1.0945709281961473e-06, "loss": 0.8526, "step": 750 }, { "epoch": 0.021926367113368952, "grad_norm": 0.9041378863875362, "learning_rate": 1.0960303561004087e-06, "loss": 0.9111, "step": 751 }, { "epoch": 0.021955563341216316, "grad_norm": 0.9051232748959133, "learning_rate": 1.0974897840046702e-06, "loss": 0.9003, "step": 752 }, { "epoch": 0.021984759569063678, "grad_norm": 1.591755337195569, "learning_rate": 1.0989492119089319e-06, "loss": 0.8706, "step": 753 }, { "epoch": 0.02201395579691104, "grad_norm": 0.9160145409929896, "learning_rate": 1.1004086398131933e-06, "loss": 0.9205, "step": 754 }, { "epoch": 0.0220431520247584, "grad_norm": 1.0125333418528826, "learning_rate": 1.1018680677174548e-06, "loss": 0.9523, "step": 755 }, { "epoch": 0.022072348252605765, "grad_norm": 0.8795662598929702, "learning_rate": 1.1033274956217162e-06, "loss": 0.8976, "step": 756 }, { "epoch": 0.022101544480453126, "grad_norm": 0.8775993938275166, "learning_rate": 1.104786923525978e-06, "loss": 0.7752, "step": 757 }, { "epoch": 0.022130740708300487, "grad_norm": 0.9560691881544885, "learning_rate": 1.1062463514302396e-06, "loss": 0.915, "step": 758 }, { "epoch": 0.02215993693614785, "grad_norm": 0.7758732947945064, "learning_rate": 1.107705779334501e-06, "loss": 0.6767, "step": 759 }, { "epoch": 0.022189133163995213, "grad_norm": 0.8581667394547945, "learning_rate": 1.1091652072387625e-06, "loss": 0.822, "step": 760 }, { "epoch": 0.022218329391842574, "grad_norm": 2.1137402158546004, "learning_rate": 1.110624635143024e-06, "loss": 0.9419, "step": 761 }, { "epoch": 0.022247525619689935, "grad_norm": 1.6600674854739572, "learning_rate": 1.1120840630472856e-06, "loss": 0.8096, "step": 762 }, { "epoch": 0.022276721847537297, "grad_norm": 1.0561875344744691, "learning_rate": 1.113543490951547e-06, "loss": 0.9336, "step": 763 }, { "epoch": 0.02230591807538466, "grad_norm": 0.8874782169342967, "learning_rate": 1.1150029188558085e-06, "loss": 0.8747, "step": 764 }, { "epoch": 0.022335114303232022, "grad_norm": 1.045577101138875, "learning_rate": 1.1164623467600702e-06, "loss": 0.8362, "step": 765 }, { "epoch": 0.022364310531079384, "grad_norm": 0.9614720845557505, "learning_rate": 1.1179217746643317e-06, "loss": 0.9245, "step": 766 }, { "epoch": 0.02239350675892675, "grad_norm": 0.8487946585194109, "learning_rate": 1.1193812025685931e-06, "loss": 0.8598, "step": 767 }, { "epoch": 0.02242270298677411, "grad_norm": 0.9338370925032912, "learning_rate": 1.1208406304728548e-06, "loss": 0.8807, "step": 768 }, { "epoch": 0.02245189921462147, "grad_norm": 0.8805210940123328, "learning_rate": 1.1223000583771163e-06, "loss": 0.8149, "step": 769 }, { "epoch": 0.022481095442468832, "grad_norm": 1.0208843435094557, "learning_rate": 1.1237594862813777e-06, "loss": 0.9513, "step": 770 }, { "epoch": 0.022510291670316197, "grad_norm": 1.2027950000564103, "learning_rate": 1.1252189141856392e-06, "loss": 0.9606, "step": 771 }, { "epoch": 0.022539487898163558, "grad_norm": 0.8361295279231411, "learning_rate": 1.1266783420899008e-06, "loss": 0.8112, "step": 772 }, { "epoch": 0.02256868412601092, "grad_norm": 0.8820911976860503, "learning_rate": 1.1281377699941625e-06, "loss": 0.8264, "step": 773 }, { "epoch": 0.02259788035385828, "grad_norm": 0.9383113074109376, "learning_rate": 1.129597197898424e-06, "loss": 0.8876, "step": 774 }, { "epoch": 0.022627076581705645, "grad_norm": 0.8751053402517525, "learning_rate": 1.1310566258026854e-06, "loss": 0.8093, "step": 775 }, { "epoch": 0.022656272809553006, "grad_norm": 0.8325631535708188, "learning_rate": 1.1325160537069469e-06, "loss": 0.8128, "step": 776 }, { "epoch": 0.022685469037400367, "grad_norm": 1.0261577013338188, "learning_rate": 1.1339754816112086e-06, "loss": 0.8582, "step": 777 }, { "epoch": 0.02271466526524773, "grad_norm": 0.889877492676451, "learning_rate": 1.13543490951547e-06, "loss": 0.8986, "step": 778 }, { "epoch": 0.022743861493095093, "grad_norm": 1.1034468207278574, "learning_rate": 1.1368943374197315e-06, "loss": 0.9454, "step": 779 }, { "epoch": 0.022773057720942454, "grad_norm": 0.7959122780126012, "learning_rate": 1.138353765323993e-06, "loss": 0.7245, "step": 780 }, { "epoch": 0.022802253948789816, "grad_norm": 1.129504916002729, "learning_rate": 1.1398131932282546e-06, "loss": 0.8837, "step": 781 }, { "epoch": 0.022831450176637177, "grad_norm": 1.214142874056995, "learning_rate": 1.1412726211325163e-06, "loss": 1.007, "step": 782 }, { "epoch": 0.02286064640448454, "grad_norm": 0.9028171442870457, "learning_rate": 1.1427320490367777e-06, "loss": 0.9002, "step": 783 }, { "epoch": 0.022889842632331903, "grad_norm": 0.8580524967265937, "learning_rate": 1.1441914769410392e-06, "loss": 0.8366, "step": 784 }, { "epoch": 0.022919038860179264, "grad_norm": 0.8638999005830017, "learning_rate": 1.1456509048453006e-06, "loss": 0.8617, "step": 785 }, { "epoch": 0.02294823508802663, "grad_norm": 0.8778544730676783, "learning_rate": 1.1471103327495623e-06, "loss": 0.9163, "step": 786 }, { "epoch": 0.02297743131587399, "grad_norm": 1.0017866658963892, "learning_rate": 1.1485697606538238e-06, "loss": 0.9642, "step": 787 }, { "epoch": 0.02300662754372135, "grad_norm": 1.0651688230760123, "learning_rate": 1.1500291885580852e-06, "loss": 0.9526, "step": 788 }, { "epoch": 0.023035823771568712, "grad_norm": 0.9421687879538967, "learning_rate": 1.151488616462347e-06, "loss": 0.8725, "step": 789 }, { "epoch": 0.023065019999416077, "grad_norm": 1.4275915270849204, "learning_rate": 1.1529480443666084e-06, "loss": 0.8795, "step": 790 }, { "epoch": 0.023094216227263438, "grad_norm": 0.9723151916901019, "learning_rate": 1.15440747227087e-06, "loss": 0.8829, "step": 791 }, { "epoch": 0.0231234124551108, "grad_norm": 0.822495321885781, "learning_rate": 1.1558669001751315e-06, "loss": 0.9053, "step": 792 }, { "epoch": 0.02315260868295816, "grad_norm": 0.9607523269708933, "learning_rate": 1.157326328079393e-06, "loss": 0.9944, "step": 793 }, { "epoch": 0.023181804910805525, "grad_norm": 0.8166022815471954, "learning_rate": 1.1587857559836544e-06, "loss": 0.7205, "step": 794 }, { "epoch": 0.023211001138652886, "grad_norm": 0.8090427974056267, "learning_rate": 1.160245183887916e-06, "loss": 0.8374, "step": 795 }, { "epoch": 0.023240197366500247, "grad_norm": 0.962624059571664, "learning_rate": 1.1617046117921775e-06, "loss": 0.9096, "step": 796 }, { "epoch": 0.02326939359434761, "grad_norm": 0.9125448089362307, "learning_rate": 1.1631640396964392e-06, "loss": 0.8729, "step": 797 }, { "epoch": 0.023298589822194973, "grad_norm": 0.8392372763587893, "learning_rate": 1.1646234676007007e-06, "loss": 0.8638, "step": 798 }, { "epoch": 0.023327786050042335, "grad_norm": 0.8507096011467496, "learning_rate": 1.1660828955049621e-06, "loss": 0.8663, "step": 799 }, { "epoch": 0.023356982277889696, "grad_norm": 1.6841834439154537, "learning_rate": 1.1675423234092238e-06, "loss": 0.8942, "step": 800 }, { "epoch": 0.02338617850573706, "grad_norm": 0.9446066925849977, "learning_rate": 1.1690017513134853e-06, "loss": 0.7532, "step": 801 }, { "epoch": 0.02341537473358442, "grad_norm": 0.8736250277391788, "learning_rate": 1.1704611792177467e-06, "loss": 0.9151, "step": 802 }, { "epoch": 0.023444570961431783, "grad_norm": 0.9592571306928414, "learning_rate": 1.1719206071220082e-06, "loss": 0.8644, "step": 803 }, { "epoch": 0.023473767189279144, "grad_norm": 0.9098190577864399, "learning_rate": 1.1733800350262698e-06, "loss": 0.9135, "step": 804 }, { "epoch": 0.02350296341712651, "grad_norm": 0.9637897697020773, "learning_rate": 1.1748394629305313e-06, "loss": 0.8054, "step": 805 }, { "epoch": 0.02353215964497387, "grad_norm": 0.966022839720748, "learning_rate": 1.176298890834793e-06, "loss": 0.8966, "step": 806 }, { "epoch": 0.02356135587282123, "grad_norm": 0.8155583183713129, "learning_rate": 1.1777583187390544e-06, "loss": 0.8344, "step": 807 }, { "epoch": 0.023590552100668592, "grad_norm": 0.8638107859191297, "learning_rate": 1.1792177466433159e-06, "loss": 0.7622, "step": 808 }, { "epoch": 0.023619748328515957, "grad_norm": 1.0081140249974034, "learning_rate": 1.1806771745475773e-06, "loss": 0.86, "step": 809 }, { "epoch": 0.023648944556363318, "grad_norm": 1.502610666439772, "learning_rate": 1.182136602451839e-06, "loss": 0.9554, "step": 810 }, { "epoch": 0.02367814078421068, "grad_norm": 1.1223485261288553, "learning_rate": 1.1835960303561005e-06, "loss": 0.9498, "step": 811 }, { "epoch": 0.02370733701205804, "grad_norm": 0.89554432153813, "learning_rate": 1.185055458260362e-06, "loss": 0.8142, "step": 812 }, { "epoch": 0.023736533239905405, "grad_norm": 0.8039538074508795, "learning_rate": 1.1865148861646236e-06, "loss": 0.8047, "step": 813 }, { "epoch": 0.023765729467752766, "grad_norm": 0.9187936990821521, "learning_rate": 1.187974314068885e-06, "loss": 0.8891, "step": 814 }, { "epoch": 0.023794925695600128, "grad_norm": 0.8200788317860651, "learning_rate": 1.1894337419731467e-06, "loss": 0.8495, "step": 815 }, { "epoch": 0.023824121923447492, "grad_norm": 1.0356434483090453, "learning_rate": 1.1908931698774082e-06, "loss": 0.8807, "step": 816 }, { "epoch": 0.023853318151294853, "grad_norm": 1.0772293891072442, "learning_rate": 1.1923525977816696e-06, "loss": 0.88, "step": 817 }, { "epoch": 0.023882514379142215, "grad_norm": 0.8442149581017581, "learning_rate": 1.193812025685931e-06, "loss": 0.8201, "step": 818 }, { "epoch": 0.023911710606989576, "grad_norm": 0.8769348303517646, "learning_rate": 1.1952714535901928e-06, "loss": 0.8728, "step": 819 }, { "epoch": 0.02394090683483694, "grad_norm": 0.7831620133327762, "learning_rate": 1.1967308814944542e-06, "loss": 0.7094, "step": 820 }, { "epoch": 0.023970103062684302, "grad_norm": 0.7706503386226372, "learning_rate": 1.198190309398716e-06, "loss": 0.6963, "step": 821 }, { "epoch": 0.023999299290531663, "grad_norm": 0.9996429634023838, "learning_rate": 1.1996497373029774e-06, "loss": 0.8506, "step": 822 }, { "epoch": 0.024028495518379024, "grad_norm": 0.9094435361378714, "learning_rate": 1.2011091652072388e-06, "loss": 0.8451, "step": 823 }, { "epoch": 0.02405769174622639, "grad_norm": 0.8035384945337745, "learning_rate": 1.2025685931115005e-06, "loss": 0.8415, "step": 824 }, { "epoch": 0.02408688797407375, "grad_norm": 0.8924556053617732, "learning_rate": 1.204028021015762e-06, "loss": 0.8132, "step": 825 }, { "epoch": 0.02411608420192111, "grad_norm": 0.8876269919419517, "learning_rate": 1.2054874489200234e-06, "loss": 0.8924, "step": 826 }, { "epoch": 0.024145280429768472, "grad_norm": 0.8972247012817639, "learning_rate": 1.2069468768242849e-06, "loss": 0.8705, "step": 827 }, { "epoch": 0.024174476657615837, "grad_norm": 0.8632829216411438, "learning_rate": 1.2084063047285465e-06, "loss": 0.8465, "step": 828 }, { "epoch": 0.0242036728854632, "grad_norm": 0.8524663681417193, "learning_rate": 1.2098657326328082e-06, "loss": 0.8873, "step": 829 }, { "epoch": 0.02423286911331056, "grad_norm": 0.9007457915693096, "learning_rate": 1.2113251605370697e-06, "loss": 0.8195, "step": 830 }, { "epoch": 0.02426206534115792, "grad_norm": 0.8547665760146298, "learning_rate": 1.2127845884413311e-06, "loss": 0.8056, "step": 831 }, { "epoch": 0.024291261569005285, "grad_norm": 0.8958283556084695, "learning_rate": 1.2142440163455926e-06, "loss": 0.8463, "step": 832 }, { "epoch": 0.024320457796852647, "grad_norm": 0.8311906709687678, "learning_rate": 1.2157034442498542e-06, "loss": 0.7837, "step": 833 }, { "epoch": 0.024349654024700008, "grad_norm": 0.9012819490851763, "learning_rate": 1.2171628721541157e-06, "loss": 0.8117, "step": 834 }, { "epoch": 0.024378850252547372, "grad_norm": 0.8834557777319716, "learning_rate": 1.2186223000583772e-06, "loss": 0.9124, "step": 835 }, { "epoch": 0.024408046480394734, "grad_norm": 0.907590371856446, "learning_rate": 1.2200817279626386e-06, "loss": 0.8307, "step": 836 }, { "epoch": 0.024437242708242095, "grad_norm": 0.8519854402317482, "learning_rate": 1.2215411558669003e-06, "loss": 0.9076, "step": 837 }, { "epoch": 0.024466438936089456, "grad_norm": 1.1073765245417553, "learning_rate": 1.2230005837711617e-06, "loss": 0.8815, "step": 838 }, { "epoch": 0.02449563516393682, "grad_norm": 1.1769287781937199, "learning_rate": 1.2244600116754234e-06, "loss": 0.9682, "step": 839 }, { "epoch": 0.024524831391784182, "grad_norm": 1.015094032241832, "learning_rate": 1.2259194395796849e-06, "loss": 0.8891, "step": 840 }, { "epoch": 0.024554027619631543, "grad_norm": 1.2381466230622487, "learning_rate": 1.2273788674839463e-06, "loss": 0.9584, "step": 841 }, { "epoch": 0.024583223847478904, "grad_norm": 0.911751007152446, "learning_rate": 1.2288382953882078e-06, "loss": 0.9022, "step": 842 }, { "epoch": 0.02461242007532627, "grad_norm": 0.8428714204227942, "learning_rate": 1.2302977232924695e-06, "loss": 0.9022, "step": 843 }, { "epoch": 0.02464161630317363, "grad_norm": 0.8855325886879192, "learning_rate": 1.231757151196731e-06, "loss": 0.8876, "step": 844 }, { "epoch": 0.02467081253102099, "grad_norm": 1.0046616261983539, "learning_rate": 1.2332165791009926e-06, "loss": 0.9247, "step": 845 }, { "epoch": 0.024700008758868353, "grad_norm": 0.8676320785160587, "learning_rate": 1.234676007005254e-06, "loss": 0.847, "step": 846 }, { "epoch": 0.024729204986715717, "grad_norm": 0.7936262594607031, "learning_rate": 1.2361354349095155e-06, "loss": 0.7757, "step": 847 }, { "epoch": 0.02475840121456308, "grad_norm": 1.0621419122571893, "learning_rate": 1.2375948628137772e-06, "loss": 0.9728, "step": 848 }, { "epoch": 0.02478759744241044, "grad_norm": 1.2063898319845463, "learning_rate": 1.2390542907180386e-06, "loss": 0.9424, "step": 849 }, { "epoch": 0.024816793670257804, "grad_norm": 1.0701402746287385, "learning_rate": 1.2405137186223e-06, "loss": 0.8261, "step": 850 }, { "epoch": 0.024845989898105166, "grad_norm": 1.0225871800732593, "learning_rate": 1.2419731465265616e-06, "loss": 0.8739, "step": 851 }, { "epoch": 0.024875186125952527, "grad_norm": 0.967138622694859, "learning_rate": 1.2434325744308232e-06, "loss": 0.8446, "step": 852 }, { "epoch": 0.024904382353799888, "grad_norm": 1.0708506173472745, "learning_rate": 1.2448920023350849e-06, "loss": 1.0093, "step": 853 }, { "epoch": 0.024933578581647253, "grad_norm": 0.9547872567716214, "learning_rate": 1.2463514302393464e-06, "loss": 0.9033, "step": 854 }, { "epoch": 0.024962774809494614, "grad_norm": 0.8282227080192944, "learning_rate": 1.2478108581436078e-06, "loss": 0.8055, "step": 855 }, { "epoch": 0.024991971037341975, "grad_norm": 0.9621423615369806, "learning_rate": 1.2492702860478693e-06, "loss": 0.8946, "step": 856 }, { "epoch": 0.025021167265189336, "grad_norm": 1.159132866785922, "learning_rate": 1.250729713952131e-06, "loss": 0.9875, "step": 857 }, { "epoch": 0.0250503634930367, "grad_norm": 0.8441086198974734, "learning_rate": 1.2521891418563926e-06, "loss": 0.8042, "step": 858 }, { "epoch": 0.025079559720884062, "grad_norm": 0.8022841836314109, "learning_rate": 1.253648569760654e-06, "loss": 0.7933, "step": 859 }, { "epoch": 0.025108755948731423, "grad_norm": 0.811246809593128, "learning_rate": 1.2551079976649155e-06, "loss": 0.8762, "step": 860 }, { "epoch": 0.025137952176578784, "grad_norm": 0.9027911229929503, "learning_rate": 1.256567425569177e-06, "loss": 0.8016, "step": 861 }, { "epoch": 0.02516714840442615, "grad_norm": 0.8044940068857735, "learning_rate": 1.2580268534734387e-06, "loss": 0.8553, "step": 862 }, { "epoch": 0.02519634463227351, "grad_norm": 0.8193628160807719, "learning_rate": 1.2594862813777001e-06, "loss": 0.7946, "step": 863 }, { "epoch": 0.02522554086012087, "grad_norm": 0.9473579370195696, "learning_rate": 1.2609457092819616e-06, "loss": 0.8935, "step": 864 }, { "epoch": 0.025254737087968236, "grad_norm": 0.9181552587779485, "learning_rate": 1.262405137186223e-06, "loss": 0.7945, "step": 865 }, { "epoch": 0.025283933315815597, "grad_norm": 1.007233249975577, "learning_rate": 1.2638645650904847e-06, "loss": 0.9029, "step": 866 }, { "epoch": 0.02531312954366296, "grad_norm": 0.9991253469602352, "learning_rate": 1.2653239929947462e-06, "loss": 1.0485, "step": 867 }, { "epoch": 0.02534232577151032, "grad_norm": 0.8446329776218722, "learning_rate": 1.2667834208990076e-06, "loss": 0.8848, "step": 868 }, { "epoch": 0.025371521999357684, "grad_norm": 0.895409694802307, "learning_rate": 1.268242848803269e-06, "loss": 0.7951, "step": 869 }, { "epoch": 0.025400718227205046, "grad_norm": 0.945170728269753, "learning_rate": 1.2697022767075307e-06, "loss": 0.9032, "step": 870 }, { "epoch": 0.025429914455052407, "grad_norm": 0.9201422544916601, "learning_rate": 1.2711617046117922e-06, "loss": 0.8333, "step": 871 }, { "epoch": 0.025459110682899768, "grad_norm": 0.8506011165958938, "learning_rate": 1.2726211325160537e-06, "loss": 0.8339, "step": 872 }, { "epoch": 0.025488306910747133, "grad_norm": 0.8296662789348118, "learning_rate": 1.2740805604203155e-06, "loss": 0.8202, "step": 873 }, { "epoch": 0.025517503138594494, "grad_norm": 0.8146474106708074, "learning_rate": 1.275539988324577e-06, "loss": 0.7806, "step": 874 }, { "epoch": 0.025546699366441855, "grad_norm": 0.8988912837014893, "learning_rate": 1.2769994162288385e-06, "loss": 0.8752, "step": 875 }, { "epoch": 0.025575895594289216, "grad_norm": 0.8192346824773622, "learning_rate": 1.2784588441331e-06, "loss": 0.795, "step": 876 }, { "epoch": 0.02560509182213658, "grad_norm": 0.9325304222974464, "learning_rate": 1.2799182720373616e-06, "loss": 0.8885, "step": 877 }, { "epoch": 0.025634288049983942, "grad_norm": 0.8545823900088786, "learning_rate": 1.281377699941623e-06, "loss": 0.7916, "step": 878 }, { "epoch": 0.025663484277831303, "grad_norm": 0.9655984851824166, "learning_rate": 1.2828371278458845e-06, "loss": 0.8898, "step": 879 }, { "epoch": 0.025692680505678665, "grad_norm": 0.8039087713020069, "learning_rate": 1.284296555750146e-06, "loss": 0.8045, "step": 880 }, { "epoch": 0.02572187673352603, "grad_norm": 0.7402404534701228, "learning_rate": 1.2857559836544076e-06, "loss": 0.67, "step": 881 }, { "epoch": 0.02575107296137339, "grad_norm": 0.8881922159164062, "learning_rate": 1.287215411558669e-06, "loss": 0.8819, "step": 882 }, { "epoch": 0.02578026918922075, "grad_norm": 0.7951392809265634, "learning_rate": 1.2886748394629305e-06, "loss": 0.7411, "step": 883 }, { "epoch": 0.025809465417068116, "grad_norm": 0.8221764673995763, "learning_rate": 1.290134267367192e-06, "loss": 0.8571, "step": 884 }, { "epoch": 0.025838661644915478, "grad_norm": 0.8970179465628704, "learning_rate": 1.2915936952714537e-06, "loss": 0.8054, "step": 885 }, { "epoch": 0.02586785787276284, "grad_norm": 0.8456420258141742, "learning_rate": 1.2930531231757151e-06, "loss": 0.766, "step": 886 }, { "epoch": 0.0258970541006102, "grad_norm": 0.933377094643716, "learning_rate": 1.2945125510799766e-06, "loss": 0.809, "step": 887 }, { "epoch": 0.025926250328457565, "grad_norm": 0.865778429774459, "learning_rate": 1.295971978984238e-06, "loss": 0.7945, "step": 888 }, { "epoch": 0.025955446556304926, "grad_norm": 0.7879498840044862, "learning_rate": 1.2974314068885e-06, "loss": 0.7646, "step": 889 }, { "epoch": 0.025984642784152287, "grad_norm": 0.8135730549075845, "learning_rate": 1.2988908347927614e-06, "loss": 0.7908, "step": 890 }, { "epoch": 0.026013839011999648, "grad_norm": 1.0672308648635773, "learning_rate": 1.300350262697023e-06, "loss": 0.8448, "step": 891 }, { "epoch": 0.026043035239847013, "grad_norm": 0.9318377847856709, "learning_rate": 1.3018096906012845e-06, "loss": 0.8922, "step": 892 }, { "epoch": 0.026072231467694374, "grad_norm": 0.8380220007141336, "learning_rate": 1.303269118505546e-06, "loss": 0.8201, "step": 893 }, { "epoch": 0.026101427695541735, "grad_norm": 0.8190083725559758, "learning_rate": 1.3047285464098074e-06, "loss": 0.792, "step": 894 }, { "epoch": 0.026130623923389096, "grad_norm": 1.1824305318435155, "learning_rate": 1.306187974314069e-06, "loss": 0.9199, "step": 895 }, { "epoch": 0.02615982015123646, "grad_norm": 0.8124255840331391, "learning_rate": 1.3076474022183306e-06, "loss": 0.8529, "step": 896 }, { "epoch": 0.026189016379083822, "grad_norm": 0.9732223921587889, "learning_rate": 1.309106830122592e-06, "loss": 0.8995, "step": 897 }, { "epoch": 0.026218212606931184, "grad_norm": 0.8632866932048355, "learning_rate": 1.3105662580268535e-06, "loss": 0.781, "step": 898 }, { "epoch": 0.026247408834778548, "grad_norm": 1.08700241169093, "learning_rate": 1.3120256859311151e-06, "loss": 0.7243, "step": 899 }, { "epoch": 0.02627660506262591, "grad_norm": 0.8817840436065574, "learning_rate": 1.3134851138353766e-06, "loss": 0.8352, "step": 900 }, { "epoch": 0.02630580129047327, "grad_norm": 1.17473884561396, "learning_rate": 1.314944541739638e-06, "loss": 0.9706, "step": 901 }, { "epoch": 0.026334997518320632, "grad_norm": 0.8411495113534291, "learning_rate": 1.3164039696438995e-06, "loss": 0.8062, "step": 902 }, { "epoch": 0.026364193746167996, "grad_norm": 0.9519070132743697, "learning_rate": 1.3178633975481612e-06, "loss": 0.8431, "step": 903 }, { "epoch": 0.026393389974015358, "grad_norm": 0.834255574669817, "learning_rate": 1.3193228254524227e-06, "loss": 0.8378, "step": 904 }, { "epoch": 0.02642258620186272, "grad_norm": 1.0150679336958779, "learning_rate": 1.3207822533566843e-06, "loss": 0.8726, "step": 905 }, { "epoch": 0.02645178242971008, "grad_norm": 0.830346557483143, "learning_rate": 1.322241681260946e-06, "loss": 0.8301, "step": 906 }, { "epoch": 0.026480978657557445, "grad_norm": 0.8936049909982275, "learning_rate": 1.3237011091652075e-06, "loss": 0.7899, "step": 907 }, { "epoch": 0.026510174885404806, "grad_norm": 0.8563267285729116, "learning_rate": 1.325160537069469e-06, "loss": 0.8845, "step": 908 }, { "epoch": 0.026539371113252167, "grad_norm": 1.4615884910004828, "learning_rate": 1.3266199649737304e-06, "loss": 1.0234, "step": 909 }, { "epoch": 0.02656856734109953, "grad_norm": 0.8076275654995104, "learning_rate": 1.328079392877992e-06, "loss": 0.7289, "step": 910 }, { "epoch": 0.026597763568946893, "grad_norm": 0.788404171907924, "learning_rate": 1.3295388207822535e-06, "loss": 0.7602, "step": 911 }, { "epoch": 0.026626959796794254, "grad_norm": 1.1935946855373212, "learning_rate": 1.330998248686515e-06, "loss": 0.8347, "step": 912 }, { "epoch": 0.026656156024641615, "grad_norm": 0.8193898432742754, "learning_rate": 1.3324576765907764e-06, "loss": 0.86, "step": 913 }, { "epoch": 0.02668535225248898, "grad_norm": 0.9393547931533802, "learning_rate": 1.333917104495038e-06, "loss": 0.9336, "step": 914 }, { "epoch": 0.02671454848033634, "grad_norm": 0.8557543200446571, "learning_rate": 1.3353765323992995e-06, "loss": 0.8497, "step": 915 }, { "epoch": 0.026743744708183703, "grad_norm": 1.131048878636728, "learning_rate": 1.336835960303561e-06, "loss": 0.8752, "step": 916 }, { "epoch": 0.026772940936031064, "grad_norm": 0.9978677085642895, "learning_rate": 1.3382953882078227e-06, "loss": 0.8259, "step": 917 }, { "epoch": 0.02680213716387843, "grad_norm": 0.876995647183348, "learning_rate": 1.3397548161120841e-06, "loss": 0.8751, "step": 918 }, { "epoch": 0.02683133339172579, "grad_norm": 0.8723783435775951, "learning_rate": 1.3412142440163456e-06, "loss": 0.8886, "step": 919 }, { "epoch": 0.02686052961957315, "grad_norm": 0.8463748809506586, "learning_rate": 1.342673671920607e-06, "loss": 0.74, "step": 920 }, { "epoch": 0.026889725847420512, "grad_norm": 0.7675327079041908, "learning_rate": 1.344133099824869e-06, "loss": 0.7395, "step": 921 }, { "epoch": 0.026918922075267877, "grad_norm": 0.9675708358035435, "learning_rate": 1.3455925277291304e-06, "loss": 0.9548, "step": 922 }, { "epoch": 0.026948118303115238, "grad_norm": 0.8258915228939436, "learning_rate": 1.3470519556333918e-06, "loss": 0.8304, "step": 923 }, { "epoch": 0.0269773145309626, "grad_norm": 0.788147494849591, "learning_rate": 1.3485113835376535e-06, "loss": 0.7757, "step": 924 }, { "epoch": 0.02700651075880996, "grad_norm": 0.9069084606052223, "learning_rate": 1.349970811441915e-06, "loss": 0.8554, "step": 925 }, { "epoch": 0.027035706986657325, "grad_norm": 1.0479010221501983, "learning_rate": 1.3514302393461764e-06, "loss": 0.7972, "step": 926 }, { "epoch": 0.027064903214504686, "grad_norm": 0.8370962133165993, "learning_rate": 1.3528896672504379e-06, "loss": 0.8367, "step": 927 }, { "epoch": 0.027094099442352047, "grad_norm": 0.8395672209674335, "learning_rate": 1.3543490951546996e-06, "loss": 0.8466, "step": 928 }, { "epoch": 0.02712329567019941, "grad_norm": 0.8595689747485369, "learning_rate": 1.355808523058961e-06, "loss": 0.8531, "step": 929 }, { "epoch": 0.027152491898046773, "grad_norm": 2.142084040074574, "learning_rate": 1.3572679509632225e-06, "loss": 0.9787, "step": 930 }, { "epoch": 0.027181688125894134, "grad_norm": 0.9223516484455614, "learning_rate": 1.358727378867484e-06, "loss": 0.936, "step": 931 }, { "epoch": 0.027210884353741496, "grad_norm": 0.9880953836187679, "learning_rate": 1.3601868067717456e-06, "loss": 0.8294, "step": 932 }, { "epoch": 0.02724008058158886, "grad_norm": 0.7724197539391184, "learning_rate": 1.361646234676007e-06, "loss": 0.738, "step": 933 }, { "epoch": 0.02726927680943622, "grad_norm": 0.9436847858038951, "learning_rate": 1.3631056625802685e-06, "loss": 0.9406, "step": 934 }, { "epoch": 0.027298473037283583, "grad_norm": 0.8073583181484901, "learning_rate": 1.36456509048453e-06, "loss": 0.7999, "step": 935 }, { "epoch": 0.027327669265130944, "grad_norm": 1.0591117811366224, "learning_rate": 1.3660245183887916e-06, "loss": 0.8929, "step": 936 }, { "epoch": 0.02735686549297831, "grad_norm": 0.8101008340070723, "learning_rate": 1.3674839462930533e-06, "loss": 0.6766, "step": 937 }, { "epoch": 0.02738606172082567, "grad_norm": 0.9220142316760759, "learning_rate": 1.3689433741973148e-06, "loss": 0.88, "step": 938 }, { "epoch": 0.02741525794867303, "grad_norm": 1.0688992177521568, "learning_rate": 1.3704028021015764e-06, "loss": 0.8609, "step": 939 }, { "epoch": 0.027444454176520392, "grad_norm": 0.869774494470012, "learning_rate": 1.371862230005838e-06, "loss": 0.7834, "step": 940 }, { "epoch": 0.027473650404367757, "grad_norm": 0.9425200246316308, "learning_rate": 1.3733216579100994e-06, "loss": 0.8283, "step": 941 }, { "epoch": 0.027502846632215118, "grad_norm": 1.217370160301856, "learning_rate": 1.374781085814361e-06, "loss": 0.971, "step": 942 }, { "epoch": 0.02753204286006248, "grad_norm": 0.8574288110979255, "learning_rate": 1.3762405137186225e-06, "loss": 0.9116, "step": 943 }, { "epoch": 0.02756123908790984, "grad_norm": 0.9145212409786289, "learning_rate": 1.377699941622884e-06, "loss": 0.8157, "step": 944 }, { "epoch": 0.027590435315757205, "grad_norm": 0.8446776213939959, "learning_rate": 1.3791593695271454e-06, "loss": 0.8624, "step": 945 }, { "epoch": 0.027619631543604566, "grad_norm": 0.9130161155647957, "learning_rate": 1.380618797431407e-06, "loss": 0.8431, "step": 946 }, { "epoch": 0.027648827771451927, "grad_norm": 0.8312926049915116, "learning_rate": 1.3820782253356685e-06, "loss": 0.8518, "step": 947 }, { "epoch": 0.027678023999299292, "grad_norm": 0.8605101747233023, "learning_rate": 1.38353765323993e-06, "loss": 0.8527, "step": 948 }, { "epoch": 0.027707220227146653, "grad_norm": 0.7618264621747121, "learning_rate": 1.3849970811441915e-06, "loss": 0.72, "step": 949 }, { "epoch": 0.027736416454994015, "grad_norm": 0.9181708470853313, "learning_rate": 1.3864565090484531e-06, "loss": 0.9255, "step": 950 }, { "epoch": 0.027765612682841376, "grad_norm": 0.8463126604156616, "learning_rate": 1.3879159369527146e-06, "loss": 0.7625, "step": 951 }, { "epoch": 0.02779480891068874, "grad_norm": 0.9002602198282943, "learning_rate": 1.389375364856976e-06, "loss": 0.941, "step": 952 }, { "epoch": 0.0278240051385361, "grad_norm": 0.8381085924026181, "learning_rate": 1.390834792761238e-06, "loss": 0.893, "step": 953 }, { "epoch": 0.027853201366383463, "grad_norm": 1.1606860151401277, "learning_rate": 1.3922942206654994e-06, "loss": 0.9997, "step": 954 }, { "epoch": 0.027882397594230824, "grad_norm": 0.8510545876691423, "learning_rate": 1.3937536485697608e-06, "loss": 0.9164, "step": 955 }, { "epoch": 0.02791159382207819, "grad_norm": 0.8236166938853148, "learning_rate": 1.3952130764740223e-06, "loss": 0.8234, "step": 956 }, { "epoch": 0.02794079004992555, "grad_norm": 0.8541440245881471, "learning_rate": 1.396672504378284e-06, "loss": 0.8107, "step": 957 }, { "epoch": 0.02796998627777291, "grad_norm": 0.8156768850428006, "learning_rate": 1.3981319322825454e-06, "loss": 0.8296, "step": 958 }, { "epoch": 0.027999182505620272, "grad_norm": 0.8646940460573783, "learning_rate": 1.3995913601868069e-06, "loss": 0.7958, "step": 959 }, { "epoch": 0.028028378733467637, "grad_norm": 0.9404687495891165, "learning_rate": 1.4010507880910683e-06, "loss": 0.8113, "step": 960 }, { "epoch": 0.028057574961314998, "grad_norm": 0.7878331007989994, "learning_rate": 1.40251021599533e-06, "loss": 0.8076, "step": 961 }, { "epoch": 0.02808677118916236, "grad_norm": 0.8332251370687966, "learning_rate": 1.4039696438995915e-06, "loss": 0.8674, "step": 962 }, { "epoch": 0.028115967417009724, "grad_norm": 0.9581077260966252, "learning_rate": 1.405429071803853e-06, "loss": 0.8188, "step": 963 }, { "epoch": 0.028145163644857085, "grad_norm": 0.8800384222875135, "learning_rate": 1.4068884997081144e-06, "loss": 0.8667, "step": 964 }, { "epoch": 0.028174359872704446, "grad_norm": 0.8225923043888254, "learning_rate": 1.408347927612376e-06, "loss": 0.8186, "step": 965 }, { "epoch": 0.028203556100551808, "grad_norm": 0.9208940931345992, "learning_rate": 1.4098073555166375e-06, "loss": 0.9323, "step": 966 }, { "epoch": 0.028232752328399172, "grad_norm": 0.8142250333569762, "learning_rate": 1.411266783420899e-06, "loss": 0.843, "step": 967 }, { "epoch": 0.028261948556246533, "grad_norm": 1.0934357921429187, "learning_rate": 1.4127262113251604e-06, "loss": 0.8936, "step": 968 }, { "epoch": 0.028291144784093895, "grad_norm": 0.9782423842807247, "learning_rate": 1.4141856392294223e-06, "loss": 0.861, "step": 969 }, { "epoch": 0.028320341011941256, "grad_norm": 0.8703619680653046, "learning_rate": 1.4156450671336838e-06, "loss": 0.8504, "step": 970 }, { "epoch": 0.02834953723978862, "grad_norm": 0.8097975551434163, "learning_rate": 1.4171044950379454e-06, "loss": 0.7971, "step": 971 }, { "epoch": 0.028378733467635982, "grad_norm": 0.8500520807157949, "learning_rate": 1.418563922942207e-06, "loss": 0.9256, "step": 972 }, { "epoch": 0.028407929695483343, "grad_norm": 0.9008980421031929, "learning_rate": 1.4200233508464684e-06, "loss": 0.914, "step": 973 }, { "epoch": 0.028437125923330704, "grad_norm": 0.9558998760711884, "learning_rate": 1.4214827787507298e-06, "loss": 0.8819, "step": 974 }, { "epoch": 0.02846632215117807, "grad_norm": 0.8186185789937709, "learning_rate": 1.4229422066549915e-06, "loss": 0.8737, "step": 975 }, { "epoch": 0.02849551837902543, "grad_norm": 0.8286115759327033, "learning_rate": 1.424401634559253e-06, "loss": 0.782, "step": 976 }, { "epoch": 0.02852471460687279, "grad_norm": 1.1223525123785167, "learning_rate": 1.4258610624635144e-06, "loss": 0.8768, "step": 977 }, { "epoch": 0.028553910834720152, "grad_norm": 0.8648044151830997, "learning_rate": 1.4273204903677759e-06, "loss": 0.8388, "step": 978 }, { "epoch": 0.028583107062567517, "grad_norm": 1.1214396383989969, "learning_rate": 1.4287799182720375e-06, "loss": 0.9855, "step": 979 }, { "epoch": 0.02861230329041488, "grad_norm": 0.8166875405903132, "learning_rate": 1.430239346176299e-06, "loss": 0.7532, "step": 980 }, { "epoch": 0.02864149951826224, "grad_norm": 0.8339358245928736, "learning_rate": 1.4316987740805604e-06, "loss": 0.8381, "step": 981 }, { "epoch": 0.028670695746109604, "grad_norm": 0.9413607507707864, "learning_rate": 1.433158201984822e-06, "loss": 0.9255, "step": 982 }, { "epoch": 0.028699891973956965, "grad_norm": 1.3228659108164476, "learning_rate": 1.4346176298890836e-06, "loss": 0.8194, "step": 983 }, { "epoch": 0.028729088201804327, "grad_norm": 0.8717627631939536, "learning_rate": 1.436077057793345e-06, "loss": 0.7941, "step": 984 }, { "epoch": 0.028758284429651688, "grad_norm": 0.8407236067587945, "learning_rate": 1.4375364856976067e-06, "loss": 0.8653, "step": 985 }, { "epoch": 0.028787480657499052, "grad_norm": 0.8266988393471115, "learning_rate": 1.4389959136018684e-06, "loss": 0.8404, "step": 986 }, { "epoch": 0.028816676885346414, "grad_norm": 0.7978441939907701, "learning_rate": 1.4404553415061298e-06, "loss": 0.746, "step": 987 }, { "epoch": 0.028845873113193775, "grad_norm": 0.9109776636688826, "learning_rate": 1.4419147694103913e-06, "loss": 0.9093, "step": 988 }, { "epoch": 0.028875069341041136, "grad_norm": 0.7966005814801838, "learning_rate": 1.4433741973146527e-06, "loss": 0.7461, "step": 989 }, { "epoch": 0.0289042655688885, "grad_norm": 0.8449792484026653, "learning_rate": 1.4448336252189144e-06, "loss": 0.8529, "step": 990 }, { "epoch": 0.028933461796735862, "grad_norm": 0.8030783405356409, "learning_rate": 1.4462930531231759e-06, "loss": 0.7781, "step": 991 }, { "epoch": 0.028962658024583223, "grad_norm": 0.8122768012298706, "learning_rate": 1.4477524810274373e-06, "loss": 0.818, "step": 992 }, { "epoch": 0.028991854252430584, "grad_norm": 0.8464649870032945, "learning_rate": 1.4492119089316988e-06, "loss": 0.845, "step": 993 }, { "epoch": 0.02902105048027795, "grad_norm": 1.0540585359212367, "learning_rate": 1.4506713368359605e-06, "loss": 0.7878, "step": 994 }, { "epoch": 0.02905024670812531, "grad_norm": 0.8565444560590912, "learning_rate": 1.452130764740222e-06, "loss": 0.8557, "step": 995 }, { "epoch": 0.02907944293597267, "grad_norm": 1.1423418828716523, "learning_rate": 1.4535901926444834e-06, "loss": 0.8967, "step": 996 }, { "epoch": 0.029108639163820036, "grad_norm": 0.8501742438466033, "learning_rate": 1.4550496205487448e-06, "loss": 0.8625, "step": 997 }, { "epoch": 0.029137835391667397, "grad_norm": 0.9127303600335573, "learning_rate": 1.4565090484530065e-06, "loss": 0.9224, "step": 998 }, { "epoch": 0.02916703161951476, "grad_norm": 0.9545490706636917, "learning_rate": 1.457968476357268e-06, "loss": 0.8521, "step": 999 }, { "epoch": 0.02919622784736212, "grad_norm": 0.7771613726529936, "learning_rate": 1.4594279042615294e-06, "loss": 0.7046, "step": 1000 }, { "epoch": 0.029225424075209484, "grad_norm": 0.8647188309351156, "learning_rate": 1.4608873321657913e-06, "loss": 0.6939, "step": 1001 }, { "epoch": 0.029254620303056846, "grad_norm": 2.5615649466158987, "learning_rate": 1.4623467600700528e-06, "loss": 0.8492, "step": 1002 }, { "epoch": 0.029283816530904207, "grad_norm": 0.8044744868476505, "learning_rate": 1.4638061879743142e-06, "loss": 0.7873, "step": 1003 }, { "epoch": 0.029313012758751568, "grad_norm": 0.8541042382620035, "learning_rate": 1.4652656158785759e-06, "loss": 0.855, "step": 1004 }, { "epoch": 0.029342208986598933, "grad_norm": 0.8083479199716805, "learning_rate": 1.4667250437828373e-06, "loss": 0.7177, "step": 1005 }, { "epoch": 0.029371405214446294, "grad_norm": 0.7987101133988754, "learning_rate": 1.4681844716870988e-06, "loss": 0.8021, "step": 1006 }, { "epoch": 0.029400601442293655, "grad_norm": 0.9239926193944661, "learning_rate": 1.4696438995913603e-06, "loss": 0.7883, "step": 1007 }, { "epoch": 0.029429797670141016, "grad_norm": 1.1129855603720018, "learning_rate": 1.471103327495622e-06, "loss": 0.8915, "step": 1008 }, { "epoch": 0.02945899389798838, "grad_norm": 0.7594505288410772, "learning_rate": 1.4725627553998834e-06, "loss": 0.704, "step": 1009 }, { "epoch": 0.029488190125835742, "grad_norm": 1.032035623274471, "learning_rate": 1.4740221833041449e-06, "loss": 0.8835, "step": 1010 }, { "epoch": 0.029517386353683103, "grad_norm": 0.8617025635086499, "learning_rate": 1.4754816112084063e-06, "loss": 0.7917, "step": 1011 }, { "epoch": 0.029546582581530468, "grad_norm": 0.9370262193348865, "learning_rate": 1.476941039112668e-06, "loss": 0.9746, "step": 1012 }, { "epoch": 0.02957577880937783, "grad_norm": 0.8835713025759736, "learning_rate": 1.4784004670169294e-06, "loss": 0.837, "step": 1013 }, { "epoch": 0.02960497503722519, "grad_norm": 1.0469847294882202, "learning_rate": 1.479859894921191e-06, "loss": 0.9837, "step": 1014 }, { "epoch": 0.02963417126507255, "grad_norm": 0.9387211797252786, "learning_rate": 1.4813193228254524e-06, "loss": 0.8655, "step": 1015 }, { "epoch": 0.029663367492919916, "grad_norm": 0.9007811937499892, "learning_rate": 1.482778750729714e-06, "loss": 0.8244, "step": 1016 }, { "epoch": 0.029692563720767277, "grad_norm": 0.8372044474875262, "learning_rate": 1.4842381786339757e-06, "loss": 0.7096, "step": 1017 }, { "epoch": 0.02972175994861464, "grad_norm": 0.9364692620478566, "learning_rate": 1.4856976065382372e-06, "loss": 0.9054, "step": 1018 }, { "epoch": 0.029750956176462, "grad_norm": 0.8064443750385216, "learning_rate": 1.4871570344424988e-06, "loss": 0.8254, "step": 1019 }, { "epoch": 0.029780152404309364, "grad_norm": 0.8010153418845918, "learning_rate": 1.4886164623467603e-06, "loss": 0.8143, "step": 1020 }, { "epoch": 0.029809348632156726, "grad_norm": 0.7336108865318064, "learning_rate": 1.4900758902510217e-06, "loss": 0.7183, "step": 1021 }, { "epoch": 0.029838544860004087, "grad_norm": 0.9640161557785738, "learning_rate": 1.4915353181552832e-06, "loss": 0.857, "step": 1022 }, { "epoch": 0.029867741087851448, "grad_norm": 0.8352530164574334, "learning_rate": 1.4929947460595449e-06, "loss": 0.8344, "step": 1023 }, { "epoch": 0.029896937315698813, "grad_norm": 0.8730245322327131, "learning_rate": 1.4944541739638063e-06, "loss": 0.8972, "step": 1024 }, { "epoch": 0.029926133543546174, "grad_norm": 0.8053431003602801, "learning_rate": 1.4959136018680678e-06, "loss": 0.7745, "step": 1025 }, { "epoch": 0.029955329771393535, "grad_norm": 0.8735884123624365, "learning_rate": 1.4973730297723292e-06, "loss": 0.8311, "step": 1026 }, { "epoch": 0.029984525999240896, "grad_norm": 1.1150804648264359, "learning_rate": 1.498832457676591e-06, "loss": 0.9372, "step": 1027 }, { "epoch": 0.03001372222708826, "grad_norm": 0.9115941001587754, "learning_rate": 1.5002918855808524e-06, "loss": 0.7948, "step": 1028 }, { "epoch": 0.030042918454935622, "grad_norm": 0.842870586785331, "learning_rate": 1.5017513134851138e-06, "loss": 0.7565, "step": 1029 }, { "epoch": 0.030072114682782983, "grad_norm": 0.8529511571038865, "learning_rate": 1.5032107413893753e-06, "loss": 0.8769, "step": 1030 }, { "epoch": 0.030101310910630348, "grad_norm": 0.8131262317945124, "learning_rate": 1.504670169293637e-06, "loss": 0.7445, "step": 1031 }, { "epoch": 0.03013050713847771, "grad_norm": 0.8832940823908858, "learning_rate": 1.5061295971978984e-06, "loss": 0.8088, "step": 1032 }, { "epoch": 0.03015970336632507, "grad_norm": 0.9201336440738953, "learning_rate": 1.5075890251021603e-06, "loss": 0.98, "step": 1033 }, { "epoch": 0.03018889959417243, "grad_norm": 0.7831619567817265, "learning_rate": 1.5090484530064218e-06, "loss": 0.755, "step": 1034 }, { "epoch": 0.030218095822019796, "grad_norm": 0.8416461777759962, "learning_rate": 1.5105078809106832e-06, "loss": 0.8532, "step": 1035 }, { "epoch": 0.030247292049867158, "grad_norm": 0.9244989606517859, "learning_rate": 1.5119673088149447e-06, "loss": 0.7942, "step": 1036 }, { "epoch": 0.03027648827771452, "grad_norm": 0.8744383090894322, "learning_rate": 1.5134267367192063e-06, "loss": 0.8722, "step": 1037 }, { "epoch": 0.03030568450556188, "grad_norm": 0.8714083999399076, "learning_rate": 1.5148861646234678e-06, "loss": 0.8323, "step": 1038 }, { "epoch": 0.030334880733409245, "grad_norm": 0.7686122759917685, "learning_rate": 1.5163455925277293e-06, "loss": 0.7419, "step": 1039 }, { "epoch": 0.030364076961256606, "grad_norm": 0.8563840087616248, "learning_rate": 1.5178050204319907e-06, "loss": 0.8448, "step": 1040 }, { "epoch": 0.030393273189103967, "grad_norm": 0.9143571857325814, "learning_rate": 1.5192644483362524e-06, "loss": 0.8295, "step": 1041 }, { "epoch": 0.030422469416951328, "grad_norm": 1.0436782710068542, "learning_rate": 1.5207238762405138e-06, "loss": 0.9131, "step": 1042 }, { "epoch": 0.030451665644798693, "grad_norm": 0.9532335247949969, "learning_rate": 1.5221833041447753e-06, "loss": 0.8101, "step": 1043 }, { "epoch": 0.030480861872646054, "grad_norm": 0.9301031274426359, "learning_rate": 1.5236427320490368e-06, "loss": 0.9335, "step": 1044 }, { "epoch": 0.030510058100493415, "grad_norm": 0.9416509949874902, "learning_rate": 1.5251021599532984e-06, "loss": 0.9011, "step": 1045 }, { "epoch": 0.03053925432834078, "grad_norm": 0.7984793052202712, "learning_rate": 1.5265615878575599e-06, "loss": 0.7364, "step": 1046 }, { "epoch": 0.03056845055618814, "grad_norm": 0.8037603661817584, "learning_rate": 1.5280210157618213e-06, "loss": 0.7688, "step": 1047 }, { "epoch": 0.030597646784035502, "grad_norm": 0.8306825953946474, "learning_rate": 1.5294804436660828e-06, "loss": 0.8702, "step": 1048 }, { "epoch": 0.030626843011882864, "grad_norm": 0.8493617609362717, "learning_rate": 1.5309398715703447e-06, "loss": 0.885, "step": 1049 }, { "epoch": 0.030656039239730228, "grad_norm": 0.9368966764767721, "learning_rate": 1.5323992994746061e-06, "loss": 0.8087, "step": 1050 }, { "epoch": 0.03068523546757759, "grad_norm": 0.7565742514604875, "learning_rate": 1.5338587273788676e-06, "loss": 0.7012, "step": 1051 }, { "epoch": 0.03071443169542495, "grad_norm": 1.1998333784615283, "learning_rate": 1.5353181552831293e-06, "loss": 0.8967, "step": 1052 }, { "epoch": 0.030743627923272312, "grad_norm": 0.8628465333571345, "learning_rate": 1.5367775831873907e-06, "loss": 0.8861, "step": 1053 }, { "epoch": 0.030772824151119677, "grad_norm": 0.8658547377634109, "learning_rate": 1.5382370110916522e-06, "loss": 0.9093, "step": 1054 }, { "epoch": 0.030802020378967038, "grad_norm": 0.988329997054124, "learning_rate": 1.5396964389959137e-06, "loss": 0.9238, "step": 1055 }, { "epoch": 0.0308312166068144, "grad_norm": 0.8319518614056959, "learning_rate": 1.5411558669001753e-06, "loss": 0.8402, "step": 1056 }, { "epoch": 0.03086041283466176, "grad_norm": 0.7742976683905711, "learning_rate": 1.5426152948044368e-06, "loss": 0.7611, "step": 1057 }, { "epoch": 0.030889609062509125, "grad_norm": 0.7862609604713673, "learning_rate": 1.5440747227086982e-06, "loss": 0.7084, "step": 1058 }, { "epoch": 0.030918805290356486, "grad_norm": 0.8745031209534118, "learning_rate": 1.54553415061296e-06, "loss": 0.8775, "step": 1059 }, { "epoch": 0.030948001518203847, "grad_norm": 1.1282462975723642, "learning_rate": 1.5469935785172214e-06, "loss": 0.7835, "step": 1060 }, { "epoch": 0.030977197746051212, "grad_norm": 0.8533574296375219, "learning_rate": 1.5484530064214828e-06, "loss": 0.7289, "step": 1061 }, { "epoch": 0.031006393973898573, "grad_norm": 0.8293227827987778, "learning_rate": 1.5499124343257443e-06, "loss": 0.7716, "step": 1062 }, { "epoch": 0.031035590201745934, "grad_norm": 0.7862532177035395, "learning_rate": 1.551371862230006e-06, "loss": 0.7312, "step": 1063 }, { "epoch": 0.031064786429593295, "grad_norm": 0.8314232743926002, "learning_rate": 1.5528312901342674e-06, "loss": 0.9081, "step": 1064 }, { "epoch": 0.03109398265744066, "grad_norm": 0.8315880317585617, "learning_rate": 1.554290718038529e-06, "loss": 0.7892, "step": 1065 }, { "epoch": 0.03112317888528802, "grad_norm": 0.7864500643219494, "learning_rate": 1.5557501459427908e-06, "loss": 0.7502, "step": 1066 }, { "epoch": 0.031152375113135383, "grad_norm": 0.9363273697875735, "learning_rate": 1.5572095738470522e-06, "loss": 0.8663, "step": 1067 }, { "epoch": 0.031181571340982744, "grad_norm": 0.8149899583125662, "learning_rate": 1.5586690017513137e-06, "loss": 0.8414, "step": 1068 }, { "epoch": 0.03121076756883011, "grad_norm": 0.8404609504000013, "learning_rate": 1.5601284296555751e-06, "loss": 0.8889, "step": 1069 }, { "epoch": 0.03123996379667747, "grad_norm": 0.8718193251722478, "learning_rate": 1.5615878575598368e-06, "loss": 0.8688, "step": 1070 }, { "epoch": 0.03126916002452483, "grad_norm": 1.0168019736513205, "learning_rate": 1.5630472854640983e-06, "loss": 0.7673, "step": 1071 }, { "epoch": 0.03129835625237219, "grad_norm": 0.9153028969797575, "learning_rate": 1.5645067133683597e-06, "loss": 0.7801, "step": 1072 }, { "epoch": 0.03132755248021955, "grad_norm": 1.0912131057511092, "learning_rate": 1.5659661412726212e-06, "loss": 0.8851, "step": 1073 }, { "epoch": 0.031356748708066914, "grad_norm": 0.8492619474095038, "learning_rate": 1.5674255691768828e-06, "loss": 0.8219, "step": 1074 }, { "epoch": 0.03138594493591428, "grad_norm": 0.8600170025015661, "learning_rate": 1.5688849970811443e-06, "loss": 0.8302, "step": 1075 }, { "epoch": 0.031415141163761644, "grad_norm": 0.7961570961879286, "learning_rate": 1.5703444249854058e-06, "loss": 0.7783, "step": 1076 }, { "epoch": 0.031444337391609005, "grad_norm": 0.9296479555388117, "learning_rate": 1.5718038528896672e-06, "loss": 0.9524, "step": 1077 }, { "epoch": 0.031473533619456366, "grad_norm": 0.7745223664061796, "learning_rate": 1.5732632807939289e-06, "loss": 0.7083, "step": 1078 }, { "epoch": 0.03150272984730373, "grad_norm": 0.9618860959260871, "learning_rate": 1.5747227086981903e-06, "loss": 0.8687, "step": 1079 }, { "epoch": 0.03153192607515109, "grad_norm": 0.8213557018229055, "learning_rate": 1.5761821366024518e-06, "loss": 0.771, "step": 1080 }, { "epoch": 0.03156112230299845, "grad_norm": 0.9303092942747037, "learning_rate": 1.5776415645067137e-06, "loss": 0.8467, "step": 1081 }, { "epoch": 0.03159031853084582, "grad_norm": 0.9488277302807397, "learning_rate": 1.5791009924109751e-06, "loss": 0.8409, "step": 1082 }, { "epoch": 0.03161951475869318, "grad_norm": 0.8518352847395008, "learning_rate": 1.5805604203152366e-06, "loss": 0.7855, "step": 1083 }, { "epoch": 0.03164871098654054, "grad_norm": 1.1044676288618196, "learning_rate": 1.5820198482194983e-06, "loss": 0.8947, "step": 1084 }, { "epoch": 0.0316779072143879, "grad_norm": 0.903592155566432, "learning_rate": 1.5834792761237597e-06, "loss": 0.7815, "step": 1085 }, { "epoch": 0.03170710344223526, "grad_norm": 0.8531565070494546, "learning_rate": 1.5849387040280212e-06, "loss": 0.7841, "step": 1086 }, { "epoch": 0.031736299670082624, "grad_norm": 0.8464256603883766, "learning_rate": 1.5863981319322826e-06, "loss": 0.8565, "step": 1087 }, { "epoch": 0.031765495897929985, "grad_norm": 0.8137151304642083, "learning_rate": 1.5878575598365443e-06, "loss": 0.8196, "step": 1088 }, { "epoch": 0.031794692125777346, "grad_norm": 0.7820283866177805, "learning_rate": 1.5893169877408058e-06, "loss": 0.8246, "step": 1089 }, { "epoch": 0.031823888353624714, "grad_norm": 0.881057411905999, "learning_rate": 1.5907764156450672e-06, "loss": 0.8736, "step": 1090 }, { "epoch": 0.031853084581472076, "grad_norm": 0.886564298335554, "learning_rate": 1.5922358435493287e-06, "loss": 0.8023, "step": 1091 }, { "epoch": 0.03188228080931944, "grad_norm": 0.8305525381861364, "learning_rate": 1.5936952714535904e-06, "loss": 0.8382, "step": 1092 }, { "epoch": 0.0319114770371668, "grad_norm": 0.847074814944864, "learning_rate": 1.5951546993578518e-06, "loss": 0.8304, "step": 1093 }, { "epoch": 0.03194067326501416, "grad_norm": 0.8230976882941804, "learning_rate": 1.5966141272621133e-06, "loss": 0.7819, "step": 1094 }, { "epoch": 0.03196986949286152, "grad_norm": 0.9317898860652862, "learning_rate": 1.5980735551663747e-06, "loss": 0.838, "step": 1095 }, { "epoch": 0.03199906572070888, "grad_norm": 0.8804814033626297, "learning_rate": 1.5995329830706364e-06, "loss": 0.729, "step": 1096 }, { "epoch": 0.03202826194855625, "grad_norm": 0.8671818707321857, "learning_rate": 1.600992410974898e-06, "loss": 0.7572, "step": 1097 }, { "epoch": 0.03205745817640361, "grad_norm": 0.8345556655345127, "learning_rate": 1.6024518388791595e-06, "loss": 0.8171, "step": 1098 }, { "epoch": 0.03208665440425097, "grad_norm": 0.8836604373852134, "learning_rate": 1.6039112667834212e-06, "loss": 0.8018, "step": 1099 }, { "epoch": 0.03211585063209833, "grad_norm": 1.4608804427140463, "learning_rate": 1.6053706946876827e-06, "loss": 0.8933, "step": 1100 }, { "epoch": 0.032145046859945695, "grad_norm": 0.781784712605264, "learning_rate": 1.6068301225919441e-06, "loss": 0.7549, "step": 1101 }, { "epoch": 0.032174243087793056, "grad_norm": 0.8762183491505917, "learning_rate": 1.6082895504962056e-06, "loss": 0.8309, "step": 1102 }, { "epoch": 0.03220343931564042, "grad_norm": 0.9090630297002189, "learning_rate": 1.6097489784004672e-06, "loss": 0.8087, "step": 1103 }, { "epoch": 0.03223263554348778, "grad_norm": 0.8078620195955103, "learning_rate": 1.6112084063047287e-06, "loss": 0.769, "step": 1104 }, { "epoch": 0.032261831771335146, "grad_norm": 0.9550514555028864, "learning_rate": 1.6126678342089902e-06, "loss": 0.871, "step": 1105 }, { "epoch": 0.03229102799918251, "grad_norm": 0.7918045180869986, "learning_rate": 1.6141272621132516e-06, "loss": 0.7726, "step": 1106 }, { "epoch": 0.03232022422702987, "grad_norm": 0.8795329653692749, "learning_rate": 1.6155866900175133e-06, "loss": 0.8804, "step": 1107 }, { "epoch": 0.03234942045487723, "grad_norm": 2.166646098883895, "learning_rate": 1.6170461179217748e-06, "loss": 0.8584, "step": 1108 }, { "epoch": 0.03237861668272459, "grad_norm": 0.796615501735554, "learning_rate": 1.6185055458260362e-06, "loss": 0.774, "step": 1109 }, { "epoch": 0.03240781291057195, "grad_norm": 0.863377175324778, "learning_rate": 1.6199649737302977e-06, "loss": 0.8063, "step": 1110 }, { "epoch": 0.032437009138419313, "grad_norm": 0.8447953513862048, "learning_rate": 1.6214244016345593e-06, "loss": 0.7006, "step": 1111 }, { "epoch": 0.03246620536626668, "grad_norm": 0.8486550370700896, "learning_rate": 1.6228838295388208e-06, "loss": 0.8412, "step": 1112 }, { "epoch": 0.03249540159411404, "grad_norm": 0.7742201103282609, "learning_rate": 1.6243432574430827e-06, "loss": 0.7394, "step": 1113 }, { "epoch": 0.032524597821961404, "grad_norm": 1.0560950383178191, "learning_rate": 1.6258026853473441e-06, "loss": 0.7919, "step": 1114 }, { "epoch": 0.032553794049808765, "grad_norm": 0.9405347747465522, "learning_rate": 1.6272621132516056e-06, "loss": 0.8001, "step": 1115 }, { "epoch": 0.032582990277656126, "grad_norm": 0.9222602105887723, "learning_rate": 1.628721541155867e-06, "loss": 0.8319, "step": 1116 }, { "epoch": 0.03261218650550349, "grad_norm": 0.9028139083269136, "learning_rate": 1.6301809690601287e-06, "loss": 0.82, "step": 1117 }, { "epoch": 0.03264138273335085, "grad_norm": 0.8030229501811817, "learning_rate": 1.6316403969643902e-06, "loss": 0.8052, "step": 1118 }, { "epoch": 0.03267057896119821, "grad_norm": 0.7911874567529031, "learning_rate": 1.6330998248686516e-06, "loss": 0.7239, "step": 1119 }, { "epoch": 0.03269977518904558, "grad_norm": 0.8050675106187507, "learning_rate": 1.634559252772913e-06, "loss": 0.8069, "step": 1120 }, { "epoch": 0.03272897141689294, "grad_norm": 0.849907059573934, "learning_rate": 1.6360186806771748e-06, "loss": 0.8663, "step": 1121 }, { "epoch": 0.0327581676447403, "grad_norm": 0.7891322223723325, "learning_rate": 1.6374781085814362e-06, "loss": 0.7637, "step": 1122 }, { "epoch": 0.03278736387258766, "grad_norm": 0.8710855533873493, "learning_rate": 1.6389375364856977e-06, "loss": 0.8349, "step": 1123 }, { "epoch": 0.03281656010043502, "grad_norm": 0.8984036602812557, "learning_rate": 1.6403969643899591e-06, "loss": 0.8658, "step": 1124 }, { "epoch": 0.032845756328282384, "grad_norm": 0.8010337628852575, "learning_rate": 1.6418563922942208e-06, "loss": 0.7759, "step": 1125 }, { "epoch": 0.032874952556129745, "grad_norm": 0.8342822486128935, "learning_rate": 1.6433158201984823e-06, "loss": 0.8181, "step": 1126 }, { "epoch": 0.032904148783977114, "grad_norm": 0.8403581936616826, "learning_rate": 1.6447752481027437e-06, "loss": 0.8422, "step": 1127 }, { "epoch": 0.032933345011824475, "grad_norm": 0.8494082081391924, "learning_rate": 1.6462346760070052e-06, "loss": 0.7952, "step": 1128 }, { "epoch": 0.032962541239671836, "grad_norm": 1.0132499378082047, "learning_rate": 1.647694103911267e-06, "loss": 0.8164, "step": 1129 }, { "epoch": 0.0329917374675192, "grad_norm": 0.7969117853822448, "learning_rate": 1.6491535318155285e-06, "loss": 0.7209, "step": 1130 }, { "epoch": 0.03302093369536656, "grad_norm": 0.8891699360593667, "learning_rate": 1.65061295971979e-06, "loss": 0.8084, "step": 1131 }, { "epoch": 0.03305012992321392, "grad_norm": 0.8080448573939165, "learning_rate": 1.6520723876240517e-06, "loss": 0.7815, "step": 1132 }, { "epoch": 0.03307932615106128, "grad_norm": 0.8093416683309154, "learning_rate": 1.6535318155283131e-06, "loss": 0.7009, "step": 1133 }, { "epoch": 0.03310852237890864, "grad_norm": 0.8903609305723756, "learning_rate": 1.6549912434325746e-06, "loss": 0.9007, "step": 1134 }, { "epoch": 0.03313771860675601, "grad_norm": 0.7866687147801342, "learning_rate": 1.656450671336836e-06, "loss": 0.7523, "step": 1135 }, { "epoch": 0.03316691483460337, "grad_norm": 0.7791989556595518, "learning_rate": 1.6579100992410977e-06, "loss": 0.8046, "step": 1136 }, { "epoch": 0.03319611106245073, "grad_norm": 0.8677513116936542, "learning_rate": 1.6593695271453592e-06, "loss": 0.7586, "step": 1137 }, { "epoch": 0.033225307290298094, "grad_norm": 0.8752230524401847, "learning_rate": 1.6608289550496206e-06, "loss": 0.8891, "step": 1138 }, { "epoch": 0.033254503518145455, "grad_norm": 0.886560824621893, "learning_rate": 1.662288382953882e-06, "loss": 0.9289, "step": 1139 }, { "epoch": 0.033283699745992816, "grad_norm": 0.8466134572781856, "learning_rate": 1.6637478108581437e-06, "loss": 0.7762, "step": 1140 }, { "epoch": 0.03331289597384018, "grad_norm": 0.9650224105863678, "learning_rate": 1.6652072387624052e-06, "loss": 0.9066, "step": 1141 }, { "epoch": 0.033342092201687545, "grad_norm": 0.945923529196215, "learning_rate": 1.6666666666666667e-06, "loss": 0.8254, "step": 1142 }, { "epoch": 0.03337128842953491, "grad_norm": 2.4389416664416514, "learning_rate": 1.6681260945709281e-06, "loss": 0.7842, "step": 1143 }, { "epoch": 0.03340048465738227, "grad_norm": 0.9748703504313814, "learning_rate": 1.6695855224751898e-06, "loss": 0.8454, "step": 1144 }, { "epoch": 0.03342968088522963, "grad_norm": 0.8523824476768703, "learning_rate": 1.6710449503794515e-06, "loss": 0.7441, "step": 1145 }, { "epoch": 0.03345887711307699, "grad_norm": 0.8667058069438321, "learning_rate": 1.6725043782837131e-06, "loss": 0.9577, "step": 1146 }, { "epoch": 0.03348807334092435, "grad_norm": 0.9105630083697444, "learning_rate": 1.6739638061879746e-06, "loss": 0.8792, "step": 1147 }, { "epoch": 0.03351726956877171, "grad_norm": 0.833536816025453, "learning_rate": 1.675423234092236e-06, "loss": 0.8262, "step": 1148 }, { "epoch": 0.033546465796619074, "grad_norm": 0.8861536391902085, "learning_rate": 1.6768826619964975e-06, "loss": 0.7308, "step": 1149 }, { "epoch": 0.03357566202446644, "grad_norm": 0.8597078926811141, "learning_rate": 1.6783420899007592e-06, "loss": 0.7853, "step": 1150 }, { "epoch": 0.0336048582523138, "grad_norm": 0.8537077319301264, "learning_rate": 1.6798015178050206e-06, "loss": 0.844, "step": 1151 }, { "epoch": 0.033634054480161164, "grad_norm": 0.8379253451850389, "learning_rate": 1.681260945709282e-06, "loss": 0.7874, "step": 1152 }, { "epoch": 0.033663250708008526, "grad_norm": 0.8977164102999684, "learning_rate": 1.6827203736135435e-06, "loss": 0.7936, "step": 1153 }, { "epoch": 0.03369244693585589, "grad_norm": 0.8220176038151339, "learning_rate": 1.6841798015178052e-06, "loss": 0.7991, "step": 1154 }, { "epoch": 0.03372164316370325, "grad_norm": 0.7866729747364519, "learning_rate": 1.6856392294220667e-06, "loss": 0.7282, "step": 1155 }, { "epoch": 0.03375083939155061, "grad_norm": 0.827162320332441, "learning_rate": 1.6870986573263281e-06, "loss": 0.8957, "step": 1156 }, { "epoch": 0.03378003561939797, "grad_norm": 0.8491007628912776, "learning_rate": 1.6885580852305896e-06, "loss": 0.8821, "step": 1157 }, { "epoch": 0.03380923184724534, "grad_norm": 0.8427978533979809, "learning_rate": 1.6900175131348513e-06, "loss": 0.7951, "step": 1158 }, { "epoch": 0.0338384280750927, "grad_norm": 0.8050307572568104, "learning_rate": 1.6914769410391127e-06, "loss": 0.7681, "step": 1159 }, { "epoch": 0.03386762430294006, "grad_norm": 1.1758339618024916, "learning_rate": 1.6929363689433742e-06, "loss": 0.8919, "step": 1160 }, { "epoch": 0.03389682053078742, "grad_norm": 0.8215369076342406, "learning_rate": 1.694395796847636e-06, "loss": 0.8052, "step": 1161 }, { "epoch": 0.03392601675863478, "grad_norm": 0.8830748202552996, "learning_rate": 1.6958552247518975e-06, "loss": 0.9125, "step": 1162 }, { "epoch": 0.033955212986482144, "grad_norm": 0.8557883498572479, "learning_rate": 1.697314652656159e-06, "loss": 0.8621, "step": 1163 }, { "epoch": 0.033984409214329506, "grad_norm": 0.883931786782134, "learning_rate": 1.6987740805604204e-06, "loss": 0.9559, "step": 1164 }, { "epoch": 0.034013605442176874, "grad_norm": 0.8892381430853387, "learning_rate": 1.7002335084646821e-06, "loss": 0.8725, "step": 1165 }, { "epoch": 0.034042801670024235, "grad_norm": 0.8949399527211996, "learning_rate": 1.7016929363689436e-06, "loss": 0.8569, "step": 1166 }, { "epoch": 0.034071997897871596, "grad_norm": 0.8450906329833471, "learning_rate": 1.703152364273205e-06, "loss": 0.8212, "step": 1167 }, { "epoch": 0.03410119412571896, "grad_norm": 0.8175564412505565, "learning_rate": 1.7046117921774665e-06, "loss": 0.7778, "step": 1168 }, { "epoch": 0.03413039035356632, "grad_norm": 0.8058674042675559, "learning_rate": 1.7060712200817282e-06, "loss": 0.7626, "step": 1169 }, { "epoch": 0.03415958658141368, "grad_norm": 0.7764252007557092, "learning_rate": 1.7075306479859896e-06, "loss": 0.7475, "step": 1170 }, { "epoch": 0.03418878280926104, "grad_norm": 1.0641433364997006, "learning_rate": 1.708990075890251e-06, "loss": 0.9098, "step": 1171 }, { "epoch": 0.0342179790371084, "grad_norm": 0.8844523184419764, "learning_rate": 1.7104495037945127e-06, "loss": 0.9167, "step": 1172 }, { "epoch": 0.03424717526495577, "grad_norm": 0.7702776870375629, "learning_rate": 1.7119089316987742e-06, "loss": 0.6922, "step": 1173 }, { "epoch": 0.03427637149280313, "grad_norm": 0.872680403896552, "learning_rate": 1.7133683596030357e-06, "loss": 0.8038, "step": 1174 }, { "epoch": 0.03430556772065049, "grad_norm": 0.920985488274028, "learning_rate": 1.7148277875072971e-06, "loss": 0.8247, "step": 1175 }, { "epoch": 0.034334763948497854, "grad_norm": 0.8292717723876759, "learning_rate": 1.7162872154115588e-06, "loss": 0.8334, "step": 1176 }, { "epoch": 0.034363960176345215, "grad_norm": 0.84494913413388, "learning_rate": 1.7177466433158205e-06, "loss": 0.8866, "step": 1177 }, { "epoch": 0.034393156404192576, "grad_norm": 0.7650706864671419, "learning_rate": 1.719206071220082e-06, "loss": 0.7408, "step": 1178 }, { "epoch": 0.03442235263203994, "grad_norm": 0.7988187639887816, "learning_rate": 1.7206654991243436e-06, "loss": 0.7984, "step": 1179 }, { "epoch": 0.034451548859887306, "grad_norm": 0.7995063530979526, "learning_rate": 1.722124927028605e-06, "loss": 0.7618, "step": 1180 }, { "epoch": 0.03448074508773467, "grad_norm": 0.9111920733137812, "learning_rate": 1.7235843549328665e-06, "loss": 0.88, "step": 1181 }, { "epoch": 0.03450994131558203, "grad_norm": 0.8998964823730219, "learning_rate": 1.725043782837128e-06, "loss": 0.8419, "step": 1182 }, { "epoch": 0.03453913754342939, "grad_norm": 0.8184073319442915, "learning_rate": 1.7265032107413896e-06, "loss": 0.8478, "step": 1183 }, { "epoch": 0.03456833377127675, "grad_norm": 0.8352242377464971, "learning_rate": 1.727962638645651e-06, "loss": 0.8149, "step": 1184 }, { "epoch": 0.03459752999912411, "grad_norm": 0.8814832292675685, "learning_rate": 1.7294220665499125e-06, "loss": 0.8311, "step": 1185 }, { "epoch": 0.03462672622697147, "grad_norm": 0.9714938928989502, "learning_rate": 1.730881494454174e-06, "loss": 0.8276, "step": 1186 }, { "epoch": 0.034655922454818834, "grad_norm": 0.7829617839904216, "learning_rate": 1.7323409223584357e-06, "loss": 0.7429, "step": 1187 }, { "epoch": 0.0346851186826662, "grad_norm": 1.0617674914407573, "learning_rate": 1.7338003502626971e-06, "loss": 0.8697, "step": 1188 }, { "epoch": 0.03471431491051356, "grad_norm": 0.8543820852855648, "learning_rate": 1.7352597781669586e-06, "loss": 0.8641, "step": 1189 }, { "epoch": 0.034743511138360925, "grad_norm": 0.860876403427801, "learning_rate": 1.73671920607122e-06, "loss": 0.9178, "step": 1190 }, { "epoch": 0.034772707366208286, "grad_norm": 0.7967959169232256, "learning_rate": 1.7381786339754817e-06, "loss": 0.6901, "step": 1191 }, { "epoch": 0.03480190359405565, "grad_norm": 0.9143075312439567, "learning_rate": 1.7396380618797432e-06, "loss": 0.8863, "step": 1192 }, { "epoch": 0.03483109982190301, "grad_norm": 0.8866668387057016, "learning_rate": 1.7410974897840048e-06, "loss": 0.8714, "step": 1193 }, { "epoch": 0.03486029604975037, "grad_norm": 0.8214834411112754, "learning_rate": 1.7425569176882665e-06, "loss": 0.7654, "step": 1194 }, { "epoch": 0.03488949227759774, "grad_norm": 0.9221484801168766, "learning_rate": 1.744016345592528e-06, "loss": 0.826, "step": 1195 }, { "epoch": 0.0349186885054451, "grad_norm": 0.8845206950364296, "learning_rate": 1.7454757734967894e-06, "loss": 1.0221, "step": 1196 }, { "epoch": 0.03494788473329246, "grad_norm": 0.8885669130152022, "learning_rate": 1.7469352014010509e-06, "loss": 0.7914, "step": 1197 }, { "epoch": 0.03497708096113982, "grad_norm": 0.8303660872145675, "learning_rate": 1.7483946293053126e-06, "loss": 0.7717, "step": 1198 }, { "epoch": 0.03500627718898718, "grad_norm": 1.0306973820020835, "learning_rate": 1.749854057209574e-06, "loss": 0.7965, "step": 1199 }, { "epoch": 0.035035473416834544, "grad_norm": 0.8297610813763926, "learning_rate": 1.7513134851138355e-06, "loss": 0.8011, "step": 1200 }, { "epoch": 0.035064669644681905, "grad_norm": 0.8598071993038191, "learning_rate": 1.7527729130180971e-06, "loss": 0.899, "step": 1201 }, { "epoch": 0.035093865872529266, "grad_norm": 0.8295274033569779, "learning_rate": 1.7542323409223586e-06, "loss": 0.7565, "step": 1202 }, { "epoch": 0.035123062100376634, "grad_norm": 0.8157697549832637, "learning_rate": 1.75569176882662e-06, "loss": 0.7154, "step": 1203 }, { "epoch": 0.035152258328223995, "grad_norm": 0.9288445447505521, "learning_rate": 1.7571511967308815e-06, "loss": 0.9013, "step": 1204 }, { "epoch": 0.035181454556071357, "grad_norm": 0.8186031315253067, "learning_rate": 1.7586106246351432e-06, "loss": 0.804, "step": 1205 }, { "epoch": 0.03521065078391872, "grad_norm": 0.8568923264231786, "learning_rate": 1.7600700525394046e-06, "loss": 0.9227, "step": 1206 }, { "epoch": 0.03523984701176608, "grad_norm": 0.7745421502182251, "learning_rate": 1.7615294804436661e-06, "loss": 0.8159, "step": 1207 }, { "epoch": 0.03526904323961344, "grad_norm": 0.8883172959645124, "learning_rate": 1.7629889083479276e-06, "loss": 0.9061, "step": 1208 }, { "epoch": 0.0352982394674608, "grad_norm": 0.9564587168790154, "learning_rate": 1.7644483362521894e-06, "loss": 0.9279, "step": 1209 }, { "epoch": 0.03532743569530817, "grad_norm": 0.7601153674637127, "learning_rate": 1.765907764156451e-06, "loss": 0.7278, "step": 1210 }, { "epoch": 0.03535663192315553, "grad_norm": 0.8002062553210263, "learning_rate": 1.7673671920607124e-06, "loss": 0.7766, "step": 1211 }, { "epoch": 0.03538582815100289, "grad_norm": 0.939589601052436, "learning_rate": 1.768826619964974e-06, "loss": 0.9085, "step": 1212 }, { "epoch": 0.03541502437885025, "grad_norm": 0.8777728729655178, "learning_rate": 1.7702860478692355e-06, "loss": 0.9008, "step": 1213 }, { "epoch": 0.035444220606697614, "grad_norm": 1.6981456159768358, "learning_rate": 1.771745475773497e-06, "loss": 0.7837, "step": 1214 }, { "epoch": 0.035473416834544975, "grad_norm": 0.8719354780398472, "learning_rate": 1.7732049036777584e-06, "loss": 0.837, "step": 1215 }, { "epoch": 0.03550261306239234, "grad_norm": 0.8162902213267959, "learning_rate": 1.77466433158202e-06, "loss": 0.8634, "step": 1216 }, { "epoch": 0.0355318092902397, "grad_norm": 0.9888315040954453, "learning_rate": 1.7761237594862815e-06, "loss": 0.8164, "step": 1217 }, { "epoch": 0.035561005518087066, "grad_norm": 1.0438033570453134, "learning_rate": 1.777583187390543e-06, "loss": 0.7576, "step": 1218 }, { "epoch": 0.03559020174593443, "grad_norm": 0.931360963081222, "learning_rate": 1.7790426152948045e-06, "loss": 0.8002, "step": 1219 }, { "epoch": 0.03561939797378179, "grad_norm": 1.008987840122939, "learning_rate": 1.7805020431990661e-06, "loss": 0.8828, "step": 1220 }, { "epoch": 0.03564859420162915, "grad_norm": 0.9702664713975407, "learning_rate": 1.7819614711033276e-06, "loss": 0.7413, "step": 1221 }, { "epoch": 0.03567779042947651, "grad_norm": 0.8101073275908351, "learning_rate": 1.783420899007589e-06, "loss": 0.76, "step": 1222 }, { "epoch": 0.03570698665732387, "grad_norm": 0.8972105127479035, "learning_rate": 1.7848803269118505e-06, "loss": 0.8609, "step": 1223 }, { "epoch": 0.03573618288517123, "grad_norm": 0.8070880787268047, "learning_rate": 1.7863397548161122e-06, "loss": 0.7376, "step": 1224 }, { "epoch": 0.0357653791130186, "grad_norm": 0.7530903018305831, "learning_rate": 1.7877991827203738e-06, "loss": 0.745, "step": 1225 }, { "epoch": 0.03579457534086596, "grad_norm": 0.9191812941854646, "learning_rate": 1.7892586106246355e-06, "loss": 0.8042, "step": 1226 }, { "epoch": 0.035823771568713324, "grad_norm": 0.8544483498859574, "learning_rate": 1.790718038528897e-06, "loss": 0.8413, "step": 1227 }, { "epoch": 0.035852967796560685, "grad_norm": 0.9642029833683072, "learning_rate": 1.7921774664331584e-06, "loss": 0.916, "step": 1228 }, { "epoch": 0.035882164024408046, "grad_norm": 0.7977234438613593, "learning_rate": 1.7936368943374199e-06, "loss": 0.7758, "step": 1229 }, { "epoch": 0.03591136025225541, "grad_norm": 1.0981885260409723, "learning_rate": 1.7950963222416816e-06, "loss": 0.8067, "step": 1230 }, { "epoch": 0.03594055648010277, "grad_norm": 0.9345121196882403, "learning_rate": 1.796555750145943e-06, "loss": 0.8494, "step": 1231 }, { "epoch": 0.03596975270795013, "grad_norm": 0.8382465262431807, "learning_rate": 1.7980151780502045e-06, "loss": 0.8131, "step": 1232 }, { "epoch": 0.0359989489357975, "grad_norm": 0.8443427994750936, "learning_rate": 1.799474605954466e-06, "loss": 0.8388, "step": 1233 }, { "epoch": 0.03602814516364486, "grad_norm": 0.8839122264851468, "learning_rate": 1.8009340338587276e-06, "loss": 0.8543, "step": 1234 }, { "epoch": 0.03605734139149222, "grad_norm": 1.621312472476686, "learning_rate": 1.802393461762989e-06, "loss": 0.8808, "step": 1235 }, { "epoch": 0.03608653761933958, "grad_norm": 0.7713334498573389, "learning_rate": 1.8038528896672505e-06, "loss": 0.7898, "step": 1236 }, { "epoch": 0.03611573384718694, "grad_norm": 0.9444084922483724, "learning_rate": 1.805312317571512e-06, "loss": 0.8091, "step": 1237 }, { "epoch": 0.036144930075034304, "grad_norm": 0.8685850010071327, "learning_rate": 1.8067717454757736e-06, "loss": 0.7521, "step": 1238 }, { "epoch": 0.036174126302881665, "grad_norm": 0.8558304572804044, "learning_rate": 1.808231173380035e-06, "loss": 0.8789, "step": 1239 }, { "epoch": 0.03620332253072903, "grad_norm": 0.8480394182084933, "learning_rate": 1.8096906012842966e-06, "loss": 0.8386, "step": 1240 }, { "epoch": 0.036232518758576394, "grad_norm": 0.8322373363342038, "learning_rate": 1.8111500291885584e-06, "loss": 0.7739, "step": 1241 }, { "epoch": 0.036261714986423756, "grad_norm": 0.8329298697959443, "learning_rate": 1.81260945709282e-06, "loss": 0.7391, "step": 1242 }, { "epoch": 0.03629091121427112, "grad_norm": 0.8297177001436369, "learning_rate": 1.8140688849970814e-06, "loss": 0.7989, "step": 1243 }, { "epoch": 0.03632010744211848, "grad_norm": 0.7796987172332606, "learning_rate": 1.8155283129013428e-06, "loss": 0.7341, "step": 1244 }, { "epoch": 0.03634930366996584, "grad_norm": 0.9410924217256154, "learning_rate": 1.8169877408056045e-06, "loss": 0.798, "step": 1245 }, { "epoch": 0.0363784998978132, "grad_norm": 0.8913031733117125, "learning_rate": 1.818447168709866e-06, "loss": 0.8415, "step": 1246 }, { "epoch": 0.03640769612566056, "grad_norm": 0.9101807132742618, "learning_rate": 1.8199065966141274e-06, "loss": 0.8764, "step": 1247 }, { "epoch": 0.03643689235350793, "grad_norm": 0.8572429223826109, "learning_rate": 1.8213660245183889e-06, "loss": 0.8041, "step": 1248 }, { "epoch": 0.03646608858135529, "grad_norm": 0.7705154027915256, "learning_rate": 1.8228254524226505e-06, "loss": 0.7045, "step": 1249 }, { "epoch": 0.03649528480920265, "grad_norm": 0.9777606594131624, "learning_rate": 1.824284880326912e-06, "loss": 0.7403, "step": 1250 }, { "epoch": 0.03652448103705001, "grad_norm": 0.8209037404829358, "learning_rate": 1.8257443082311734e-06, "loss": 0.7885, "step": 1251 }, { "epoch": 0.036553677264897375, "grad_norm": 0.8322267734070912, "learning_rate": 1.827203736135435e-06, "loss": 0.827, "step": 1252 }, { "epoch": 0.036582873492744736, "grad_norm": 0.8767679190477156, "learning_rate": 1.8286631640396966e-06, "loss": 0.8311, "step": 1253 }, { "epoch": 0.0366120697205921, "grad_norm": 0.8719756511220206, "learning_rate": 1.830122591943958e-06, "loss": 0.8717, "step": 1254 }, { "epoch": 0.03664126594843946, "grad_norm": 0.8875228702439053, "learning_rate": 1.8315820198482195e-06, "loss": 0.8799, "step": 1255 }, { "epoch": 0.036670462176286826, "grad_norm": 0.8338635021563992, "learning_rate": 1.833041447752481e-06, "loss": 0.7386, "step": 1256 }, { "epoch": 0.03669965840413419, "grad_norm": 0.8668300972011932, "learning_rate": 1.8345008756567428e-06, "loss": 0.8817, "step": 1257 }, { "epoch": 0.03672885463198155, "grad_norm": 0.7809836001310267, "learning_rate": 1.8359603035610043e-06, "loss": 0.7194, "step": 1258 }, { "epoch": 0.03675805085982891, "grad_norm": 0.8311298341528142, "learning_rate": 1.837419731465266e-06, "loss": 0.8878, "step": 1259 }, { "epoch": 0.03678724708767627, "grad_norm": 0.868806621135501, "learning_rate": 1.8388791593695274e-06, "loss": 0.8454, "step": 1260 }, { "epoch": 0.03681644331552363, "grad_norm": 0.9096325423656516, "learning_rate": 1.8403385872737889e-06, "loss": 0.8123, "step": 1261 }, { "epoch": 0.036845639543370994, "grad_norm": 0.8466726318022029, "learning_rate": 1.8417980151780503e-06, "loss": 0.7589, "step": 1262 }, { "epoch": 0.03687483577121836, "grad_norm": 0.823790384483609, "learning_rate": 1.843257443082312e-06, "loss": 0.8116, "step": 1263 }, { "epoch": 0.03690403199906572, "grad_norm": 0.7553195137292099, "learning_rate": 1.8447168709865735e-06, "loss": 0.7181, "step": 1264 }, { "epoch": 0.036933228226913084, "grad_norm": 0.9017175137629921, "learning_rate": 1.846176298890835e-06, "loss": 0.8448, "step": 1265 }, { "epoch": 0.036962424454760445, "grad_norm": 0.8886252111070005, "learning_rate": 1.8476357267950964e-06, "loss": 0.7896, "step": 1266 }, { "epoch": 0.036991620682607806, "grad_norm": 0.8368476565514462, "learning_rate": 1.849095154699358e-06, "loss": 0.8469, "step": 1267 }, { "epoch": 0.03702081691045517, "grad_norm": 0.9681538942911109, "learning_rate": 1.8505545826036195e-06, "loss": 0.8735, "step": 1268 }, { "epoch": 0.03705001313830253, "grad_norm": 1.334293511947178, "learning_rate": 1.852014010507881e-06, "loss": 0.7761, "step": 1269 }, { "epoch": 0.03707920936614989, "grad_norm": 0.7749879346902419, "learning_rate": 1.8534734384121424e-06, "loss": 0.7403, "step": 1270 }, { "epoch": 0.03710840559399726, "grad_norm": 0.8864050660743612, "learning_rate": 1.854932866316404e-06, "loss": 0.889, "step": 1271 }, { "epoch": 0.03713760182184462, "grad_norm": 0.961996182806565, "learning_rate": 1.8563922942206656e-06, "loss": 0.8051, "step": 1272 }, { "epoch": 0.03716679804969198, "grad_norm": 0.7964777872460681, "learning_rate": 1.8578517221249272e-06, "loss": 0.7046, "step": 1273 }, { "epoch": 0.03719599427753934, "grad_norm": 0.9016055651563691, "learning_rate": 1.859311150029189e-06, "loss": 0.8245, "step": 1274 }, { "epoch": 0.0372251905053867, "grad_norm": 0.7469405490048818, "learning_rate": 1.8607705779334504e-06, "loss": 0.6817, "step": 1275 }, { "epoch": 0.037254386733234064, "grad_norm": 0.961969858258375, "learning_rate": 1.8622300058377118e-06, "loss": 0.8797, "step": 1276 }, { "epoch": 0.037283582961081425, "grad_norm": 0.8938054162363442, "learning_rate": 1.8636894337419733e-06, "loss": 0.8491, "step": 1277 }, { "epoch": 0.037312779188928794, "grad_norm": 0.9601682527655437, "learning_rate": 1.865148861646235e-06, "loss": 0.8263, "step": 1278 }, { "epoch": 0.037341975416776155, "grad_norm": 0.8362795393789143, "learning_rate": 1.8666082895504964e-06, "loss": 0.8173, "step": 1279 }, { "epoch": 0.037371171644623516, "grad_norm": 1.165176158078772, "learning_rate": 1.8680677174547579e-06, "loss": 0.7446, "step": 1280 }, { "epoch": 0.03740036787247088, "grad_norm": 0.8727568047230198, "learning_rate": 1.8695271453590193e-06, "loss": 0.8172, "step": 1281 }, { "epoch": 0.03742956410031824, "grad_norm": 0.8604010925522267, "learning_rate": 1.870986573263281e-06, "loss": 0.8188, "step": 1282 }, { "epoch": 0.0374587603281656, "grad_norm": 1.1852042741447333, "learning_rate": 1.8724460011675424e-06, "loss": 0.804, "step": 1283 }, { "epoch": 0.03748795655601296, "grad_norm": 0.7831597441317482, "learning_rate": 1.873905429071804e-06, "loss": 0.7503, "step": 1284 }, { "epoch": 0.03751715278386032, "grad_norm": 0.8007340431094482, "learning_rate": 1.8753648569760654e-06, "loss": 0.7655, "step": 1285 }, { "epoch": 0.03754634901170769, "grad_norm": 0.8218609653519615, "learning_rate": 1.876824284880327e-06, "loss": 0.8422, "step": 1286 }, { "epoch": 0.03757554523955505, "grad_norm": 1.009910376916405, "learning_rate": 1.8782837127845885e-06, "loss": 0.7572, "step": 1287 }, { "epoch": 0.03760474146740241, "grad_norm": 0.9514621537775231, "learning_rate": 1.87974314068885e-06, "loss": 0.793, "step": 1288 }, { "epoch": 0.037633937695249774, "grad_norm": 0.8220145603129387, "learning_rate": 1.8812025685931118e-06, "loss": 0.7508, "step": 1289 }, { "epoch": 0.037663133923097135, "grad_norm": 0.8319365306854659, "learning_rate": 1.8826619964973733e-06, "loss": 0.8135, "step": 1290 }, { "epoch": 0.037692330150944496, "grad_norm": 0.8350791693099048, "learning_rate": 1.8841214244016347e-06, "loss": 0.7436, "step": 1291 }, { "epoch": 0.03772152637879186, "grad_norm": 0.8256216562004466, "learning_rate": 1.8855808523058964e-06, "loss": 0.8284, "step": 1292 }, { "epoch": 0.037750722606639225, "grad_norm": 1.6268653687095405, "learning_rate": 1.8870402802101579e-06, "loss": 0.8858, "step": 1293 }, { "epoch": 0.03777991883448659, "grad_norm": 0.8048394974827421, "learning_rate": 1.8884997081144193e-06, "loss": 0.726, "step": 1294 }, { "epoch": 0.03780911506233395, "grad_norm": 0.8382767661258477, "learning_rate": 1.8899591360186808e-06, "loss": 0.7993, "step": 1295 }, { "epoch": 0.03783831129018131, "grad_norm": 0.8031428118240246, "learning_rate": 1.8914185639229425e-06, "loss": 0.7178, "step": 1296 }, { "epoch": 0.03786750751802867, "grad_norm": 0.8172468027918505, "learning_rate": 1.892877991827204e-06, "loss": 0.8412, "step": 1297 }, { "epoch": 0.03789670374587603, "grad_norm": 0.7678200025950711, "learning_rate": 1.8943374197314654e-06, "loss": 0.778, "step": 1298 }, { "epoch": 0.03792589997372339, "grad_norm": 0.8080663481522887, "learning_rate": 1.8957968476357268e-06, "loss": 0.7275, "step": 1299 }, { "epoch": 0.037955096201570754, "grad_norm": 0.815071326796938, "learning_rate": 1.8972562755399885e-06, "loss": 0.7066, "step": 1300 }, { "epoch": 0.03798429242941812, "grad_norm": 0.947759311145556, "learning_rate": 1.89871570344425e-06, "loss": 0.8588, "step": 1301 }, { "epoch": 0.03801348865726548, "grad_norm": 0.8519527355353729, "learning_rate": 1.9001751313485114e-06, "loss": 0.8701, "step": 1302 }, { "epoch": 0.038042684885112844, "grad_norm": 0.8585837271645913, "learning_rate": 1.9016345592527729e-06, "loss": 0.7978, "step": 1303 }, { "epoch": 0.038071881112960206, "grad_norm": 0.9865964961639064, "learning_rate": 1.9030939871570345e-06, "loss": 0.7023, "step": 1304 }, { "epoch": 0.03810107734080757, "grad_norm": 0.8622243103350985, "learning_rate": 1.9045534150612962e-06, "loss": 0.8614, "step": 1305 }, { "epoch": 0.03813027356865493, "grad_norm": 0.9013299319257688, "learning_rate": 1.9060128429655577e-06, "loss": 0.8603, "step": 1306 }, { "epoch": 0.03815946979650229, "grad_norm": 0.7991808169742141, "learning_rate": 1.9074722708698193e-06, "loss": 0.709, "step": 1307 }, { "epoch": 0.03818866602434966, "grad_norm": 0.7647123092929937, "learning_rate": 1.9089316987740806e-06, "loss": 0.7773, "step": 1308 }, { "epoch": 0.03821786225219702, "grad_norm": 0.8577742098654488, "learning_rate": 1.9103911266783423e-06, "loss": 0.8341, "step": 1309 }, { "epoch": 0.03824705848004438, "grad_norm": 0.8653232986523937, "learning_rate": 1.911850554582604e-06, "loss": 0.7956, "step": 1310 }, { "epoch": 0.03827625470789174, "grad_norm": 1.6521216855113523, "learning_rate": 1.913309982486865e-06, "loss": 0.8516, "step": 1311 }, { "epoch": 0.0383054509357391, "grad_norm": 0.82874786166145, "learning_rate": 1.914769410391127e-06, "loss": 0.7236, "step": 1312 }, { "epoch": 0.03833464716358646, "grad_norm": 1.14672690111913, "learning_rate": 1.9162288382953885e-06, "loss": 0.8505, "step": 1313 }, { "epoch": 0.038363843391433824, "grad_norm": 0.8348721148303158, "learning_rate": 1.9176882661996498e-06, "loss": 0.7394, "step": 1314 }, { "epoch": 0.038393039619281186, "grad_norm": 0.8699605227052971, "learning_rate": 1.9191476941039114e-06, "loss": 0.8184, "step": 1315 }, { "epoch": 0.038422235847128554, "grad_norm": 0.8188459992954971, "learning_rate": 1.920607122008173e-06, "loss": 0.8129, "step": 1316 }, { "epoch": 0.038451432074975915, "grad_norm": 0.8890741703239018, "learning_rate": 1.9220665499124344e-06, "loss": 0.9037, "step": 1317 }, { "epoch": 0.038480628302823276, "grad_norm": 0.8862676194072491, "learning_rate": 1.923525977816696e-06, "loss": 0.737, "step": 1318 }, { "epoch": 0.03850982453067064, "grad_norm": 0.7805770525514435, "learning_rate": 1.9249854057209573e-06, "loss": 0.7429, "step": 1319 }, { "epoch": 0.038539020758518, "grad_norm": 0.8286480694595988, "learning_rate": 1.926444833625219e-06, "loss": 0.8171, "step": 1320 }, { "epoch": 0.03856821698636536, "grad_norm": 1.0105839790669695, "learning_rate": 1.9279042615294806e-06, "loss": 0.8125, "step": 1321 }, { "epoch": 0.03859741321421272, "grad_norm": 0.8669279723844443, "learning_rate": 1.9293636894337423e-06, "loss": 0.7688, "step": 1322 }, { "epoch": 0.03862660944206009, "grad_norm": 0.8532444626945924, "learning_rate": 1.930823117338004e-06, "loss": 0.7935, "step": 1323 }, { "epoch": 0.03865580566990745, "grad_norm": 0.8840359229350163, "learning_rate": 1.932282545242265e-06, "loss": 0.8306, "step": 1324 }, { "epoch": 0.03868500189775481, "grad_norm": 0.8094710596650944, "learning_rate": 1.933741973146527e-06, "loss": 0.8399, "step": 1325 }, { "epoch": 0.03871419812560217, "grad_norm": 0.952217574367986, "learning_rate": 1.935201401050788e-06, "loss": 0.8737, "step": 1326 }, { "epoch": 0.038743394353449534, "grad_norm": 0.875387420069794, "learning_rate": 1.9366608289550498e-06, "loss": 0.8649, "step": 1327 }, { "epoch": 0.038772590581296895, "grad_norm": 0.8728035659360758, "learning_rate": 1.9381202568593115e-06, "loss": 0.8744, "step": 1328 }, { "epoch": 0.038801786809144256, "grad_norm": 0.8505841045952808, "learning_rate": 1.9395796847635727e-06, "loss": 0.7997, "step": 1329 }, { "epoch": 0.03883098303699162, "grad_norm": 0.7878784997118699, "learning_rate": 1.9410391126678344e-06, "loss": 0.6941, "step": 1330 }, { "epoch": 0.038860179264838986, "grad_norm": 0.8072960336764897, "learning_rate": 1.942498540572096e-06, "loss": 0.7959, "step": 1331 }, { "epoch": 0.03888937549268635, "grad_norm": 0.9470802069654173, "learning_rate": 1.9439579684763573e-06, "loss": 0.7433, "step": 1332 }, { "epoch": 0.03891857172053371, "grad_norm": 0.8995598072250014, "learning_rate": 1.945417396380619e-06, "loss": 0.8587, "step": 1333 }, { "epoch": 0.03894776794838107, "grad_norm": 1.1961002682359185, "learning_rate": 1.94687682428488e-06, "loss": 0.8798, "step": 1334 }, { "epoch": 0.03897696417622843, "grad_norm": 0.8252458626624589, "learning_rate": 1.948336252189142e-06, "loss": 0.8149, "step": 1335 }, { "epoch": 0.03900616040407579, "grad_norm": 1.3854815834788154, "learning_rate": 1.9497956800934035e-06, "loss": 0.8261, "step": 1336 }, { "epoch": 0.03903535663192315, "grad_norm": 0.9336161955938526, "learning_rate": 1.9512551079976652e-06, "loss": 0.838, "step": 1337 }, { "epoch": 0.03906455285977052, "grad_norm": 0.7856627967916454, "learning_rate": 1.952714535901927e-06, "loss": 0.7994, "step": 1338 }, { "epoch": 0.03909374908761788, "grad_norm": 1.2613321255689884, "learning_rate": 1.954173963806188e-06, "loss": 0.8365, "step": 1339 }, { "epoch": 0.03912294531546524, "grad_norm": 0.8965707038552423, "learning_rate": 1.95563339171045e-06, "loss": 0.8653, "step": 1340 }, { "epoch": 0.039152141543312605, "grad_norm": 0.9490466794915425, "learning_rate": 1.9570928196147115e-06, "loss": 0.7852, "step": 1341 }, { "epoch": 0.039181337771159966, "grad_norm": 0.781228816963313, "learning_rate": 1.9585522475189727e-06, "loss": 0.7453, "step": 1342 }, { "epoch": 0.03921053399900733, "grad_norm": 1.1273915707299555, "learning_rate": 1.9600116754232344e-06, "loss": 0.7508, "step": 1343 }, { "epoch": 0.03923973022685469, "grad_norm": 0.7934974482135566, "learning_rate": 1.9614711033274956e-06, "loss": 0.7783, "step": 1344 }, { "epoch": 0.03926892645470205, "grad_norm": 0.8240517335027279, "learning_rate": 1.9629305312317573e-06, "loss": 0.8545, "step": 1345 }, { "epoch": 0.03929812268254942, "grad_norm": 1.8524865401643937, "learning_rate": 1.964389959136019e-06, "loss": 0.7947, "step": 1346 }, { "epoch": 0.03932731891039678, "grad_norm": 0.9951051002335094, "learning_rate": 1.9658493870402802e-06, "loss": 0.8439, "step": 1347 }, { "epoch": 0.03935651513824414, "grad_norm": 0.906577144208568, "learning_rate": 1.967308814944542e-06, "loss": 0.8893, "step": 1348 }, { "epoch": 0.0393857113660915, "grad_norm": 0.9565583699521278, "learning_rate": 1.9687682428488036e-06, "loss": 0.7832, "step": 1349 }, { "epoch": 0.03941490759393886, "grad_norm": 0.8560052414450141, "learning_rate": 1.970227670753065e-06, "loss": 0.8889, "step": 1350 }, { "epoch": 0.039444103821786224, "grad_norm": 0.8212681680595444, "learning_rate": 1.9716870986573265e-06, "loss": 0.8227, "step": 1351 }, { "epoch": 0.039473300049633585, "grad_norm": 0.8194904406712752, "learning_rate": 1.9731465265615877e-06, "loss": 0.9045, "step": 1352 }, { "epoch": 0.039502496277480946, "grad_norm": 0.793961277125535, "learning_rate": 1.97460595446585e-06, "loss": 0.7895, "step": 1353 }, { "epoch": 0.039531692505328314, "grad_norm": 0.8669310984026131, "learning_rate": 1.976065382370111e-06, "loss": 0.9342, "step": 1354 }, { "epoch": 0.039560888733175675, "grad_norm": 0.8327334836588378, "learning_rate": 1.9775248102743727e-06, "loss": 0.7763, "step": 1355 }, { "epoch": 0.03959008496102304, "grad_norm": 0.7831852283261211, "learning_rate": 1.9789842381786344e-06, "loss": 0.7037, "step": 1356 }, { "epoch": 0.0396192811888704, "grad_norm": 0.9662909205363367, "learning_rate": 1.9804436660828956e-06, "loss": 0.8811, "step": 1357 }, { "epoch": 0.03964847741671776, "grad_norm": 0.8371779219392731, "learning_rate": 1.9819030939871573e-06, "loss": 0.8466, "step": 1358 }, { "epoch": 0.03967767364456512, "grad_norm": 0.8146205943061202, "learning_rate": 1.9833625218914186e-06, "loss": 0.7608, "step": 1359 }, { "epoch": 0.03970686987241248, "grad_norm": 0.7968669112674642, "learning_rate": 1.9848219497956802e-06, "loss": 0.768, "step": 1360 }, { "epoch": 0.03973606610025985, "grad_norm": 0.8881273412862941, "learning_rate": 1.986281377699942e-06, "loss": 0.8302, "step": 1361 }, { "epoch": 0.03976526232810721, "grad_norm": 0.8223214061679995, "learning_rate": 1.987740805604203e-06, "loss": 0.854, "step": 1362 }, { "epoch": 0.03979445855595457, "grad_norm": 0.9111221346821664, "learning_rate": 1.989200233508465e-06, "loss": 0.8348, "step": 1363 }, { "epoch": 0.03982365478380193, "grad_norm": 0.8570708013145242, "learning_rate": 1.9906596614127265e-06, "loss": 0.8296, "step": 1364 }, { "epoch": 0.039852851011649294, "grad_norm": 0.8167195412255692, "learning_rate": 1.9921190893169877e-06, "loss": 0.8229, "step": 1365 }, { "epoch": 0.039882047239496655, "grad_norm": 0.8721435197678965, "learning_rate": 1.9935785172212494e-06, "loss": 0.8504, "step": 1366 }, { "epoch": 0.03991124346734402, "grad_norm": 0.7273732571758246, "learning_rate": 1.9950379451255107e-06, "loss": 0.6797, "step": 1367 }, { "epoch": 0.03994043969519138, "grad_norm": 0.8122799067897639, "learning_rate": 1.9964973730297723e-06, "loss": 0.7454, "step": 1368 }, { "epoch": 0.039969635923038746, "grad_norm": 0.7758727528136997, "learning_rate": 1.997956800934034e-06, "loss": 0.8141, "step": 1369 }, { "epoch": 0.03999883215088611, "grad_norm": 0.8478059427144126, "learning_rate": 1.9994162288382957e-06, "loss": 0.9141, "step": 1370 }, { "epoch": 0.04002802837873347, "grad_norm": 0.8259491036646571, "learning_rate": 2.0008756567425573e-06, "loss": 0.8339, "step": 1371 }, { "epoch": 0.04005722460658083, "grad_norm": 1.0055008542524433, "learning_rate": 2.0023350846468186e-06, "loss": 0.8879, "step": 1372 }, { "epoch": 0.04008642083442819, "grad_norm": 0.8215044429854599, "learning_rate": 2.0037945125510803e-06, "loss": 0.8348, "step": 1373 }, { "epoch": 0.04011561706227555, "grad_norm": 1.6112730719507762, "learning_rate": 2.005253940455342e-06, "loss": 0.7276, "step": 1374 }, { "epoch": 0.04014481329012291, "grad_norm": 0.9016823906213615, "learning_rate": 2.006713368359603e-06, "loss": 0.8594, "step": 1375 }, { "epoch": 0.04017400951797028, "grad_norm": 0.7799817136659439, "learning_rate": 2.008172796263865e-06, "loss": 0.7946, "step": 1376 }, { "epoch": 0.04020320574581764, "grad_norm": 0.7571824569722634, "learning_rate": 2.009632224168126e-06, "loss": 0.745, "step": 1377 }, { "epoch": 0.040232401973665004, "grad_norm": 0.8708280920966343, "learning_rate": 2.0110916520723878e-06, "loss": 0.8644, "step": 1378 }, { "epoch": 0.040261598201512365, "grad_norm": 0.8430126496632919, "learning_rate": 2.0125510799766494e-06, "loss": 0.8376, "step": 1379 }, { "epoch": 0.040290794429359726, "grad_norm": 0.9488418636098108, "learning_rate": 2.0140105078809107e-06, "loss": 0.847, "step": 1380 }, { "epoch": 0.04031999065720709, "grad_norm": 0.8002128342745817, "learning_rate": 2.0154699357851723e-06, "loss": 0.6964, "step": 1381 }, { "epoch": 0.04034918688505445, "grad_norm": 0.9584694567605682, "learning_rate": 2.016929363689434e-06, "loss": 0.8941, "step": 1382 }, { "epoch": 0.04037838311290181, "grad_norm": 0.8971301238416897, "learning_rate": 2.0183887915936953e-06, "loss": 0.7801, "step": 1383 }, { "epoch": 0.04040757934074918, "grad_norm": 0.8970126715593166, "learning_rate": 2.019848219497957e-06, "loss": 0.7565, "step": 1384 }, { "epoch": 0.04043677556859654, "grad_norm": 2.3105202847902317, "learning_rate": 2.0213076474022186e-06, "loss": 0.7473, "step": 1385 }, { "epoch": 0.0404659717964439, "grad_norm": 1.02772868010015, "learning_rate": 2.0227670753064803e-06, "loss": 0.9272, "step": 1386 }, { "epoch": 0.04049516802429126, "grad_norm": 0.8252337001084372, "learning_rate": 2.0242265032107415e-06, "loss": 0.7772, "step": 1387 }, { "epoch": 0.04052436425213862, "grad_norm": 1.069928108468529, "learning_rate": 2.025685931115003e-06, "loss": 0.8593, "step": 1388 }, { "epoch": 0.040553560479985984, "grad_norm": 0.8152324544919812, "learning_rate": 2.027145359019265e-06, "loss": 0.7934, "step": 1389 }, { "epoch": 0.040582756707833345, "grad_norm": 0.8269387094016731, "learning_rate": 2.028604786923526e-06, "loss": 0.833, "step": 1390 }, { "epoch": 0.04061195293568071, "grad_norm": 0.871244493541818, "learning_rate": 2.0300642148277878e-06, "loss": 0.7533, "step": 1391 }, { "epoch": 0.040641149163528074, "grad_norm": 0.9972889048774393, "learning_rate": 2.031523642732049e-06, "loss": 0.8656, "step": 1392 }, { "epoch": 0.040670345391375436, "grad_norm": 0.8319068347875862, "learning_rate": 2.0329830706363107e-06, "loss": 0.8294, "step": 1393 }, { "epoch": 0.0406995416192228, "grad_norm": 0.8854984893479022, "learning_rate": 2.0344424985405724e-06, "loss": 0.8356, "step": 1394 }, { "epoch": 0.04072873784707016, "grad_norm": 0.8455721533623453, "learning_rate": 2.0359019264448336e-06, "loss": 0.9509, "step": 1395 }, { "epoch": 0.04075793407491752, "grad_norm": 0.8206543145526084, "learning_rate": 2.0373613543490953e-06, "loss": 0.8446, "step": 1396 }, { "epoch": 0.04078713030276488, "grad_norm": 0.757047683461676, "learning_rate": 2.038820782253357e-06, "loss": 0.7104, "step": 1397 }, { "epoch": 0.04081632653061224, "grad_norm": 0.7809519976277555, "learning_rate": 2.040280210157618e-06, "loss": 0.8115, "step": 1398 }, { "epoch": 0.04084552275845961, "grad_norm": 0.8459447422453918, "learning_rate": 2.04173963806188e-06, "loss": 0.8238, "step": 1399 }, { "epoch": 0.04087471898630697, "grad_norm": 0.8514316120370705, "learning_rate": 2.043199065966141e-06, "loss": 0.7981, "step": 1400 }, { "epoch": 0.04090391521415433, "grad_norm": 0.8427536077840163, "learning_rate": 2.044658493870403e-06, "loss": 0.9134, "step": 1401 }, { "epoch": 0.04093311144200169, "grad_norm": 0.7435428177910315, "learning_rate": 2.0461179217746644e-06, "loss": 0.7062, "step": 1402 }, { "epoch": 0.040962307669849055, "grad_norm": 0.9636509016270804, "learning_rate": 2.047577349678926e-06, "loss": 0.7772, "step": 1403 }, { "epoch": 0.040991503897696416, "grad_norm": 0.892355516694422, "learning_rate": 2.0490367775831878e-06, "loss": 0.8074, "step": 1404 }, { "epoch": 0.04102070012554378, "grad_norm": 0.9294307784832765, "learning_rate": 2.050496205487449e-06, "loss": 0.7684, "step": 1405 }, { "epoch": 0.041049896353391145, "grad_norm": 1.9766707842291134, "learning_rate": 2.0519556333917107e-06, "loss": 0.8299, "step": 1406 }, { "epoch": 0.041079092581238506, "grad_norm": 1.4511669352262921, "learning_rate": 2.0534150612959724e-06, "loss": 0.7255, "step": 1407 }, { "epoch": 0.04110828880908587, "grad_norm": 0.8163892174104213, "learning_rate": 2.0548744892002336e-06, "loss": 0.8202, "step": 1408 }, { "epoch": 0.04113748503693323, "grad_norm": 0.7769399209565322, "learning_rate": 2.0563339171044953e-06, "loss": 0.7262, "step": 1409 }, { "epoch": 0.04116668126478059, "grad_norm": 0.9623379018137714, "learning_rate": 2.0577933450087565e-06, "loss": 0.8574, "step": 1410 }, { "epoch": 0.04119587749262795, "grad_norm": 0.838202362949451, "learning_rate": 2.059252772913018e-06, "loss": 0.762, "step": 1411 }, { "epoch": 0.04122507372047531, "grad_norm": 0.828394266968888, "learning_rate": 2.06071220081728e-06, "loss": 0.8269, "step": 1412 }, { "epoch": 0.041254269948322674, "grad_norm": 0.8180289550597828, "learning_rate": 2.062171628721541e-06, "loss": 0.76, "step": 1413 }, { "epoch": 0.04128346617617004, "grad_norm": 0.7735084852284747, "learning_rate": 2.063631056625803e-06, "loss": 0.674, "step": 1414 }, { "epoch": 0.0413126624040174, "grad_norm": 0.8679059076566721, "learning_rate": 2.0650904845300645e-06, "loss": 0.8466, "step": 1415 }, { "epoch": 0.041341858631864764, "grad_norm": 0.810171759647892, "learning_rate": 2.0665499124343257e-06, "loss": 0.8287, "step": 1416 }, { "epoch": 0.041371054859712125, "grad_norm": 0.8954405956418857, "learning_rate": 2.0680093403385874e-06, "loss": 0.7954, "step": 1417 }, { "epoch": 0.041400251087559486, "grad_norm": 0.7712587033197368, "learning_rate": 2.069468768242849e-06, "loss": 0.7298, "step": 1418 }, { "epoch": 0.04142944731540685, "grad_norm": 0.8358064400350558, "learning_rate": 2.0709281961471107e-06, "loss": 0.9089, "step": 1419 }, { "epoch": 0.04145864354325421, "grad_norm": 0.8568523775775515, "learning_rate": 2.072387624051372e-06, "loss": 0.7492, "step": 1420 }, { "epoch": 0.04148783977110158, "grad_norm": 0.9411969520001165, "learning_rate": 2.0738470519556336e-06, "loss": 0.815, "step": 1421 }, { "epoch": 0.04151703599894894, "grad_norm": 0.7876295770478534, "learning_rate": 2.0753064798598953e-06, "loss": 0.76, "step": 1422 }, { "epoch": 0.0415462322267963, "grad_norm": 0.8869143509081349, "learning_rate": 2.0767659077641566e-06, "loss": 0.8085, "step": 1423 }, { "epoch": 0.04157542845464366, "grad_norm": 0.7700007225519054, "learning_rate": 2.0782253356684182e-06, "loss": 0.7598, "step": 1424 }, { "epoch": 0.04160462468249102, "grad_norm": 1.0849882289963644, "learning_rate": 2.0796847635726795e-06, "loss": 0.8565, "step": 1425 }, { "epoch": 0.04163382091033838, "grad_norm": 0.8216754536071295, "learning_rate": 2.081144191476941e-06, "loss": 0.7416, "step": 1426 }, { "epoch": 0.041663017138185744, "grad_norm": 0.8388206578025221, "learning_rate": 2.082603619381203e-06, "loss": 0.8016, "step": 1427 }, { "epoch": 0.041692213366033105, "grad_norm": 0.9001746730832143, "learning_rate": 2.084063047285464e-06, "loss": 0.9067, "step": 1428 }, { "epoch": 0.041721409593880474, "grad_norm": 0.8699371691111643, "learning_rate": 2.0855224751897257e-06, "loss": 0.8298, "step": 1429 }, { "epoch": 0.041750605821727835, "grad_norm": 0.735482130205054, "learning_rate": 2.0869819030939874e-06, "loss": 0.7423, "step": 1430 }, { "epoch": 0.041779802049575196, "grad_norm": 0.8481590904941864, "learning_rate": 2.0884413309982486e-06, "loss": 0.7444, "step": 1431 }, { "epoch": 0.04180899827742256, "grad_norm": 0.8429796979558167, "learning_rate": 2.0899007589025103e-06, "loss": 0.7509, "step": 1432 }, { "epoch": 0.04183819450526992, "grad_norm": 0.8707232762770591, "learning_rate": 2.091360186806772e-06, "loss": 0.9233, "step": 1433 }, { "epoch": 0.04186739073311728, "grad_norm": 0.9236292736603032, "learning_rate": 2.0928196147110337e-06, "loss": 0.9018, "step": 1434 }, { "epoch": 0.04189658696096464, "grad_norm": 0.7652886826152024, "learning_rate": 2.094279042615295e-06, "loss": 0.7317, "step": 1435 }, { "epoch": 0.04192578318881201, "grad_norm": 0.8726150731050197, "learning_rate": 2.0957384705195566e-06, "loss": 0.7937, "step": 1436 }, { "epoch": 0.04195497941665937, "grad_norm": 1.0935196394941442, "learning_rate": 2.0971978984238182e-06, "loss": 0.8204, "step": 1437 }, { "epoch": 0.04198417564450673, "grad_norm": 0.9185640776760255, "learning_rate": 2.0986573263280795e-06, "loss": 0.8408, "step": 1438 }, { "epoch": 0.04201337187235409, "grad_norm": 1.4415591535103123, "learning_rate": 2.100116754232341e-06, "loss": 0.9774, "step": 1439 }, { "epoch": 0.042042568100201454, "grad_norm": 0.9123929101085764, "learning_rate": 2.101576182136603e-06, "loss": 0.7748, "step": 1440 }, { "epoch": 0.042071764328048815, "grad_norm": 0.8045657404007632, "learning_rate": 2.103035610040864e-06, "loss": 0.7877, "step": 1441 }, { "epoch": 0.042100960555896176, "grad_norm": 0.8709083356937264, "learning_rate": 2.1044950379451257e-06, "loss": 0.8514, "step": 1442 }, { "epoch": 0.04213015678374354, "grad_norm": 1.0176192603192595, "learning_rate": 2.105954465849387e-06, "loss": 0.8497, "step": 1443 }, { "epoch": 0.042159353011590905, "grad_norm": 0.8164234204825745, "learning_rate": 2.1074138937536487e-06, "loss": 0.773, "step": 1444 }, { "epoch": 0.04218854923943827, "grad_norm": 0.7859069775726483, "learning_rate": 2.1088733216579103e-06, "loss": 0.7719, "step": 1445 }, { "epoch": 0.04221774546728563, "grad_norm": 0.9184542237115844, "learning_rate": 2.1103327495621716e-06, "loss": 0.8896, "step": 1446 }, { "epoch": 0.04224694169513299, "grad_norm": 0.7643890239589345, "learning_rate": 2.1117921774664332e-06, "loss": 0.7259, "step": 1447 }, { "epoch": 0.04227613792298035, "grad_norm": 0.926468259694647, "learning_rate": 2.113251605370695e-06, "loss": 0.7611, "step": 1448 }, { "epoch": 0.04230533415082771, "grad_norm": 0.8659096640248817, "learning_rate": 2.1147110332749566e-06, "loss": 0.8177, "step": 1449 }, { "epoch": 0.04233453037867507, "grad_norm": 0.8792268092465517, "learning_rate": 2.116170461179218e-06, "loss": 0.8384, "step": 1450 }, { "epoch": 0.04236372660652244, "grad_norm": 0.7940186013027895, "learning_rate": 2.1176298890834795e-06, "loss": 0.8018, "step": 1451 }, { "epoch": 0.0423929228343698, "grad_norm": 0.8051806875845249, "learning_rate": 2.119089316987741e-06, "loss": 0.7669, "step": 1452 }, { "epoch": 0.04242211906221716, "grad_norm": 0.9930546783038701, "learning_rate": 2.1205487448920024e-06, "loss": 0.9284, "step": 1453 }, { "epoch": 0.042451315290064524, "grad_norm": 0.9800848440258016, "learning_rate": 2.122008172796264e-06, "loss": 0.7851, "step": 1454 }, { "epoch": 0.042480511517911886, "grad_norm": 0.7926272295214607, "learning_rate": 2.1234676007005258e-06, "loss": 0.7345, "step": 1455 }, { "epoch": 0.04250970774575925, "grad_norm": 0.9703663165881036, "learning_rate": 2.124927028604787e-06, "loss": 0.8912, "step": 1456 }, { "epoch": 0.04253890397360661, "grad_norm": 0.8429503668896108, "learning_rate": 2.1263864565090487e-06, "loss": 0.8512, "step": 1457 }, { "epoch": 0.04256810020145397, "grad_norm": 0.922264335280484, "learning_rate": 2.1278458844133103e-06, "loss": 0.7282, "step": 1458 }, { "epoch": 0.04259729642930134, "grad_norm": 0.824132551136684, "learning_rate": 2.1293053123175716e-06, "loss": 0.7653, "step": 1459 }, { "epoch": 0.0426264926571487, "grad_norm": 0.8203662548668766, "learning_rate": 2.1307647402218333e-06, "loss": 0.7509, "step": 1460 }, { "epoch": 0.04265568888499606, "grad_norm": 0.9012731189080856, "learning_rate": 2.1322241681260945e-06, "loss": 0.8406, "step": 1461 }, { "epoch": 0.04268488511284342, "grad_norm": 0.7881190123052811, "learning_rate": 2.133683596030356e-06, "loss": 0.7053, "step": 1462 }, { "epoch": 0.04271408134069078, "grad_norm": 0.8480224863200909, "learning_rate": 2.135143023934618e-06, "loss": 0.7482, "step": 1463 }, { "epoch": 0.04274327756853814, "grad_norm": 0.8782134744681841, "learning_rate": 2.136602451838879e-06, "loss": 0.8056, "step": 1464 }, { "epoch": 0.042772473796385505, "grad_norm": 0.9393027405339682, "learning_rate": 2.138061879743141e-06, "loss": 0.7911, "step": 1465 }, { "epoch": 0.042801670024232866, "grad_norm": 0.8194790990027859, "learning_rate": 2.1395213076474024e-06, "loss": 0.8287, "step": 1466 }, { "epoch": 0.042830866252080234, "grad_norm": 0.877656226997458, "learning_rate": 2.140980735551664e-06, "loss": 0.7587, "step": 1467 }, { "epoch": 0.042860062479927595, "grad_norm": 0.7471502572458446, "learning_rate": 2.1424401634559254e-06, "loss": 0.6854, "step": 1468 }, { "epoch": 0.042889258707774956, "grad_norm": 0.8930347734556296, "learning_rate": 2.143899591360187e-06, "loss": 0.7334, "step": 1469 }, { "epoch": 0.04291845493562232, "grad_norm": 1.126895019563768, "learning_rate": 2.1453590192644487e-06, "loss": 0.9276, "step": 1470 }, { "epoch": 0.04294765116346968, "grad_norm": 0.7538032393548058, "learning_rate": 2.14681844716871e-06, "loss": 0.7022, "step": 1471 }, { "epoch": 0.04297684739131704, "grad_norm": 0.8207718220195779, "learning_rate": 2.1482778750729716e-06, "loss": 0.7449, "step": 1472 }, { "epoch": 0.0430060436191644, "grad_norm": 0.8497386581983974, "learning_rate": 2.1497373029772333e-06, "loss": 0.6725, "step": 1473 }, { "epoch": 0.04303523984701177, "grad_norm": 0.9627404032083816, "learning_rate": 2.1511967308814945e-06, "loss": 0.8068, "step": 1474 }, { "epoch": 0.04306443607485913, "grad_norm": 0.8295369732330025, "learning_rate": 2.152656158785756e-06, "loss": 0.7888, "step": 1475 }, { "epoch": 0.04309363230270649, "grad_norm": 0.8323268861033556, "learning_rate": 2.1541155866900174e-06, "loss": 0.7131, "step": 1476 }, { "epoch": 0.04312282853055385, "grad_norm": 0.9694086683373346, "learning_rate": 2.155575014594279e-06, "loss": 0.7452, "step": 1477 }, { "epoch": 0.043152024758401214, "grad_norm": 0.8387268408162263, "learning_rate": 2.1570344424985408e-06, "loss": 0.7433, "step": 1478 }, { "epoch": 0.043181220986248575, "grad_norm": 0.766684773779977, "learning_rate": 2.158493870402802e-06, "loss": 0.7451, "step": 1479 }, { "epoch": 0.043210417214095936, "grad_norm": 0.7983589462187973, "learning_rate": 2.1599532983070637e-06, "loss": 0.7536, "step": 1480 }, { "epoch": 0.0432396134419433, "grad_norm": 0.915817133955157, "learning_rate": 2.1614127262113254e-06, "loss": 0.7957, "step": 1481 }, { "epoch": 0.043268809669790666, "grad_norm": 0.8390281021603958, "learning_rate": 2.162872154115587e-06, "loss": 0.9225, "step": 1482 }, { "epoch": 0.04329800589763803, "grad_norm": 0.7878680408215372, "learning_rate": 2.1643315820198487e-06, "loss": 0.7978, "step": 1483 }, { "epoch": 0.04332720212548539, "grad_norm": 0.7901424596858835, "learning_rate": 2.16579100992411e-06, "loss": 0.8083, "step": 1484 }, { "epoch": 0.04335639835333275, "grad_norm": 0.771211077679203, "learning_rate": 2.1672504378283716e-06, "loss": 0.7708, "step": 1485 }, { "epoch": 0.04338559458118011, "grad_norm": 0.9024021241511203, "learning_rate": 2.168709865732633e-06, "loss": 0.8742, "step": 1486 }, { "epoch": 0.04341479080902747, "grad_norm": 0.9255511405778526, "learning_rate": 2.1701692936368945e-06, "loss": 0.8852, "step": 1487 }, { "epoch": 0.04344398703687483, "grad_norm": 0.8030656662756784, "learning_rate": 2.171628721541156e-06, "loss": 0.7543, "step": 1488 }, { "epoch": 0.0434731832647222, "grad_norm": 1.0248753083182152, "learning_rate": 2.1730881494454175e-06, "loss": 0.7773, "step": 1489 }, { "epoch": 0.04350237949256956, "grad_norm": 0.9519776650072321, "learning_rate": 2.174547577349679e-06, "loss": 0.7057, "step": 1490 }, { "epoch": 0.043531575720416923, "grad_norm": 0.8212295226161987, "learning_rate": 2.176007005253941e-06, "loss": 0.8176, "step": 1491 }, { "epoch": 0.043560771948264285, "grad_norm": 0.8298919660446389, "learning_rate": 2.177466433158202e-06, "loss": 0.7782, "step": 1492 }, { "epoch": 0.043589968176111646, "grad_norm": 0.8875694516021714, "learning_rate": 2.1789258610624637e-06, "loss": 0.8179, "step": 1493 }, { "epoch": 0.04361916440395901, "grad_norm": 0.957009825505578, "learning_rate": 2.180385288966725e-06, "loss": 0.8378, "step": 1494 }, { "epoch": 0.04364836063180637, "grad_norm": 0.7306084780330153, "learning_rate": 2.1818447168709866e-06, "loss": 0.6804, "step": 1495 }, { "epoch": 0.04367755685965373, "grad_norm": 0.7869730229847843, "learning_rate": 2.1833041447752483e-06, "loss": 0.805, "step": 1496 }, { "epoch": 0.0437067530875011, "grad_norm": 0.8052088153605561, "learning_rate": 2.18476357267951e-06, "loss": 0.751, "step": 1497 }, { "epoch": 0.04373594931534846, "grad_norm": 1.1490850243341342, "learning_rate": 2.1862230005837716e-06, "loss": 0.8162, "step": 1498 }, { "epoch": 0.04376514554319582, "grad_norm": 0.8437889606423683, "learning_rate": 2.187682428488033e-06, "loss": 0.8304, "step": 1499 }, { "epoch": 0.04379434177104318, "grad_norm": 0.8601087902851932, "learning_rate": 2.1891418563922946e-06, "loss": 0.83, "step": 1500 }, { "epoch": 0.04382353799889054, "grad_norm": 0.8017624426090004, "learning_rate": 2.190601284296556e-06, "loss": 0.7738, "step": 1501 }, { "epoch": 0.043852734226737904, "grad_norm": 0.87411977348412, "learning_rate": 2.1920607122008175e-06, "loss": 0.9112, "step": 1502 }, { "epoch": 0.043881930454585265, "grad_norm": 0.874825543128551, "learning_rate": 2.193520140105079e-06, "loss": 0.7832, "step": 1503 }, { "epoch": 0.04391112668243263, "grad_norm": 0.8152794262440252, "learning_rate": 2.1949795680093404e-06, "loss": 0.7844, "step": 1504 }, { "epoch": 0.043940322910279994, "grad_norm": 1.0202332380181158, "learning_rate": 2.196438995913602e-06, "loss": 0.8052, "step": 1505 }, { "epoch": 0.043969519138127355, "grad_norm": 0.7959788329398976, "learning_rate": 2.1978984238178637e-06, "loss": 0.7365, "step": 1506 }, { "epoch": 0.04399871536597472, "grad_norm": 0.791825016643319, "learning_rate": 2.199357851722125e-06, "loss": 0.7409, "step": 1507 }, { "epoch": 0.04402791159382208, "grad_norm": 0.8399663442343992, "learning_rate": 2.2008172796263866e-06, "loss": 0.7062, "step": 1508 }, { "epoch": 0.04405710782166944, "grad_norm": 0.7836912953066241, "learning_rate": 2.202276707530648e-06, "loss": 0.8001, "step": 1509 }, { "epoch": 0.0440863040495168, "grad_norm": 0.7571080078979038, "learning_rate": 2.2037361354349096e-06, "loss": 0.7322, "step": 1510 }, { "epoch": 0.04411550027736416, "grad_norm": 0.8948414448698011, "learning_rate": 2.2051955633391712e-06, "loss": 0.8401, "step": 1511 }, { "epoch": 0.04414469650521153, "grad_norm": 0.9841406179882892, "learning_rate": 2.2066549912434325e-06, "loss": 0.9035, "step": 1512 }, { "epoch": 0.04417389273305889, "grad_norm": 0.9077654381734452, "learning_rate": 2.2081144191476946e-06, "loss": 0.7821, "step": 1513 }, { "epoch": 0.04420308896090625, "grad_norm": 0.7756362529983944, "learning_rate": 2.209573847051956e-06, "loss": 0.7336, "step": 1514 }, { "epoch": 0.04423228518875361, "grad_norm": 0.7996614914267781, "learning_rate": 2.2110332749562175e-06, "loss": 0.7656, "step": 1515 }, { "epoch": 0.044261481416600974, "grad_norm": 0.9699290610255483, "learning_rate": 2.212492702860479e-06, "loss": 0.9171, "step": 1516 }, { "epoch": 0.044290677644448335, "grad_norm": 0.7923676960182707, "learning_rate": 2.2139521307647404e-06, "loss": 0.7576, "step": 1517 }, { "epoch": 0.0443198738722957, "grad_norm": 0.8723065332043793, "learning_rate": 2.215411558669002e-06, "loss": 0.7358, "step": 1518 }, { "epoch": 0.044349070100143065, "grad_norm": 0.7790495525098392, "learning_rate": 2.2168709865732633e-06, "loss": 0.71, "step": 1519 }, { "epoch": 0.044378266327990426, "grad_norm": 0.8311224125043134, "learning_rate": 2.218330414477525e-06, "loss": 0.791, "step": 1520 }, { "epoch": 0.04440746255583779, "grad_norm": 0.8237456201504109, "learning_rate": 2.2197898423817867e-06, "loss": 0.7923, "step": 1521 }, { "epoch": 0.04443665878368515, "grad_norm": 0.8383159247444432, "learning_rate": 2.221249270286048e-06, "loss": 0.7346, "step": 1522 }, { "epoch": 0.04446585501153251, "grad_norm": 0.9827096894017612, "learning_rate": 2.2227086981903096e-06, "loss": 0.8154, "step": 1523 }, { "epoch": 0.04449505123937987, "grad_norm": 0.7718799927669971, "learning_rate": 2.2241681260945713e-06, "loss": 0.7356, "step": 1524 }, { "epoch": 0.04452424746722723, "grad_norm": 0.8498705869498765, "learning_rate": 2.2256275539988325e-06, "loss": 0.7625, "step": 1525 }, { "epoch": 0.04455344369507459, "grad_norm": 0.8013208071722132, "learning_rate": 2.227086981903094e-06, "loss": 0.7876, "step": 1526 }, { "epoch": 0.04458263992292196, "grad_norm": 0.8507998890702594, "learning_rate": 2.2285464098073554e-06, "loss": 0.7916, "step": 1527 }, { "epoch": 0.04461183615076932, "grad_norm": 0.9989161860675572, "learning_rate": 2.230005837711617e-06, "loss": 0.8031, "step": 1528 }, { "epoch": 0.044641032378616684, "grad_norm": 0.9997689344584693, "learning_rate": 2.2314652656158788e-06, "loss": 0.7878, "step": 1529 }, { "epoch": 0.044670228606464045, "grad_norm": 0.8124883074815258, "learning_rate": 2.2329246935201404e-06, "loss": 0.8813, "step": 1530 }, { "epoch": 0.044699424834311406, "grad_norm": 0.8155986968346011, "learning_rate": 2.234384121424402e-06, "loss": 0.8955, "step": 1531 }, { "epoch": 0.04472862106215877, "grad_norm": 0.8291205439136615, "learning_rate": 2.2358435493286633e-06, "loss": 0.875, "step": 1532 }, { "epoch": 0.04475781729000613, "grad_norm": 1.0104132378095254, "learning_rate": 2.237302977232925e-06, "loss": 0.7348, "step": 1533 }, { "epoch": 0.0447870135178535, "grad_norm": 0.790833135727148, "learning_rate": 2.2387624051371863e-06, "loss": 0.7972, "step": 1534 }, { "epoch": 0.04481620974570086, "grad_norm": 0.8749480992860534, "learning_rate": 2.240221833041448e-06, "loss": 0.7428, "step": 1535 }, { "epoch": 0.04484540597354822, "grad_norm": 0.8213782749788557, "learning_rate": 2.2416812609457096e-06, "loss": 0.7297, "step": 1536 }, { "epoch": 0.04487460220139558, "grad_norm": 0.8339405665060091, "learning_rate": 2.243140688849971e-06, "loss": 0.8063, "step": 1537 }, { "epoch": 0.04490379842924294, "grad_norm": 0.7269064634052991, "learning_rate": 2.2446001167542325e-06, "loss": 0.7088, "step": 1538 }, { "epoch": 0.0449329946570903, "grad_norm": 0.7766311321672383, "learning_rate": 2.246059544658494e-06, "loss": 0.7361, "step": 1539 }, { "epoch": 0.044962190884937664, "grad_norm": 0.8195215079302531, "learning_rate": 2.2475189725627554e-06, "loss": 0.7194, "step": 1540 }, { "epoch": 0.044991387112785025, "grad_norm": 0.8137021569704311, "learning_rate": 2.248978400467017e-06, "loss": 0.7425, "step": 1541 }, { "epoch": 0.04502058334063239, "grad_norm": 0.8273947928003458, "learning_rate": 2.2504378283712783e-06, "loss": 0.8551, "step": 1542 }, { "epoch": 0.045049779568479754, "grad_norm": 0.8457976675765558, "learning_rate": 2.25189725627554e-06, "loss": 0.8813, "step": 1543 }, { "epoch": 0.045078975796327116, "grad_norm": 0.7778275633302142, "learning_rate": 2.2533566841798017e-06, "loss": 0.6753, "step": 1544 }, { "epoch": 0.04510817202417448, "grad_norm": 0.8988147841092983, "learning_rate": 2.2548161120840634e-06, "loss": 0.8035, "step": 1545 }, { "epoch": 0.04513736825202184, "grad_norm": 0.8117690431682726, "learning_rate": 2.256275539988325e-06, "loss": 0.7744, "step": 1546 }, { "epoch": 0.0451665644798692, "grad_norm": 0.7993648777987528, "learning_rate": 2.2577349678925863e-06, "loss": 0.7697, "step": 1547 }, { "epoch": 0.04519576070771656, "grad_norm": 0.8413884220851363, "learning_rate": 2.259194395796848e-06, "loss": 0.7216, "step": 1548 }, { "epoch": 0.04522495693556393, "grad_norm": 0.8363640760145449, "learning_rate": 2.2606538237011096e-06, "loss": 0.8453, "step": 1549 }, { "epoch": 0.04525415316341129, "grad_norm": 1.0093177962979245, "learning_rate": 2.262113251605371e-06, "loss": 0.7592, "step": 1550 }, { "epoch": 0.04528334939125865, "grad_norm": 0.7596242736196055, "learning_rate": 2.2635726795096325e-06, "loss": 0.7461, "step": 1551 }, { "epoch": 0.04531254561910601, "grad_norm": 0.8874448266432872, "learning_rate": 2.2650321074138938e-06, "loss": 0.7801, "step": 1552 }, { "epoch": 0.04534174184695337, "grad_norm": 0.878792936965313, "learning_rate": 2.2664915353181554e-06, "loss": 0.8693, "step": 1553 }, { "epoch": 0.045370938074800735, "grad_norm": 0.8309848953914928, "learning_rate": 2.267950963222417e-06, "loss": 0.8297, "step": 1554 }, { "epoch": 0.045400134302648096, "grad_norm": 0.8614958031997357, "learning_rate": 2.2694103911266784e-06, "loss": 0.7633, "step": 1555 }, { "epoch": 0.04542933053049546, "grad_norm": 1.1531818958403208, "learning_rate": 2.27086981903094e-06, "loss": 0.6998, "step": 1556 }, { "epoch": 0.045458526758342825, "grad_norm": 0.8486334401303273, "learning_rate": 2.2723292469352017e-06, "loss": 0.8359, "step": 1557 }, { "epoch": 0.045487722986190186, "grad_norm": 0.8761865222063051, "learning_rate": 2.273788674839463e-06, "loss": 0.8707, "step": 1558 }, { "epoch": 0.04551691921403755, "grad_norm": 0.9617322455224677, "learning_rate": 2.2752481027437246e-06, "loss": 0.8216, "step": 1559 }, { "epoch": 0.04554611544188491, "grad_norm": 1.02974060466912, "learning_rate": 2.276707530647986e-06, "loss": 0.7824, "step": 1560 }, { "epoch": 0.04557531166973227, "grad_norm": 0.8875051897849856, "learning_rate": 2.278166958552248e-06, "loss": 0.7493, "step": 1561 }, { "epoch": 0.04560450789757963, "grad_norm": 0.9400916626931335, "learning_rate": 2.279626386456509e-06, "loss": 0.8543, "step": 1562 }, { "epoch": 0.04563370412542699, "grad_norm": 0.8436372519863578, "learning_rate": 2.281085814360771e-06, "loss": 0.8433, "step": 1563 }, { "epoch": 0.045662900353274354, "grad_norm": 0.8103241597368609, "learning_rate": 2.2825452422650325e-06, "loss": 0.7549, "step": 1564 }, { "epoch": 0.04569209658112172, "grad_norm": 0.7602314352803541, "learning_rate": 2.284004670169294e-06, "loss": 0.7354, "step": 1565 }, { "epoch": 0.04572129280896908, "grad_norm": 0.8189655084436862, "learning_rate": 2.2854640980735555e-06, "loss": 0.7788, "step": 1566 }, { "epoch": 0.045750489036816444, "grad_norm": 1.1828498223502808, "learning_rate": 2.2869235259778167e-06, "loss": 0.7392, "step": 1567 }, { "epoch": 0.045779685264663805, "grad_norm": 1.2583744594665218, "learning_rate": 2.2883829538820784e-06, "loss": 0.8871, "step": 1568 }, { "epoch": 0.045808881492511166, "grad_norm": 0.8040594047470561, "learning_rate": 2.28984238178634e-06, "loss": 0.7716, "step": 1569 }, { "epoch": 0.04583807772035853, "grad_norm": 0.9798930958793787, "learning_rate": 2.2913018096906013e-06, "loss": 0.8722, "step": 1570 }, { "epoch": 0.04586727394820589, "grad_norm": 0.7852914382881799, "learning_rate": 2.292761237594863e-06, "loss": 0.7372, "step": 1571 }, { "epoch": 0.04589647017605326, "grad_norm": 1.2059207120839504, "learning_rate": 2.2942206654991246e-06, "loss": 0.9228, "step": 1572 }, { "epoch": 0.04592566640390062, "grad_norm": 1.090214988871401, "learning_rate": 2.295680093403386e-06, "loss": 0.8537, "step": 1573 }, { "epoch": 0.04595486263174798, "grad_norm": 0.8135310664439422, "learning_rate": 2.2971395213076476e-06, "loss": 0.7933, "step": 1574 }, { "epoch": 0.04598405885959534, "grad_norm": 0.7762643757832158, "learning_rate": 2.2985989492119092e-06, "loss": 0.7508, "step": 1575 }, { "epoch": 0.0460132550874427, "grad_norm": 1.0051690086831309, "learning_rate": 2.3000583771161705e-06, "loss": 0.7738, "step": 1576 }, { "epoch": 0.04604245131529006, "grad_norm": 0.8126495115689611, "learning_rate": 2.301517805020432e-06, "loss": 0.6549, "step": 1577 }, { "epoch": 0.046071647543137424, "grad_norm": 0.8403479652960111, "learning_rate": 2.302977232924694e-06, "loss": 0.7887, "step": 1578 }, { "epoch": 0.046100843770984785, "grad_norm": 0.8126836046886702, "learning_rate": 2.3044366608289555e-06, "loss": 0.7961, "step": 1579 }, { "epoch": 0.046130039998832154, "grad_norm": 0.8969650468057211, "learning_rate": 2.3058960887332167e-06, "loss": 0.7936, "step": 1580 }, { "epoch": 0.046159236226679515, "grad_norm": 0.7964219025894962, "learning_rate": 2.3073555166374784e-06, "loss": 0.7188, "step": 1581 }, { "epoch": 0.046188432454526876, "grad_norm": 0.8665848826832655, "learning_rate": 2.30881494454174e-06, "loss": 0.8631, "step": 1582 }, { "epoch": 0.04621762868237424, "grad_norm": 0.7570653526207879, "learning_rate": 2.3102743724460013e-06, "loss": 0.7395, "step": 1583 }, { "epoch": 0.0462468249102216, "grad_norm": 0.7631995451853018, "learning_rate": 2.311733800350263e-06, "loss": 0.7269, "step": 1584 }, { "epoch": 0.04627602113806896, "grad_norm": 0.8422280874653241, "learning_rate": 2.3131932282545242e-06, "loss": 0.8017, "step": 1585 }, { "epoch": 0.04630521736591632, "grad_norm": 0.838919440909544, "learning_rate": 2.314652656158786e-06, "loss": 0.7264, "step": 1586 }, { "epoch": 0.04633441359376369, "grad_norm": 0.7580008795753548, "learning_rate": 2.3161120840630476e-06, "loss": 0.6804, "step": 1587 }, { "epoch": 0.04636360982161105, "grad_norm": 0.8565352051320904, "learning_rate": 2.317571511967309e-06, "loss": 0.8831, "step": 1588 }, { "epoch": 0.04639280604945841, "grad_norm": 0.8268590426575724, "learning_rate": 2.3190309398715705e-06, "loss": 0.8091, "step": 1589 }, { "epoch": 0.04642200227730577, "grad_norm": 0.8466050284278286, "learning_rate": 2.320490367775832e-06, "loss": 0.8167, "step": 1590 }, { "epoch": 0.046451198505153134, "grad_norm": 1.1298057014240812, "learning_rate": 2.3219497956800934e-06, "loss": 0.8272, "step": 1591 }, { "epoch": 0.046480394733000495, "grad_norm": 0.8199245173343422, "learning_rate": 2.323409223584355e-06, "loss": 0.7917, "step": 1592 }, { "epoch": 0.046509590960847856, "grad_norm": 0.8453216571560995, "learning_rate": 2.3248686514886167e-06, "loss": 0.8486, "step": 1593 }, { "epoch": 0.04653878718869522, "grad_norm": 0.8380853388707868, "learning_rate": 2.3263280793928784e-06, "loss": 0.8124, "step": 1594 }, { "epoch": 0.046567983416542585, "grad_norm": 0.7927213584164812, "learning_rate": 2.3277875072971397e-06, "loss": 0.717, "step": 1595 }, { "epoch": 0.04659717964438995, "grad_norm": 0.7695459879191155, "learning_rate": 2.3292469352014013e-06, "loss": 0.7348, "step": 1596 }, { "epoch": 0.04662637587223731, "grad_norm": 0.8398338251383363, "learning_rate": 2.330706363105663e-06, "loss": 0.8279, "step": 1597 }, { "epoch": 0.04665557210008467, "grad_norm": 0.7773950297318373, "learning_rate": 2.3321657910099242e-06, "loss": 0.7008, "step": 1598 }, { "epoch": 0.04668476832793203, "grad_norm": 0.9796872955429624, "learning_rate": 2.333625218914186e-06, "loss": 0.6986, "step": 1599 }, { "epoch": 0.04671396455577939, "grad_norm": 0.8963967905214248, "learning_rate": 2.3350846468184476e-06, "loss": 0.8282, "step": 1600 }, { "epoch": 0.04674316078362675, "grad_norm": 1.1457162921053483, "learning_rate": 2.336544074722709e-06, "loss": 0.8343, "step": 1601 }, { "epoch": 0.04677235701147412, "grad_norm": 0.8278620670963254, "learning_rate": 2.3380035026269705e-06, "loss": 0.7852, "step": 1602 }, { "epoch": 0.04680155323932148, "grad_norm": 0.9337907499371078, "learning_rate": 2.3394629305312317e-06, "loss": 0.9993, "step": 1603 }, { "epoch": 0.04683074946716884, "grad_norm": 0.8907756861821082, "learning_rate": 2.3409223584354934e-06, "loss": 0.9112, "step": 1604 }, { "epoch": 0.046859945695016204, "grad_norm": 0.8692068978378817, "learning_rate": 2.342381786339755e-06, "loss": 0.8495, "step": 1605 }, { "epoch": 0.046889141922863566, "grad_norm": 0.8147157035959487, "learning_rate": 2.3438412142440163e-06, "loss": 0.8061, "step": 1606 }, { "epoch": 0.04691833815071093, "grad_norm": 0.8128649509501187, "learning_rate": 2.345300642148278e-06, "loss": 0.7981, "step": 1607 }, { "epoch": 0.04694753437855829, "grad_norm": 0.8800083277063476, "learning_rate": 2.3467600700525397e-06, "loss": 0.7621, "step": 1608 }, { "epoch": 0.04697673060640565, "grad_norm": 0.7783272226747457, "learning_rate": 2.3482194979568013e-06, "loss": 0.7961, "step": 1609 }, { "epoch": 0.04700592683425302, "grad_norm": 0.7928457281094338, "learning_rate": 2.3496789258610626e-06, "loss": 0.8039, "step": 1610 }, { "epoch": 0.04703512306210038, "grad_norm": 1.0337881014989616, "learning_rate": 2.3511383537653243e-06, "loss": 0.8108, "step": 1611 }, { "epoch": 0.04706431928994774, "grad_norm": 0.8260361031599065, "learning_rate": 2.352597781669586e-06, "loss": 0.8254, "step": 1612 }, { "epoch": 0.0470935155177951, "grad_norm": 0.830191893532515, "learning_rate": 2.354057209573847e-06, "loss": 0.7642, "step": 1613 }, { "epoch": 0.04712271174564246, "grad_norm": 1.0651489233294766, "learning_rate": 2.355516637478109e-06, "loss": 0.7592, "step": 1614 }, { "epoch": 0.04715190797348982, "grad_norm": 0.7951113566822462, "learning_rate": 2.3569760653823705e-06, "loss": 0.7354, "step": 1615 }, { "epoch": 0.047181104201337185, "grad_norm": 0.7860638113568503, "learning_rate": 2.3584354932866318e-06, "loss": 0.7642, "step": 1616 }, { "epoch": 0.04721030042918455, "grad_norm": 0.8191391546381835, "learning_rate": 2.3598949211908934e-06, "loss": 0.8515, "step": 1617 }, { "epoch": 0.047239496657031914, "grad_norm": 0.7675719390136138, "learning_rate": 2.3613543490951547e-06, "loss": 0.7503, "step": 1618 }, { "epoch": 0.047268692884879275, "grad_norm": 0.7871822223536759, "learning_rate": 2.3628137769994164e-06, "loss": 0.7597, "step": 1619 }, { "epoch": 0.047297889112726636, "grad_norm": 0.8301713673145746, "learning_rate": 2.364273204903678e-06, "loss": 0.7692, "step": 1620 }, { "epoch": 0.047327085340574, "grad_norm": 0.8963542428048789, "learning_rate": 2.3657326328079393e-06, "loss": 0.8113, "step": 1621 }, { "epoch": 0.04735628156842136, "grad_norm": 0.800364971755668, "learning_rate": 2.367192060712201e-06, "loss": 0.7867, "step": 1622 }, { "epoch": 0.04738547779626872, "grad_norm": 0.8892950826758562, "learning_rate": 2.3686514886164626e-06, "loss": 0.764, "step": 1623 }, { "epoch": 0.04741467402411608, "grad_norm": 0.817560950801812, "learning_rate": 2.370110916520724e-06, "loss": 0.7659, "step": 1624 }, { "epoch": 0.04744387025196345, "grad_norm": 0.8380952970565269, "learning_rate": 2.371570344424986e-06, "loss": 0.7589, "step": 1625 }, { "epoch": 0.04747306647981081, "grad_norm": 1.2915478641161804, "learning_rate": 2.373029772329247e-06, "loss": 0.8622, "step": 1626 }, { "epoch": 0.04750226270765817, "grad_norm": 0.9160045294367058, "learning_rate": 2.374489200233509e-06, "loss": 0.9389, "step": 1627 }, { "epoch": 0.04753145893550553, "grad_norm": 0.9070375544714211, "learning_rate": 2.37594862813777e-06, "loss": 0.8573, "step": 1628 }, { "epoch": 0.047560655163352894, "grad_norm": 0.7793575287195033, "learning_rate": 2.3774080560420318e-06, "loss": 0.75, "step": 1629 }, { "epoch": 0.047589851391200255, "grad_norm": 0.8459502170206769, "learning_rate": 2.3788674839462935e-06, "loss": 0.827, "step": 1630 }, { "epoch": 0.047619047619047616, "grad_norm": 0.8107734036391253, "learning_rate": 2.3803269118505547e-06, "loss": 0.7667, "step": 1631 }, { "epoch": 0.047648243846894985, "grad_norm": 0.8434139749525277, "learning_rate": 2.3817863397548164e-06, "loss": 0.854, "step": 1632 }, { "epoch": 0.047677440074742346, "grad_norm": 0.7816416010412672, "learning_rate": 2.383245767659078e-06, "loss": 0.7637, "step": 1633 }, { "epoch": 0.04770663630258971, "grad_norm": 0.8805416896233841, "learning_rate": 2.3847051955633393e-06, "loss": 0.7511, "step": 1634 }, { "epoch": 0.04773583253043707, "grad_norm": 0.7975514159053215, "learning_rate": 2.386164623467601e-06, "loss": 0.779, "step": 1635 }, { "epoch": 0.04776502875828443, "grad_norm": 0.766963807518423, "learning_rate": 2.387624051371862e-06, "loss": 0.7013, "step": 1636 }, { "epoch": 0.04779422498613179, "grad_norm": 1.01284888091941, "learning_rate": 2.389083479276124e-06, "loss": 0.8762, "step": 1637 }, { "epoch": 0.04782342121397915, "grad_norm": 0.7903530256416756, "learning_rate": 2.3905429071803855e-06, "loss": 0.73, "step": 1638 }, { "epoch": 0.04785261744182651, "grad_norm": 0.7809955337512821, "learning_rate": 2.3920023350846468e-06, "loss": 0.7037, "step": 1639 }, { "epoch": 0.04788181366967388, "grad_norm": 0.827539574066132, "learning_rate": 2.3934617629889085e-06, "loss": 0.8722, "step": 1640 }, { "epoch": 0.04791100989752124, "grad_norm": 0.7525479977958011, "learning_rate": 2.39492119089317e-06, "loss": 0.6523, "step": 1641 }, { "epoch": 0.047940206125368603, "grad_norm": 0.9839970927450185, "learning_rate": 2.396380618797432e-06, "loss": 0.8216, "step": 1642 }, { "epoch": 0.047969402353215965, "grad_norm": 0.8638243612629068, "learning_rate": 2.397840046701693e-06, "loss": 0.7667, "step": 1643 }, { "epoch": 0.047998598581063326, "grad_norm": 0.7670907820097178, "learning_rate": 2.3992994746059547e-06, "loss": 0.7889, "step": 1644 }, { "epoch": 0.04802779480891069, "grad_norm": 0.7903302909296838, "learning_rate": 2.4007589025102164e-06, "loss": 0.7855, "step": 1645 }, { "epoch": 0.04805699103675805, "grad_norm": 0.9058825874013655, "learning_rate": 2.4022183304144776e-06, "loss": 0.8093, "step": 1646 }, { "epoch": 0.048086187264605416, "grad_norm": 0.7610678973955162, "learning_rate": 2.4036777583187393e-06, "loss": 0.7216, "step": 1647 }, { "epoch": 0.04811538349245278, "grad_norm": 0.829822454036901, "learning_rate": 2.405137186223001e-06, "loss": 0.8111, "step": 1648 }, { "epoch": 0.04814457972030014, "grad_norm": 0.9016509051325816, "learning_rate": 2.4065966141272622e-06, "loss": 0.884, "step": 1649 }, { "epoch": 0.0481737759481475, "grad_norm": 0.8228370552053701, "learning_rate": 2.408056042031524e-06, "loss": 0.8195, "step": 1650 }, { "epoch": 0.04820297217599486, "grad_norm": 0.8139719573861978, "learning_rate": 2.409515469935785e-06, "loss": 0.6716, "step": 1651 }, { "epoch": 0.04823216840384222, "grad_norm": 0.8255873650396072, "learning_rate": 2.410974897840047e-06, "loss": 0.8538, "step": 1652 }, { "epoch": 0.048261364631689584, "grad_norm": 0.7858376464782789, "learning_rate": 2.4124343257443085e-06, "loss": 0.6816, "step": 1653 }, { "epoch": 0.048290560859536945, "grad_norm": 0.7597039245635874, "learning_rate": 2.4138937536485697e-06, "loss": 0.6763, "step": 1654 }, { "epoch": 0.04831975708738431, "grad_norm": 0.9563673707782073, "learning_rate": 2.4153531815528314e-06, "loss": 0.8277, "step": 1655 }, { "epoch": 0.048348953315231674, "grad_norm": 0.8164788961763899, "learning_rate": 2.416812609457093e-06, "loss": 0.8712, "step": 1656 }, { "epoch": 0.048378149543079035, "grad_norm": 0.7992183104145102, "learning_rate": 2.4182720373613547e-06, "loss": 0.847, "step": 1657 }, { "epoch": 0.0484073457709264, "grad_norm": 0.7831120261949273, "learning_rate": 2.4197314652656164e-06, "loss": 0.7881, "step": 1658 }, { "epoch": 0.04843654199877376, "grad_norm": 0.7801673285447792, "learning_rate": 2.4211908931698776e-06, "loss": 0.6842, "step": 1659 }, { "epoch": 0.04846573822662112, "grad_norm": 0.7980150377963491, "learning_rate": 2.4226503210741393e-06, "loss": 0.8618, "step": 1660 }, { "epoch": 0.04849493445446848, "grad_norm": 0.6810330552356417, "learning_rate": 2.4241097489784006e-06, "loss": 0.5797, "step": 1661 }, { "epoch": 0.04852413068231584, "grad_norm": 0.779073767197933, "learning_rate": 2.4255691768826622e-06, "loss": 0.7731, "step": 1662 }, { "epoch": 0.04855332691016321, "grad_norm": 0.9411535052250359, "learning_rate": 2.427028604786924e-06, "loss": 0.7158, "step": 1663 }, { "epoch": 0.04858252313801057, "grad_norm": 0.7297854465394517, "learning_rate": 2.428488032691185e-06, "loss": 0.6449, "step": 1664 }, { "epoch": 0.04861171936585793, "grad_norm": 0.8384583498756368, "learning_rate": 2.429947460595447e-06, "loss": 0.8728, "step": 1665 }, { "epoch": 0.04864091559370529, "grad_norm": 0.8627850820431272, "learning_rate": 2.4314068884997085e-06, "loss": 0.8672, "step": 1666 }, { "epoch": 0.048670111821552654, "grad_norm": 0.7616224772037141, "learning_rate": 2.4328663164039697e-06, "loss": 0.652, "step": 1667 }, { "epoch": 0.048699308049400016, "grad_norm": 0.8498403205730495, "learning_rate": 2.4343257443082314e-06, "loss": 0.9069, "step": 1668 }, { "epoch": 0.04872850427724738, "grad_norm": 0.8851924005072678, "learning_rate": 2.4357851722124927e-06, "loss": 0.8281, "step": 1669 }, { "epoch": 0.048757700505094745, "grad_norm": 0.8126248679577276, "learning_rate": 2.4372446001167543e-06, "loss": 0.8335, "step": 1670 }, { "epoch": 0.048786896732942106, "grad_norm": 0.7750459039891272, "learning_rate": 2.438704028021016e-06, "loss": 0.7624, "step": 1671 }, { "epoch": 0.04881609296078947, "grad_norm": 1.1454336990391218, "learning_rate": 2.4401634559252772e-06, "loss": 0.7743, "step": 1672 }, { "epoch": 0.04884528918863683, "grad_norm": 1.07935938324088, "learning_rate": 2.4416228838295393e-06, "loss": 0.7235, "step": 1673 }, { "epoch": 0.04887448541648419, "grad_norm": 0.8361733436212617, "learning_rate": 2.4430823117338006e-06, "loss": 0.6568, "step": 1674 }, { "epoch": 0.04890368164433155, "grad_norm": 0.7350701069293033, "learning_rate": 2.4445417396380622e-06, "loss": 0.6447, "step": 1675 }, { "epoch": 0.04893287787217891, "grad_norm": 0.8430327049029525, "learning_rate": 2.4460011675423235e-06, "loss": 0.7904, "step": 1676 }, { "epoch": 0.04896207410002627, "grad_norm": 0.8804547372938428, "learning_rate": 2.447460595446585e-06, "loss": 0.9025, "step": 1677 }, { "epoch": 0.04899127032787364, "grad_norm": 0.8998916413406834, "learning_rate": 2.448920023350847e-06, "loss": 0.8675, "step": 1678 }, { "epoch": 0.049020466555721, "grad_norm": 1.2745994105239518, "learning_rate": 2.450379451255108e-06, "loss": 0.7358, "step": 1679 }, { "epoch": 0.049049662783568364, "grad_norm": 0.8727509390980269, "learning_rate": 2.4518388791593698e-06, "loss": 0.837, "step": 1680 }, { "epoch": 0.049078859011415725, "grad_norm": 0.8067431442188316, "learning_rate": 2.4532983070636314e-06, "loss": 0.8057, "step": 1681 }, { "epoch": 0.049108055239263086, "grad_norm": 1.0858603646786382, "learning_rate": 2.4547577349678927e-06, "loss": 0.7968, "step": 1682 }, { "epoch": 0.04913725146711045, "grad_norm": 0.7657488249468944, "learning_rate": 2.4562171628721543e-06, "loss": 0.7678, "step": 1683 }, { "epoch": 0.04916644769495781, "grad_norm": 0.7558490468844309, "learning_rate": 2.4576765907764156e-06, "loss": 0.7481, "step": 1684 }, { "epoch": 0.04919564392280518, "grad_norm": 1.0459224082468321, "learning_rate": 2.4591360186806773e-06, "loss": 0.7014, "step": 1685 }, { "epoch": 0.04922484015065254, "grad_norm": 0.7780495889885977, "learning_rate": 2.460595446584939e-06, "loss": 0.671, "step": 1686 }, { "epoch": 0.0492540363784999, "grad_norm": 0.8075201183304416, "learning_rate": 2.4620548744892e-06, "loss": 0.8406, "step": 1687 }, { "epoch": 0.04928323260634726, "grad_norm": 0.8110324039387014, "learning_rate": 2.463514302393462e-06, "loss": 0.798, "step": 1688 }, { "epoch": 0.04931242883419462, "grad_norm": 0.7297370911622282, "learning_rate": 2.4649737302977235e-06, "loss": 0.6098, "step": 1689 }, { "epoch": 0.04934162506204198, "grad_norm": 0.8006597539759105, "learning_rate": 2.466433158201985e-06, "loss": 0.7222, "step": 1690 }, { "epoch": 0.049370821289889344, "grad_norm": 0.7929475145298838, "learning_rate": 2.467892586106247e-06, "loss": 0.7852, "step": 1691 }, { "epoch": 0.049400017517736705, "grad_norm": 0.8601004100137685, "learning_rate": 2.469352014010508e-06, "loss": 0.8044, "step": 1692 }, { "epoch": 0.04942921374558407, "grad_norm": 1.053057931034883, "learning_rate": 2.4708114419147698e-06, "loss": 0.8695, "step": 1693 }, { "epoch": 0.049458409973431434, "grad_norm": 0.8141996686769919, "learning_rate": 2.472270869819031e-06, "loss": 0.7682, "step": 1694 }, { "epoch": 0.049487606201278796, "grad_norm": 0.7945690582512798, "learning_rate": 2.4737302977232927e-06, "loss": 0.7283, "step": 1695 }, { "epoch": 0.04951680242912616, "grad_norm": 0.7831440669139279, "learning_rate": 2.4751897256275544e-06, "loss": 0.7569, "step": 1696 }, { "epoch": 0.04954599865697352, "grad_norm": 0.8475592581247732, "learning_rate": 2.4766491535318156e-06, "loss": 0.7664, "step": 1697 }, { "epoch": 0.04957519488482088, "grad_norm": 0.8258601155174788, "learning_rate": 2.4781085814360773e-06, "loss": 0.7957, "step": 1698 }, { "epoch": 0.04960439111266824, "grad_norm": 0.8953400593031653, "learning_rate": 2.479568009340339e-06, "loss": 0.8141, "step": 1699 }, { "epoch": 0.04963358734051561, "grad_norm": 0.7547712304840809, "learning_rate": 2.4810274372446e-06, "loss": 0.73, "step": 1700 }, { "epoch": 0.04966278356836297, "grad_norm": 0.88609069259447, "learning_rate": 2.482486865148862e-06, "loss": 0.8998, "step": 1701 }, { "epoch": 0.04969197979621033, "grad_norm": 0.9840261510250933, "learning_rate": 2.483946293053123e-06, "loss": 0.7417, "step": 1702 }, { "epoch": 0.04972117602405769, "grad_norm": 0.793567794427986, "learning_rate": 2.4854057209573848e-06, "loss": 0.7369, "step": 1703 }, { "epoch": 0.04975037225190505, "grad_norm": 0.7685583998959375, "learning_rate": 2.4868651488616464e-06, "loss": 0.7054, "step": 1704 }, { "epoch": 0.049779568479752415, "grad_norm": 0.9731863099229853, "learning_rate": 2.488324576765908e-06, "loss": 0.7118, "step": 1705 }, { "epoch": 0.049808764707599776, "grad_norm": 0.7810183634275882, "learning_rate": 2.4897840046701698e-06, "loss": 0.8009, "step": 1706 }, { "epoch": 0.04983796093544714, "grad_norm": 0.7679990645023899, "learning_rate": 2.491243432574431e-06, "loss": 0.738, "step": 1707 }, { "epoch": 0.049867157163294505, "grad_norm": 0.8937363552322706, "learning_rate": 2.4927028604786927e-06, "loss": 0.6626, "step": 1708 }, { "epoch": 0.049896353391141866, "grad_norm": 0.8923392600944491, "learning_rate": 2.494162288382954e-06, "loss": 0.7984, "step": 1709 }, { "epoch": 0.04992554961898923, "grad_norm": 0.9033617761043724, "learning_rate": 2.4956217162872156e-06, "loss": 0.8251, "step": 1710 }, { "epoch": 0.04995474584683659, "grad_norm": 0.8496882485289664, "learning_rate": 2.4970811441914773e-06, "loss": 0.8744, "step": 1711 }, { "epoch": 0.04998394207468395, "grad_norm": 0.853504195936712, "learning_rate": 2.4985405720957385e-06, "loss": 0.8636, "step": 1712 }, { "epoch": 0.05001313830253131, "grad_norm": 0.8111009207661289, "learning_rate": 2.5e-06, "loss": 0.7805, "step": 1713 }, { "epoch": 0.05004233453037867, "grad_norm": 0.8466914195790723, "learning_rate": 2.501459427904262e-06, "loss": 0.8589, "step": 1714 }, { "epoch": 0.05007153075822604, "grad_norm": 0.7972429066930327, "learning_rate": 2.502918855808523e-06, "loss": 0.705, "step": 1715 }, { "epoch": 0.0501007269860734, "grad_norm": 0.8472344043792727, "learning_rate": 2.5043782837127852e-06, "loss": 0.8064, "step": 1716 }, { "epoch": 0.05012992321392076, "grad_norm": 0.796998524308374, "learning_rate": 2.5058377116170465e-06, "loss": 0.8163, "step": 1717 }, { "epoch": 0.050159119441768124, "grad_norm": 0.849477311392534, "learning_rate": 2.507297139521308e-06, "loss": 0.7653, "step": 1718 }, { "epoch": 0.050188315669615485, "grad_norm": 0.8014497421551572, "learning_rate": 2.5087565674255694e-06, "loss": 0.6876, "step": 1719 }, { "epoch": 0.050217511897462846, "grad_norm": 0.9268789317172622, "learning_rate": 2.510215995329831e-06, "loss": 0.8576, "step": 1720 }, { "epoch": 0.05024670812531021, "grad_norm": 0.7688415863170461, "learning_rate": 2.5116754232340923e-06, "loss": 0.7151, "step": 1721 }, { "epoch": 0.05027590435315757, "grad_norm": 0.7433363983429402, "learning_rate": 2.513134851138354e-06, "loss": 0.6478, "step": 1722 }, { "epoch": 0.05030510058100494, "grad_norm": 0.7883259451027239, "learning_rate": 2.514594279042615e-06, "loss": 0.672, "step": 1723 }, { "epoch": 0.0503342968088523, "grad_norm": 0.8238207556182752, "learning_rate": 2.5160537069468773e-06, "loss": 0.7978, "step": 1724 }, { "epoch": 0.05036349303669966, "grad_norm": 0.9936387100192894, "learning_rate": 2.5175131348511386e-06, "loss": 0.7925, "step": 1725 }, { "epoch": 0.05039268926454702, "grad_norm": 0.7878684559436608, "learning_rate": 2.5189725627554002e-06, "loss": 0.7481, "step": 1726 }, { "epoch": 0.05042188549239438, "grad_norm": 0.9028315360885133, "learning_rate": 2.5204319906596615e-06, "loss": 0.8719, "step": 1727 }, { "epoch": 0.05045108172024174, "grad_norm": 0.8590179732120612, "learning_rate": 2.521891418563923e-06, "loss": 0.7346, "step": 1728 }, { "epoch": 0.050480277948089104, "grad_norm": 0.9735982508454125, "learning_rate": 2.5233508464681844e-06, "loss": 0.7588, "step": 1729 }, { "epoch": 0.05050947417593647, "grad_norm": 0.7853537491725768, "learning_rate": 2.524810274372446e-06, "loss": 0.7466, "step": 1730 }, { "epoch": 0.050538670403783834, "grad_norm": 0.9418798822997108, "learning_rate": 2.526269702276708e-06, "loss": 0.8192, "step": 1731 }, { "epoch": 0.050567866631631195, "grad_norm": 0.7958181535356869, "learning_rate": 2.5277291301809694e-06, "loss": 0.6823, "step": 1732 }, { "epoch": 0.050597062859478556, "grad_norm": 0.760561731446286, "learning_rate": 2.529188558085231e-06, "loss": 0.7391, "step": 1733 }, { "epoch": 0.05062625908732592, "grad_norm": 0.9887785763212849, "learning_rate": 2.5306479859894923e-06, "loss": 0.7997, "step": 1734 }, { "epoch": 0.05065545531517328, "grad_norm": 0.8498660976401506, "learning_rate": 2.532107413893754e-06, "loss": 0.7129, "step": 1735 }, { "epoch": 0.05068465154302064, "grad_norm": 0.7608117150149091, "learning_rate": 2.5335668417980152e-06, "loss": 0.7153, "step": 1736 }, { "epoch": 0.050713847770868, "grad_norm": 0.7845440244178542, "learning_rate": 2.535026269702277e-06, "loss": 0.7457, "step": 1737 }, { "epoch": 0.05074304399871537, "grad_norm": 1.0094531689017159, "learning_rate": 2.536485697606538e-06, "loss": 0.8409, "step": 1738 }, { "epoch": 0.05077224022656273, "grad_norm": 0.9582679403389165, "learning_rate": 2.5379451255108002e-06, "loss": 0.9769, "step": 1739 }, { "epoch": 0.05080143645441009, "grad_norm": 0.7429266986930784, "learning_rate": 2.5394045534150615e-06, "loss": 0.7321, "step": 1740 }, { "epoch": 0.05083063268225745, "grad_norm": 0.9071665385553965, "learning_rate": 2.540863981319323e-06, "loss": 0.8511, "step": 1741 }, { "epoch": 0.050859828910104814, "grad_norm": 0.8601975234575754, "learning_rate": 2.5423234092235844e-06, "loss": 0.6692, "step": 1742 }, { "epoch": 0.050889025137952175, "grad_norm": 0.8122699272314521, "learning_rate": 2.543782837127846e-06, "loss": 0.7132, "step": 1743 }, { "epoch": 0.050918221365799536, "grad_norm": 0.8248355876850805, "learning_rate": 2.5452422650321073e-06, "loss": 0.8359, "step": 1744 }, { "epoch": 0.050947417593646904, "grad_norm": 0.8324635886387317, "learning_rate": 2.546701692936369e-06, "loss": 0.7954, "step": 1745 }, { "epoch": 0.050976613821494265, "grad_norm": 0.8426920598347843, "learning_rate": 2.548161120840631e-06, "loss": 0.8708, "step": 1746 }, { "epoch": 0.05100581004934163, "grad_norm": 0.8105137114657247, "learning_rate": 2.5496205487448923e-06, "loss": 0.807, "step": 1747 }, { "epoch": 0.05103500627718899, "grad_norm": 0.7471657182298767, "learning_rate": 2.551079976649154e-06, "loss": 0.73, "step": 1748 }, { "epoch": 0.05106420250503635, "grad_norm": 0.8495641805848742, "learning_rate": 2.5525394045534152e-06, "loss": 0.7296, "step": 1749 }, { "epoch": 0.05109339873288371, "grad_norm": 0.8765940473857066, "learning_rate": 2.553998832457677e-06, "loss": 0.6643, "step": 1750 }, { "epoch": 0.05112259496073107, "grad_norm": 0.8259884717666429, "learning_rate": 2.555458260361938e-06, "loss": 0.7657, "step": 1751 }, { "epoch": 0.05115179118857843, "grad_norm": 0.9021457503052644, "learning_rate": 2.5569176882662e-06, "loss": 0.8023, "step": 1752 }, { "epoch": 0.0511809874164258, "grad_norm": 0.8586493594496422, "learning_rate": 2.558377116170461e-06, "loss": 0.7875, "step": 1753 }, { "epoch": 0.05121018364427316, "grad_norm": 0.785612043936072, "learning_rate": 2.559836544074723e-06, "loss": 0.7927, "step": 1754 }, { "epoch": 0.05123937987212052, "grad_norm": 0.8115751160881063, "learning_rate": 2.5612959719789844e-06, "loss": 0.7307, "step": 1755 }, { "epoch": 0.051268576099967884, "grad_norm": 0.7558524846089982, "learning_rate": 2.562755399883246e-06, "loss": 0.7597, "step": 1756 }, { "epoch": 0.051297772327815246, "grad_norm": 1.0290449724737156, "learning_rate": 2.5642148277875073e-06, "loss": 0.8259, "step": 1757 }, { "epoch": 0.05132696855566261, "grad_norm": 0.8806901691226876, "learning_rate": 2.565674255691769e-06, "loss": 0.8425, "step": 1758 }, { "epoch": 0.05135616478350997, "grad_norm": 0.8351298175961597, "learning_rate": 2.5671336835960302e-06, "loss": 0.7804, "step": 1759 }, { "epoch": 0.05138536101135733, "grad_norm": 0.8019514004433066, "learning_rate": 2.568593111500292e-06, "loss": 0.7218, "step": 1760 }, { "epoch": 0.0514145572392047, "grad_norm": 0.9264062194344755, "learning_rate": 2.570052539404553e-06, "loss": 0.7327, "step": 1761 }, { "epoch": 0.05144375346705206, "grad_norm": 0.9686781640419231, "learning_rate": 2.5715119673088153e-06, "loss": 0.8578, "step": 1762 }, { "epoch": 0.05147294969489942, "grad_norm": 0.8073479313067485, "learning_rate": 2.572971395213077e-06, "loss": 0.8023, "step": 1763 }, { "epoch": 0.05150214592274678, "grad_norm": 0.9492890534113103, "learning_rate": 2.574430823117338e-06, "loss": 0.7949, "step": 1764 }, { "epoch": 0.05153134215059414, "grad_norm": 0.7798555012462685, "learning_rate": 2.5758902510216e-06, "loss": 0.766, "step": 1765 }, { "epoch": 0.0515605383784415, "grad_norm": 0.9778317982358146, "learning_rate": 2.577349678925861e-06, "loss": 0.7606, "step": 1766 }, { "epoch": 0.051589734606288865, "grad_norm": 0.9203830640997447, "learning_rate": 2.578809106830123e-06, "loss": 0.8203, "step": 1767 }, { "epoch": 0.05161893083413623, "grad_norm": 0.7660732165998605, "learning_rate": 2.580268534734384e-06, "loss": 0.75, "step": 1768 }, { "epoch": 0.051648127061983594, "grad_norm": 0.911496209502028, "learning_rate": 2.581727962638646e-06, "loss": 0.9061, "step": 1769 }, { "epoch": 0.051677323289830955, "grad_norm": 0.8763118039456214, "learning_rate": 2.5831873905429073e-06, "loss": 0.7959, "step": 1770 }, { "epoch": 0.051706519517678316, "grad_norm": 0.8347126942523014, "learning_rate": 2.584646818447169e-06, "loss": 0.8487, "step": 1771 }, { "epoch": 0.05173571574552568, "grad_norm": 0.8022981052790593, "learning_rate": 2.5861062463514303e-06, "loss": 0.7682, "step": 1772 }, { "epoch": 0.05176491197337304, "grad_norm": 1.6415578445668297, "learning_rate": 2.587565674255692e-06, "loss": 0.7989, "step": 1773 }, { "epoch": 0.0517941082012204, "grad_norm": 0.8235357103693671, "learning_rate": 2.589025102159953e-06, "loss": 0.7581, "step": 1774 }, { "epoch": 0.05182330442906776, "grad_norm": 0.8022312627606722, "learning_rate": 2.5904845300642153e-06, "loss": 0.6937, "step": 1775 }, { "epoch": 0.05185250065691513, "grad_norm": 0.8034426973580399, "learning_rate": 2.591943957968476e-06, "loss": 0.7188, "step": 1776 }, { "epoch": 0.05188169688476249, "grad_norm": 0.8170133158530625, "learning_rate": 2.593403385872738e-06, "loss": 0.7261, "step": 1777 }, { "epoch": 0.05191089311260985, "grad_norm": 1.2586159938092938, "learning_rate": 2.594862813777e-06, "loss": 0.7591, "step": 1778 }, { "epoch": 0.05194008934045721, "grad_norm": 0.9012401870211323, "learning_rate": 2.596322241681261e-06, "loss": 0.8272, "step": 1779 }, { "epoch": 0.051969285568304574, "grad_norm": 0.904339893780054, "learning_rate": 2.5977816695855228e-06, "loss": 0.8112, "step": 1780 }, { "epoch": 0.051998481796151935, "grad_norm": 1.0122835331228357, "learning_rate": 2.599241097489784e-06, "loss": 0.8079, "step": 1781 }, { "epoch": 0.052027678023999296, "grad_norm": 0.7931803263561911, "learning_rate": 2.600700525394046e-06, "loss": 0.8115, "step": 1782 }, { "epoch": 0.052056874251846665, "grad_norm": 0.8382222266132798, "learning_rate": 2.6021599532983074e-06, "loss": 0.8029, "step": 1783 }, { "epoch": 0.052086070479694026, "grad_norm": 0.9095606205302098, "learning_rate": 2.603619381202569e-06, "loss": 0.8287, "step": 1784 }, { "epoch": 0.05211526670754139, "grad_norm": 0.8125419419074631, "learning_rate": 2.6050788091068303e-06, "loss": 0.8105, "step": 1785 }, { "epoch": 0.05214446293538875, "grad_norm": 1.1331145866820125, "learning_rate": 2.606538237011092e-06, "loss": 0.85, "step": 1786 }, { "epoch": 0.05217365916323611, "grad_norm": 0.83896004391911, "learning_rate": 2.607997664915353e-06, "loss": 0.7871, "step": 1787 }, { "epoch": 0.05220285539108347, "grad_norm": 0.9018597306321939, "learning_rate": 2.609457092819615e-06, "loss": 0.8159, "step": 1788 }, { "epoch": 0.05223205161893083, "grad_norm": 0.7980743118118618, "learning_rate": 2.610916520723876e-06, "loss": 0.7586, "step": 1789 }, { "epoch": 0.05226124784677819, "grad_norm": 0.9054210126684006, "learning_rate": 2.612375948628138e-06, "loss": 0.8274, "step": 1790 }, { "epoch": 0.05229044407462556, "grad_norm": 0.9701231647783918, "learning_rate": 2.6138353765323995e-06, "loss": 0.8421, "step": 1791 }, { "epoch": 0.05231964030247292, "grad_norm": 0.8593210231267849, "learning_rate": 2.615294804436661e-06, "loss": 0.7873, "step": 1792 }, { "epoch": 0.052348836530320283, "grad_norm": 0.8004588745875775, "learning_rate": 2.6167542323409224e-06, "loss": 0.6901, "step": 1793 }, { "epoch": 0.052378032758167645, "grad_norm": 0.8267267409880866, "learning_rate": 2.618213660245184e-06, "loss": 0.7962, "step": 1794 }, { "epoch": 0.052407228986015006, "grad_norm": 0.9880228468725021, "learning_rate": 2.6196730881494457e-06, "loss": 0.7419, "step": 1795 }, { "epoch": 0.05243642521386237, "grad_norm": 0.7690314633558495, "learning_rate": 2.621132516053707e-06, "loss": 0.7005, "step": 1796 }, { "epoch": 0.05246562144170973, "grad_norm": 0.826341461179794, "learning_rate": 2.622591943957969e-06, "loss": 0.8134, "step": 1797 }, { "epoch": 0.052494817669557096, "grad_norm": 0.8488035270810999, "learning_rate": 2.6240513718622303e-06, "loss": 0.7957, "step": 1798 }, { "epoch": 0.05252401389740446, "grad_norm": 0.8861466788412817, "learning_rate": 2.625510799766492e-06, "loss": 0.7556, "step": 1799 }, { "epoch": 0.05255321012525182, "grad_norm": 0.7529645329008596, "learning_rate": 2.6269702276707532e-06, "loss": 0.7503, "step": 1800 }, { "epoch": 0.05258240635309918, "grad_norm": 0.8251299445111152, "learning_rate": 2.628429655575015e-06, "loss": 0.8138, "step": 1801 }, { "epoch": 0.05261160258094654, "grad_norm": 1.0719594117889013, "learning_rate": 2.629889083479276e-06, "loss": 0.7634, "step": 1802 }, { "epoch": 0.0526407988087939, "grad_norm": 0.7992636135465568, "learning_rate": 2.631348511383538e-06, "loss": 0.7886, "step": 1803 }, { "epoch": 0.052669995036641264, "grad_norm": 0.7639919793655227, "learning_rate": 2.632807939287799e-06, "loss": 0.7623, "step": 1804 }, { "epoch": 0.052699191264488625, "grad_norm": 0.7579902974580345, "learning_rate": 2.634267367192061e-06, "loss": 0.7054, "step": 1805 }, { "epoch": 0.05272838749233599, "grad_norm": 1.023574174009989, "learning_rate": 2.6357267950963224e-06, "loss": 0.7826, "step": 1806 }, { "epoch": 0.052757583720183354, "grad_norm": 0.8649273658176375, "learning_rate": 2.637186223000584e-06, "loss": 0.8054, "step": 1807 }, { "epoch": 0.052786779948030715, "grad_norm": 0.7945180487884042, "learning_rate": 2.6386456509048453e-06, "loss": 0.7071, "step": 1808 }, { "epoch": 0.05281597617587808, "grad_norm": 0.8253306581719134, "learning_rate": 2.640105078809107e-06, "loss": 0.8178, "step": 1809 }, { "epoch": 0.05284517240372544, "grad_norm": 1.1176354684439231, "learning_rate": 2.6415645067133686e-06, "loss": 0.682, "step": 1810 }, { "epoch": 0.0528743686315728, "grad_norm": 0.8231319996468213, "learning_rate": 2.64302393461763e-06, "loss": 0.8165, "step": 1811 }, { "epoch": 0.05290356485942016, "grad_norm": 0.9292749954175503, "learning_rate": 2.644483362521892e-06, "loss": 0.8817, "step": 1812 }, { "epoch": 0.05293276108726753, "grad_norm": 1.0447836853165173, "learning_rate": 2.6459427904261532e-06, "loss": 0.8263, "step": 1813 }, { "epoch": 0.05296195731511489, "grad_norm": 0.784822207947592, "learning_rate": 2.647402218330415e-06, "loss": 0.697, "step": 1814 }, { "epoch": 0.05299115354296225, "grad_norm": 0.8094602229224378, "learning_rate": 2.648861646234676e-06, "loss": 0.8044, "step": 1815 }, { "epoch": 0.05302034977080961, "grad_norm": 0.8755609106494119, "learning_rate": 2.650321074138938e-06, "loss": 0.8575, "step": 1816 }, { "epoch": 0.05304954599865697, "grad_norm": 0.748482542847554, "learning_rate": 2.651780502043199e-06, "loss": 0.6859, "step": 1817 }, { "epoch": 0.053078742226504334, "grad_norm": 0.8759003228732141, "learning_rate": 2.6532399299474607e-06, "loss": 0.702, "step": 1818 }, { "epoch": 0.053107938454351696, "grad_norm": 0.9527010772216414, "learning_rate": 2.654699357851722e-06, "loss": 0.8126, "step": 1819 }, { "epoch": 0.05313713468219906, "grad_norm": 0.8024367045158142, "learning_rate": 2.656158785755984e-06, "loss": 0.8276, "step": 1820 }, { "epoch": 0.053166330910046425, "grad_norm": 0.7317534735204877, "learning_rate": 2.6576182136602453e-06, "loss": 0.6379, "step": 1821 }, { "epoch": 0.053195527137893786, "grad_norm": 0.8769452797659227, "learning_rate": 2.659077641564507e-06, "loss": 0.7334, "step": 1822 }, { "epoch": 0.05322472336574115, "grad_norm": 0.8317583394282403, "learning_rate": 2.6605370694687682e-06, "loss": 0.8505, "step": 1823 }, { "epoch": 0.05325391959358851, "grad_norm": 0.8545879643587481, "learning_rate": 2.66199649737303e-06, "loss": 0.8361, "step": 1824 }, { "epoch": 0.05328311582143587, "grad_norm": 0.7917106918300696, "learning_rate": 2.663455925277291e-06, "loss": 0.752, "step": 1825 }, { "epoch": 0.05331231204928323, "grad_norm": 0.7817694242938442, "learning_rate": 2.664915353181553e-06, "loss": 0.7937, "step": 1826 }, { "epoch": 0.05334150827713059, "grad_norm": 0.78376157829207, "learning_rate": 2.666374781085815e-06, "loss": 0.7285, "step": 1827 }, { "epoch": 0.05337070450497796, "grad_norm": 0.8789689745948633, "learning_rate": 2.667834208990076e-06, "loss": 0.8274, "step": 1828 }, { "epoch": 0.05339990073282532, "grad_norm": 0.7491248702089838, "learning_rate": 2.669293636894338e-06, "loss": 0.6897, "step": 1829 }, { "epoch": 0.05342909696067268, "grad_norm": 0.85318690069982, "learning_rate": 2.670753064798599e-06, "loss": 0.7884, "step": 1830 }, { "epoch": 0.053458293188520044, "grad_norm": 0.7584018638387956, "learning_rate": 2.6722124927028608e-06, "loss": 0.7281, "step": 1831 }, { "epoch": 0.053487489416367405, "grad_norm": 0.8679172688566896, "learning_rate": 2.673671920607122e-06, "loss": 0.6538, "step": 1832 }, { "epoch": 0.053516685644214766, "grad_norm": 1.046399812681977, "learning_rate": 2.675131348511384e-06, "loss": 0.7483, "step": 1833 }, { "epoch": 0.05354588187206213, "grad_norm": 0.8528504440286474, "learning_rate": 2.6765907764156453e-06, "loss": 0.7798, "step": 1834 }, { "epoch": 0.05357507809990949, "grad_norm": 0.7522512163327865, "learning_rate": 2.678050204319907e-06, "loss": 0.7226, "step": 1835 }, { "epoch": 0.05360427432775686, "grad_norm": 0.8734713874815414, "learning_rate": 2.6795096322241683e-06, "loss": 0.9251, "step": 1836 }, { "epoch": 0.05363347055560422, "grad_norm": 0.8134478783879171, "learning_rate": 2.68096906012843e-06, "loss": 0.6991, "step": 1837 }, { "epoch": 0.05366266678345158, "grad_norm": 0.7422291286653814, "learning_rate": 2.682428488032691e-06, "loss": 0.7106, "step": 1838 }, { "epoch": 0.05369186301129894, "grad_norm": 0.8111864745245834, "learning_rate": 2.683887915936953e-06, "loss": 0.7631, "step": 1839 }, { "epoch": 0.0537210592391463, "grad_norm": 1.2802464081402105, "learning_rate": 2.685347343841214e-06, "loss": 0.7835, "step": 1840 }, { "epoch": 0.05375025546699366, "grad_norm": 0.8027860765131236, "learning_rate": 2.686806771745476e-06, "loss": 0.7225, "step": 1841 }, { "epoch": 0.053779451694841024, "grad_norm": 0.7378304645725808, "learning_rate": 2.688266199649738e-06, "loss": 0.6838, "step": 1842 }, { "epoch": 0.05380864792268839, "grad_norm": 0.856018841107494, "learning_rate": 2.689725627553999e-06, "loss": 0.8509, "step": 1843 }, { "epoch": 0.05383784415053575, "grad_norm": 0.7481164708879693, "learning_rate": 2.6911850554582608e-06, "loss": 0.7522, "step": 1844 }, { "epoch": 0.053867040378383114, "grad_norm": 0.9810586341254888, "learning_rate": 2.692644483362522e-06, "loss": 0.8215, "step": 1845 }, { "epoch": 0.053896236606230476, "grad_norm": 0.7798203477025074, "learning_rate": 2.6941039112667837e-06, "loss": 0.7246, "step": 1846 }, { "epoch": 0.05392543283407784, "grad_norm": 1.0904576410298774, "learning_rate": 2.695563339171045e-06, "loss": 0.8654, "step": 1847 }, { "epoch": 0.0539546290619252, "grad_norm": 0.7723316389544542, "learning_rate": 2.697022767075307e-06, "loss": 0.7172, "step": 1848 }, { "epoch": 0.05398382528977256, "grad_norm": 1.0381084427935494, "learning_rate": 2.6984821949795683e-06, "loss": 0.8064, "step": 1849 }, { "epoch": 0.05401302151761992, "grad_norm": 0.8509300427963394, "learning_rate": 2.69994162288383e-06, "loss": 0.8029, "step": 1850 }, { "epoch": 0.05404221774546729, "grad_norm": 0.8231881385659457, "learning_rate": 2.701401050788091e-06, "loss": 0.8216, "step": 1851 }, { "epoch": 0.05407141397331465, "grad_norm": 0.8389125924082185, "learning_rate": 2.702860478692353e-06, "loss": 0.7432, "step": 1852 }, { "epoch": 0.05410061020116201, "grad_norm": 0.8489273437765283, "learning_rate": 2.704319906596614e-06, "loss": 0.8066, "step": 1853 }, { "epoch": 0.05412980642900937, "grad_norm": 0.8097396277801991, "learning_rate": 2.7057793345008758e-06, "loss": 0.7379, "step": 1854 }, { "epoch": 0.05415900265685673, "grad_norm": 0.778887545644779, "learning_rate": 2.707238762405137e-06, "loss": 0.7077, "step": 1855 }, { "epoch": 0.054188198884704095, "grad_norm": 0.7378042538622192, "learning_rate": 2.708698190309399e-06, "loss": 0.6739, "step": 1856 }, { "epoch": 0.054217395112551456, "grad_norm": 0.8519845957254342, "learning_rate": 2.7101576182136604e-06, "loss": 0.8416, "step": 1857 }, { "epoch": 0.05424659134039882, "grad_norm": 0.8194287063637561, "learning_rate": 2.711617046117922e-06, "loss": 0.742, "step": 1858 }, { "epoch": 0.054275787568246185, "grad_norm": 1.0130899626716356, "learning_rate": 2.7130764740221837e-06, "loss": 0.8381, "step": 1859 }, { "epoch": 0.054304983796093546, "grad_norm": 0.837485198789628, "learning_rate": 2.714535901926445e-06, "loss": 0.8194, "step": 1860 }, { "epoch": 0.05433418002394091, "grad_norm": 0.7617803762359343, "learning_rate": 2.7159953298307066e-06, "loss": 0.6684, "step": 1861 }, { "epoch": 0.05436337625178827, "grad_norm": 0.7696573641531324, "learning_rate": 2.717454757734968e-06, "loss": 0.7721, "step": 1862 }, { "epoch": 0.05439257247963563, "grad_norm": 0.9140111192376864, "learning_rate": 2.71891418563923e-06, "loss": 0.7855, "step": 1863 }, { "epoch": 0.05442176870748299, "grad_norm": 0.7462430057586422, "learning_rate": 2.720373613543491e-06, "loss": 0.7453, "step": 1864 }, { "epoch": 0.05445096493533035, "grad_norm": 0.8402786390238391, "learning_rate": 2.721833041447753e-06, "loss": 0.7884, "step": 1865 }, { "epoch": 0.05448016116317772, "grad_norm": 1.0159070638884917, "learning_rate": 2.723292469352014e-06, "loss": 0.7834, "step": 1866 }, { "epoch": 0.05450935739102508, "grad_norm": 0.9547746100346852, "learning_rate": 2.724751897256276e-06, "loss": 0.8505, "step": 1867 }, { "epoch": 0.05453855361887244, "grad_norm": 0.8041583298679031, "learning_rate": 2.726211325160537e-06, "loss": 0.8019, "step": 1868 }, { "epoch": 0.054567749846719804, "grad_norm": 0.8243769227851634, "learning_rate": 2.7276707530647987e-06, "loss": 0.8374, "step": 1869 }, { "epoch": 0.054596946074567165, "grad_norm": 0.7564079490670351, "learning_rate": 2.72913018096906e-06, "loss": 0.6844, "step": 1870 }, { "epoch": 0.054626142302414527, "grad_norm": 0.7929944519089054, "learning_rate": 2.730589608873322e-06, "loss": 0.7834, "step": 1871 }, { "epoch": 0.05465533853026189, "grad_norm": 0.8769894272498352, "learning_rate": 2.7320490367775833e-06, "loss": 0.8231, "step": 1872 }, { "epoch": 0.05468453475810925, "grad_norm": 0.8227209150245748, "learning_rate": 2.733508464681845e-06, "loss": 0.8327, "step": 1873 }, { "epoch": 0.05471373098595662, "grad_norm": 0.8051023312799337, "learning_rate": 2.7349678925861066e-06, "loss": 0.7723, "step": 1874 }, { "epoch": 0.05474292721380398, "grad_norm": 0.7474723456041106, "learning_rate": 2.736427320490368e-06, "loss": 0.6516, "step": 1875 }, { "epoch": 0.05477212344165134, "grad_norm": 0.7853677508184553, "learning_rate": 2.7378867483946295e-06, "loss": 0.7612, "step": 1876 }, { "epoch": 0.0548013196694987, "grad_norm": 0.8133536609762984, "learning_rate": 2.739346176298891e-06, "loss": 0.7897, "step": 1877 }, { "epoch": 0.05483051589734606, "grad_norm": 0.8017639802115504, "learning_rate": 2.740805604203153e-06, "loss": 0.8093, "step": 1878 }, { "epoch": 0.05485971212519342, "grad_norm": 0.8522559551274099, "learning_rate": 2.742265032107414e-06, "loss": 0.781, "step": 1879 }, { "epoch": 0.054888908353040784, "grad_norm": 0.8550644747600198, "learning_rate": 2.743724460011676e-06, "loss": 0.8271, "step": 1880 }, { "epoch": 0.05491810458088815, "grad_norm": 0.997617195822203, "learning_rate": 2.745183887915937e-06, "loss": 0.7527, "step": 1881 }, { "epoch": 0.054947300808735514, "grad_norm": 0.8027037877799749, "learning_rate": 2.7466433158201987e-06, "loss": 0.8379, "step": 1882 }, { "epoch": 0.054976497036582875, "grad_norm": 0.807158246156738, "learning_rate": 2.74810274372446e-06, "loss": 0.8476, "step": 1883 }, { "epoch": 0.055005693264430236, "grad_norm": 0.9152420445947418, "learning_rate": 2.749562171628722e-06, "loss": 0.765, "step": 1884 }, { "epoch": 0.0550348894922776, "grad_norm": 0.7889908868294848, "learning_rate": 2.751021599532983e-06, "loss": 0.7171, "step": 1885 }, { "epoch": 0.05506408572012496, "grad_norm": 0.7927469011805954, "learning_rate": 2.752481027437245e-06, "loss": 0.7687, "step": 1886 }, { "epoch": 0.05509328194797232, "grad_norm": 0.796328687009281, "learning_rate": 2.7539404553415062e-06, "loss": 0.7976, "step": 1887 }, { "epoch": 0.05512247817581968, "grad_norm": 0.8249110630626663, "learning_rate": 2.755399883245768e-06, "loss": 0.811, "step": 1888 }, { "epoch": 0.05515167440366705, "grad_norm": 0.8741951184122141, "learning_rate": 2.756859311150029e-06, "loss": 0.8244, "step": 1889 }, { "epoch": 0.05518087063151441, "grad_norm": 0.8423677869089552, "learning_rate": 2.758318739054291e-06, "loss": 0.7121, "step": 1890 }, { "epoch": 0.05521006685936177, "grad_norm": 0.8618118445564771, "learning_rate": 2.759778166958553e-06, "loss": 0.7218, "step": 1891 }, { "epoch": 0.05523926308720913, "grad_norm": 0.7679882440776169, "learning_rate": 2.761237594862814e-06, "loss": 0.7231, "step": 1892 }, { "epoch": 0.055268459315056494, "grad_norm": 0.8482398018141336, "learning_rate": 2.762697022767076e-06, "loss": 0.7502, "step": 1893 }, { "epoch": 0.055297655542903855, "grad_norm": 0.8925999889621473, "learning_rate": 2.764156450671337e-06, "loss": 0.7789, "step": 1894 }, { "epoch": 0.055326851770751216, "grad_norm": 0.7201919969536873, "learning_rate": 2.7656158785755987e-06, "loss": 0.6363, "step": 1895 }, { "epoch": 0.055356047998598584, "grad_norm": 0.7486599157937421, "learning_rate": 2.76707530647986e-06, "loss": 0.7332, "step": 1896 }, { "epoch": 0.055385244226445945, "grad_norm": 0.7315650386871511, "learning_rate": 2.7685347343841217e-06, "loss": 0.6587, "step": 1897 }, { "epoch": 0.05541444045429331, "grad_norm": 0.7433677562669918, "learning_rate": 2.769994162288383e-06, "loss": 0.7228, "step": 1898 }, { "epoch": 0.05544363668214067, "grad_norm": 0.7991334512805891, "learning_rate": 2.771453590192645e-06, "loss": 0.7406, "step": 1899 }, { "epoch": 0.05547283290998803, "grad_norm": 0.8181840592232456, "learning_rate": 2.7729130180969062e-06, "loss": 0.7587, "step": 1900 }, { "epoch": 0.05550202913783539, "grad_norm": 0.811290295322971, "learning_rate": 2.774372446001168e-06, "loss": 0.8195, "step": 1901 }, { "epoch": 0.05553122536568275, "grad_norm": 0.8106981051123383, "learning_rate": 2.775831873905429e-06, "loss": 0.7311, "step": 1902 }, { "epoch": 0.05556042159353011, "grad_norm": 0.8934841413256932, "learning_rate": 2.777291301809691e-06, "loss": 0.8384, "step": 1903 }, { "epoch": 0.05558961782137748, "grad_norm": 0.7978356696989932, "learning_rate": 2.778750729713952e-06, "loss": 0.7042, "step": 1904 }, { "epoch": 0.05561881404922484, "grad_norm": 0.8266616140141179, "learning_rate": 2.7802101576182137e-06, "loss": 0.7569, "step": 1905 }, { "epoch": 0.0556480102770722, "grad_norm": 0.9167708594890684, "learning_rate": 2.781669585522476e-06, "loss": 0.8041, "step": 1906 }, { "epoch": 0.055677206504919564, "grad_norm": 0.8224288833188607, "learning_rate": 2.783129013426737e-06, "loss": 0.7828, "step": 1907 }, { "epoch": 0.055706402732766926, "grad_norm": 0.7679456982379007, "learning_rate": 2.7845884413309988e-06, "loss": 0.7271, "step": 1908 }, { "epoch": 0.05573559896061429, "grad_norm": 0.8384739907359039, "learning_rate": 2.78604786923526e-06, "loss": 0.8032, "step": 1909 }, { "epoch": 0.05576479518846165, "grad_norm": 0.8135489912842054, "learning_rate": 2.7875072971395217e-06, "loss": 0.7372, "step": 1910 }, { "epoch": 0.055793991416309016, "grad_norm": 0.7640610690520742, "learning_rate": 2.788966725043783e-06, "loss": 0.695, "step": 1911 }, { "epoch": 0.05582318764415638, "grad_norm": 0.8728204651085649, "learning_rate": 2.7904261529480446e-06, "loss": 0.8675, "step": 1912 }, { "epoch": 0.05585238387200374, "grad_norm": 0.7874553601656544, "learning_rate": 2.791885580852306e-06, "loss": 0.7321, "step": 1913 }, { "epoch": 0.0558815800998511, "grad_norm": 0.8107675225432938, "learning_rate": 2.793345008756568e-06, "loss": 0.6766, "step": 1914 }, { "epoch": 0.05591077632769846, "grad_norm": 0.8571888119535478, "learning_rate": 2.794804436660829e-06, "loss": 0.807, "step": 1915 }, { "epoch": 0.05593997255554582, "grad_norm": 0.8516656619276187, "learning_rate": 2.796263864565091e-06, "loss": 0.7949, "step": 1916 }, { "epoch": 0.05596916878339318, "grad_norm": 0.8433052435970323, "learning_rate": 2.797723292469352e-06, "loss": 0.8115, "step": 1917 }, { "epoch": 0.055998365011240545, "grad_norm": 0.8386271473912505, "learning_rate": 2.7991827203736138e-06, "loss": 0.7862, "step": 1918 }, { "epoch": 0.05602756123908791, "grad_norm": 0.767123105309166, "learning_rate": 2.800642148277875e-06, "loss": 0.7458, "step": 1919 }, { "epoch": 0.056056757466935274, "grad_norm": 0.7608939116868672, "learning_rate": 2.8021015761821367e-06, "loss": 0.7155, "step": 1920 }, { "epoch": 0.056085953694782635, "grad_norm": 0.8435774311134223, "learning_rate": 2.803561004086398e-06, "loss": 0.7534, "step": 1921 }, { "epoch": 0.056115149922629996, "grad_norm": 0.8369716953991089, "learning_rate": 2.80502043199066e-06, "loss": 0.8709, "step": 1922 }, { "epoch": 0.05614434615047736, "grad_norm": 0.7806804270439897, "learning_rate": 2.8064798598949217e-06, "loss": 0.767, "step": 1923 }, { "epoch": 0.05617354237832472, "grad_norm": 1.0144439303895572, "learning_rate": 2.807939287799183e-06, "loss": 0.6943, "step": 1924 }, { "epoch": 0.05620273860617208, "grad_norm": 0.8617725386449648, "learning_rate": 2.8093987157034446e-06, "loss": 0.7411, "step": 1925 }, { "epoch": 0.05623193483401945, "grad_norm": 0.8465538940640062, "learning_rate": 2.810858143607706e-06, "loss": 0.7482, "step": 1926 }, { "epoch": 0.05626113106186681, "grad_norm": 0.7424669130568275, "learning_rate": 2.8123175715119675e-06, "loss": 0.7459, "step": 1927 }, { "epoch": 0.05629032728971417, "grad_norm": 0.83747903487219, "learning_rate": 2.8137769994162288e-06, "loss": 0.8186, "step": 1928 }, { "epoch": 0.05631952351756153, "grad_norm": 0.8799908318060528, "learning_rate": 2.815236427320491e-06, "loss": 0.7348, "step": 1929 }, { "epoch": 0.05634871974540889, "grad_norm": 0.8085036632528818, "learning_rate": 2.816695855224752e-06, "loss": 0.8152, "step": 1930 }, { "epoch": 0.056377915973256254, "grad_norm": 0.8077060923721083, "learning_rate": 2.8181552831290138e-06, "loss": 0.7889, "step": 1931 }, { "epoch": 0.056407112201103615, "grad_norm": 0.7239941829928908, "learning_rate": 2.819614711033275e-06, "loss": 0.7025, "step": 1932 }, { "epoch": 0.056436308428950976, "grad_norm": 0.8330190038167088, "learning_rate": 2.8210741389375367e-06, "loss": 0.7257, "step": 1933 }, { "epoch": 0.056465504656798345, "grad_norm": 0.7592031628595592, "learning_rate": 2.822533566841798e-06, "loss": 0.7397, "step": 1934 }, { "epoch": 0.056494700884645706, "grad_norm": 0.7882490730613927, "learning_rate": 2.8239929947460596e-06, "loss": 0.8172, "step": 1935 }, { "epoch": 0.05652389711249307, "grad_norm": 0.900488216575983, "learning_rate": 2.825452422650321e-06, "loss": 0.8662, "step": 1936 }, { "epoch": 0.05655309334034043, "grad_norm": 1.3198496933101351, "learning_rate": 2.826911850554583e-06, "loss": 0.8523, "step": 1937 }, { "epoch": 0.05658228956818779, "grad_norm": 0.8468589558941961, "learning_rate": 2.8283712784588446e-06, "loss": 0.8475, "step": 1938 }, { "epoch": 0.05661148579603515, "grad_norm": 0.7066191341667177, "learning_rate": 2.829830706363106e-06, "loss": 0.6505, "step": 1939 }, { "epoch": 0.05664068202388251, "grad_norm": 0.7648923501558251, "learning_rate": 2.8312901342673675e-06, "loss": 0.6994, "step": 1940 }, { "epoch": 0.05666987825172988, "grad_norm": 0.8678841751639113, "learning_rate": 2.8327495621716288e-06, "loss": 0.7923, "step": 1941 }, { "epoch": 0.05669907447957724, "grad_norm": 0.8125107691317518, "learning_rate": 2.834208990075891e-06, "loss": 0.7723, "step": 1942 }, { "epoch": 0.0567282707074246, "grad_norm": 0.8720856245717581, "learning_rate": 2.8356684179801517e-06, "loss": 0.7529, "step": 1943 }, { "epoch": 0.056757466935271964, "grad_norm": 0.7704407112553561, "learning_rate": 2.837127845884414e-06, "loss": 0.7776, "step": 1944 }, { "epoch": 0.056786663163119325, "grad_norm": 0.7828305079148663, "learning_rate": 2.838587273788675e-06, "loss": 0.7038, "step": 1945 }, { "epoch": 0.056815859390966686, "grad_norm": 1.3691396458368, "learning_rate": 2.8400467016929367e-06, "loss": 0.778, "step": 1946 }, { "epoch": 0.05684505561881405, "grad_norm": 0.7285820885320797, "learning_rate": 2.841506129597198e-06, "loss": 0.6618, "step": 1947 }, { "epoch": 0.05687425184666141, "grad_norm": 0.8244257226833197, "learning_rate": 2.8429655575014596e-06, "loss": 0.7303, "step": 1948 }, { "epoch": 0.056903448074508776, "grad_norm": 0.7288988782737699, "learning_rate": 2.844424985405721e-06, "loss": 0.6292, "step": 1949 }, { "epoch": 0.05693264430235614, "grad_norm": 0.752078147049723, "learning_rate": 2.845884413309983e-06, "loss": 0.6161, "step": 1950 }, { "epoch": 0.0569618405302035, "grad_norm": 0.7453719480966433, "learning_rate": 2.8473438412142442e-06, "loss": 0.7238, "step": 1951 }, { "epoch": 0.05699103675805086, "grad_norm": 0.823626004067671, "learning_rate": 2.848803269118506e-06, "loss": 0.798, "step": 1952 }, { "epoch": 0.05702023298589822, "grad_norm": 0.8005446081227141, "learning_rate": 2.850262697022767e-06, "loss": 0.8208, "step": 1953 }, { "epoch": 0.05704942921374558, "grad_norm": 0.7830369264080251, "learning_rate": 2.851722124927029e-06, "loss": 0.7769, "step": 1954 }, { "epoch": 0.057078625441592944, "grad_norm": 0.8089533782060336, "learning_rate": 2.8531815528312905e-06, "loss": 0.7598, "step": 1955 }, { "epoch": 0.057107821669440305, "grad_norm": 0.7401257819950828, "learning_rate": 2.8546409807355517e-06, "loss": 0.6616, "step": 1956 }, { "epoch": 0.05713701789728767, "grad_norm": 0.9644222870986491, "learning_rate": 2.856100408639814e-06, "loss": 0.7899, "step": 1957 }, { "epoch": 0.057166214125135034, "grad_norm": 0.8102645447059293, "learning_rate": 2.857559836544075e-06, "loss": 0.7423, "step": 1958 }, { "epoch": 0.057195410352982395, "grad_norm": 0.791587299892984, "learning_rate": 2.8590192644483367e-06, "loss": 0.7704, "step": 1959 }, { "epoch": 0.05722460658082976, "grad_norm": 0.7541507036236875, "learning_rate": 2.860478692352598e-06, "loss": 0.7638, "step": 1960 }, { "epoch": 0.05725380280867712, "grad_norm": 0.719950649338208, "learning_rate": 2.8619381202568596e-06, "loss": 0.6665, "step": 1961 }, { "epoch": 0.05728299903652448, "grad_norm": 0.8331693758330191, "learning_rate": 2.863397548161121e-06, "loss": 0.6999, "step": 1962 }, { "epoch": 0.05731219526437184, "grad_norm": 0.8873567964494397, "learning_rate": 2.8648569760653826e-06, "loss": 0.7829, "step": 1963 }, { "epoch": 0.05734139149221921, "grad_norm": 0.8305002730440009, "learning_rate": 2.866316403969644e-06, "loss": 0.8251, "step": 1964 }, { "epoch": 0.05737058772006657, "grad_norm": 0.9078699486835974, "learning_rate": 2.867775831873906e-06, "loss": 0.8085, "step": 1965 }, { "epoch": 0.05739978394791393, "grad_norm": 0.8457242143535021, "learning_rate": 2.869235259778167e-06, "loss": 0.8194, "step": 1966 }, { "epoch": 0.05742898017576129, "grad_norm": 0.7526765685250082, "learning_rate": 2.870694687682429e-06, "loss": 0.6953, "step": 1967 }, { "epoch": 0.05745817640360865, "grad_norm": 0.8490948829677327, "learning_rate": 2.87215411558669e-06, "loss": 0.788, "step": 1968 }, { "epoch": 0.057487372631456014, "grad_norm": 0.8100385383757525, "learning_rate": 2.8736135434909517e-06, "loss": 0.7604, "step": 1969 }, { "epoch": 0.057516568859303376, "grad_norm": 0.7821270840691134, "learning_rate": 2.8750729713952134e-06, "loss": 0.6958, "step": 1970 }, { "epoch": 0.05754576508715074, "grad_norm": 0.8625661191344947, "learning_rate": 2.8765323992994746e-06, "loss": 0.8168, "step": 1971 }, { "epoch": 0.057574961314998105, "grad_norm": 0.788850782798288, "learning_rate": 2.8779918272037367e-06, "loss": 0.6688, "step": 1972 }, { "epoch": 0.057604157542845466, "grad_norm": 0.834961456641946, "learning_rate": 2.879451255107998e-06, "loss": 0.7565, "step": 1973 }, { "epoch": 0.05763335377069283, "grad_norm": 0.8370357195510381, "learning_rate": 2.8809106830122597e-06, "loss": 0.8232, "step": 1974 }, { "epoch": 0.05766254999854019, "grad_norm": 0.8098797549785618, "learning_rate": 2.882370110916521e-06, "loss": 0.7908, "step": 1975 }, { "epoch": 0.05769174622638755, "grad_norm": 0.7619627062394667, "learning_rate": 2.8838295388207826e-06, "loss": 0.7339, "step": 1976 }, { "epoch": 0.05772094245423491, "grad_norm": 0.8187520794546237, "learning_rate": 2.885288966725044e-06, "loss": 0.7296, "step": 1977 }, { "epoch": 0.05775013868208227, "grad_norm": 0.7667165075261844, "learning_rate": 2.8867483946293055e-06, "loss": 0.7506, "step": 1978 }, { "epoch": 0.05777933490992964, "grad_norm": 0.779644486199339, "learning_rate": 2.8882078225335667e-06, "loss": 0.8055, "step": 1979 }, { "epoch": 0.057808531137777, "grad_norm": 0.8464936775803874, "learning_rate": 2.889667250437829e-06, "loss": 0.8612, "step": 1980 }, { "epoch": 0.05783772736562436, "grad_norm": 0.7177414565569967, "learning_rate": 2.89112667834209e-06, "loss": 0.6689, "step": 1981 }, { "epoch": 0.057866923593471724, "grad_norm": 0.8035953410451092, "learning_rate": 2.8925861062463517e-06, "loss": 0.7505, "step": 1982 }, { "epoch": 0.057896119821319085, "grad_norm": 0.8516325171975453, "learning_rate": 2.894045534150613e-06, "loss": 0.8503, "step": 1983 }, { "epoch": 0.057925316049166446, "grad_norm": 0.801390538311748, "learning_rate": 2.8955049620548747e-06, "loss": 0.7324, "step": 1984 }, { "epoch": 0.05795451227701381, "grad_norm": 0.8544850331308029, "learning_rate": 2.896964389959136e-06, "loss": 0.7718, "step": 1985 }, { "epoch": 0.05798370850486117, "grad_norm": 0.8528604659197871, "learning_rate": 2.8984238178633976e-06, "loss": 0.8265, "step": 1986 }, { "epoch": 0.05801290473270854, "grad_norm": 0.705011542004375, "learning_rate": 2.8998832457676597e-06, "loss": 0.648, "step": 1987 }, { "epoch": 0.0580421009605559, "grad_norm": 0.8062437590751669, "learning_rate": 2.901342673671921e-06, "loss": 0.6866, "step": 1988 }, { "epoch": 0.05807129718840326, "grad_norm": 0.8013566283876924, "learning_rate": 2.9028021015761826e-06, "loss": 0.7729, "step": 1989 }, { "epoch": 0.05810049341625062, "grad_norm": 0.7749295373970181, "learning_rate": 2.904261529480444e-06, "loss": 0.7576, "step": 1990 }, { "epoch": 0.05812968964409798, "grad_norm": 0.861513842011759, "learning_rate": 2.9057209573847055e-06, "loss": 0.7684, "step": 1991 }, { "epoch": 0.05815888587194534, "grad_norm": 0.7032453037874785, "learning_rate": 2.9071803852889668e-06, "loss": 0.654, "step": 1992 }, { "epoch": 0.058188082099792704, "grad_norm": 0.7804672213223345, "learning_rate": 2.9086398131932284e-06, "loss": 0.6944, "step": 1993 }, { "epoch": 0.05821727832764007, "grad_norm": 0.8395759453550171, "learning_rate": 2.9100992410974897e-06, "loss": 0.7212, "step": 1994 }, { "epoch": 0.05824647455548743, "grad_norm": 0.7385702021238364, "learning_rate": 2.9115586690017518e-06, "loss": 0.6429, "step": 1995 }, { "epoch": 0.058275670783334795, "grad_norm": 0.8570461405773052, "learning_rate": 2.913018096906013e-06, "loss": 0.79, "step": 1996 }, { "epoch": 0.058304867011182156, "grad_norm": 0.77344020823496, "learning_rate": 2.9144775248102747e-06, "loss": 0.7543, "step": 1997 }, { "epoch": 0.05833406323902952, "grad_norm": 0.8357937366408252, "learning_rate": 2.915936952714536e-06, "loss": 0.7355, "step": 1998 }, { "epoch": 0.05836325946687688, "grad_norm": 0.7551144220316052, "learning_rate": 2.9173963806187976e-06, "loss": 0.714, "step": 1999 }, { "epoch": 0.05839245569472424, "grad_norm": 0.9528679696149894, "learning_rate": 2.918855808523059e-06, "loss": 0.9038, "step": 2000 }, { "epoch": 0.0584216519225716, "grad_norm": 0.8130306418452908, "learning_rate": 2.920315236427321e-06, "loss": 0.7663, "step": 2001 }, { "epoch": 0.05845084815041897, "grad_norm": 2.1727035871424913, "learning_rate": 2.9217746643315826e-06, "loss": 0.7852, "step": 2002 }, { "epoch": 0.05848004437826633, "grad_norm": 0.7299114453110092, "learning_rate": 2.923234092235844e-06, "loss": 0.6702, "step": 2003 }, { "epoch": 0.05850924060611369, "grad_norm": 0.8352200381811261, "learning_rate": 2.9246935201401055e-06, "loss": 0.7979, "step": 2004 }, { "epoch": 0.05853843683396105, "grad_norm": 0.9032945269373074, "learning_rate": 2.9261529480443668e-06, "loss": 0.8555, "step": 2005 }, { "epoch": 0.05856763306180841, "grad_norm": 0.97008570087973, "learning_rate": 2.9276123759486284e-06, "loss": 0.8047, "step": 2006 }, { "epoch": 0.058596829289655775, "grad_norm": 0.8265564325844981, "learning_rate": 2.9290718038528897e-06, "loss": 0.6625, "step": 2007 }, { "epoch": 0.058626025517503136, "grad_norm": 0.8088513238826883, "learning_rate": 2.9305312317571518e-06, "loss": 0.7768, "step": 2008 }, { "epoch": 0.058655221745350504, "grad_norm": 0.9034729621265553, "learning_rate": 2.931990659661413e-06, "loss": 0.8438, "step": 2009 }, { "epoch": 0.058684417973197865, "grad_norm": 0.8464169170327902, "learning_rate": 2.9334500875656747e-06, "loss": 0.7205, "step": 2010 }, { "epoch": 0.058713614201045226, "grad_norm": 0.8568024499431481, "learning_rate": 2.934909515469936e-06, "loss": 0.8117, "step": 2011 }, { "epoch": 0.05874281042889259, "grad_norm": 0.8556513773793045, "learning_rate": 2.9363689433741976e-06, "loss": 0.7575, "step": 2012 }, { "epoch": 0.05877200665673995, "grad_norm": 0.9256544843825307, "learning_rate": 2.937828371278459e-06, "loss": 0.8669, "step": 2013 }, { "epoch": 0.05880120288458731, "grad_norm": 0.8387361524505006, "learning_rate": 2.9392877991827205e-06, "loss": 0.7034, "step": 2014 }, { "epoch": 0.05883039911243467, "grad_norm": 0.9453721523056678, "learning_rate": 2.9407472270869818e-06, "loss": 0.7855, "step": 2015 }, { "epoch": 0.05885959534028203, "grad_norm": 0.8951202127731744, "learning_rate": 2.942206654991244e-06, "loss": 0.8124, "step": 2016 }, { "epoch": 0.0588887915681294, "grad_norm": 0.7822756978322594, "learning_rate": 2.943666082895505e-06, "loss": 0.7928, "step": 2017 }, { "epoch": 0.05891798779597676, "grad_norm": 0.7770978683119333, "learning_rate": 2.945125510799767e-06, "loss": 0.6985, "step": 2018 }, { "epoch": 0.05894718402382412, "grad_norm": 0.9875848793165122, "learning_rate": 2.9465849387040285e-06, "loss": 0.8746, "step": 2019 }, { "epoch": 0.058976380251671484, "grad_norm": 0.8317806755384293, "learning_rate": 2.9480443666082897e-06, "loss": 0.7905, "step": 2020 }, { "epoch": 0.059005576479518845, "grad_norm": 0.9495934076583402, "learning_rate": 2.9495037945125514e-06, "loss": 0.7616, "step": 2021 }, { "epoch": 0.059034772707366207, "grad_norm": 0.921634895816853, "learning_rate": 2.9509632224168126e-06, "loss": 0.7777, "step": 2022 }, { "epoch": 0.05906396893521357, "grad_norm": 0.7571277122664375, "learning_rate": 2.9524226503210747e-06, "loss": 0.7175, "step": 2023 }, { "epoch": 0.059093165163060936, "grad_norm": 0.7900915402278023, "learning_rate": 2.953882078225336e-06, "loss": 0.8061, "step": 2024 }, { "epoch": 0.0591223613909083, "grad_norm": 0.8962632199229547, "learning_rate": 2.9553415061295976e-06, "loss": 0.7001, "step": 2025 }, { "epoch": 0.05915155761875566, "grad_norm": 0.7954003285142821, "learning_rate": 2.956800934033859e-06, "loss": 0.7367, "step": 2026 }, { "epoch": 0.05918075384660302, "grad_norm": 0.7597154942360823, "learning_rate": 2.9582603619381205e-06, "loss": 0.7165, "step": 2027 }, { "epoch": 0.05920995007445038, "grad_norm": 0.8294138262059881, "learning_rate": 2.959719789842382e-06, "loss": 0.7642, "step": 2028 }, { "epoch": 0.05923914630229774, "grad_norm": 3.4420594489503213, "learning_rate": 2.9611792177466435e-06, "loss": 0.8404, "step": 2029 }, { "epoch": 0.0592683425301451, "grad_norm": 0.8510338762746125, "learning_rate": 2.9626386456509047e-06, "loss": 0.7429, "step": 2030 }, { "epoch": 0.059297538757992464, "grad_norm": 0.820736992967229, "learning_rate": 2.964098073555167e-06, "loss": 0.7625, "step": 2031 }, { "epoch": 0.05932673498583983, "grad_norm": 0.8198711834530026, "learning_rate": 2.965557501459428e-06, "loss": 0.7816, "step": 2032 }, { "epoch": 0.059355931213687194, "grad_norm": 0.7819635639931712, "learning_rate": 2.9670169293636897e-06, "loss": 0.7128, "step": 2033 }, { "epoch": 0.059385127441534555, "grad_norm": 0.7962013042588534, "learning_rate": 2.9684763572679514e-06, "loss": 0.756, "step": 2034 }, { "epoch": 0.059414323669381916, "grad_norm": 0.8673512727747537, "learning_rate": 2.9699357851722126e-06, "loss": 0.8238, "step": 2035 }, { "epoch": 0.05944351989722928, "grad_norm": 0.8213301926156109, "learning_rate": 2.9713952130764743e-06, "loss": 0.807, "step": 2036 }, { "epoch": 0.05947271612507664, "grad_norm": 0.6966900241982623, "learning_rate": 2.9728546409807356e-06, "loss": 0.582, "step": 2037 }, { "epoch": 0.059501912352924, "grad_norm": 0.8081622113475369, "learning_rate": 2.9743140688849976e-06, "loss": 0.7887, "step": 2038 }, { "epoch": 0.05953110858077137, "grad_norm": 0.8500496210264608, "learning_rate": 2.975773496789259e-06, "loss": 0.8665, "step": 2039 }, { "epoch": 0.05956030480861873, "grad_norm": 0.8283449710511461, "learning_rate": 2.9772329246935206e-06, "loss": 0.8351, "step": 2040 }, { "epoch": 0.05958950103646609, "grad_norm": 0.7918797151611504, "learning_rate": 2.978692352597782e-06, "loss": 0.7826, "step": 2041 }, { "epoch": 0.05961869726431345, "grad_norm": 0.824778508909239, "learning_rate": 2.9801517805020435e-06, "loss": 0.7831, "step": 2042 }, { "epoch": 0.05964789349216081, "grad_norm": 0.8106885433072767, "learning_rate": 2.9816112084063047e-06, "loss": 0.7069, "step": 2043 }, { "epoch": 0.059677089720008174, "grad_norm": 0.8506094266244518, "learning_rate": 2.9830706363105664e-06, "loss": 0.7337, "step": 2044 }, { "epoch": 0.059706285947855535, "grad_norm": 0.8154061176343507, "learning_rate": 2.9845300642148276e-06, "loss": 0.7713, "step": 2045 }, { "epoch": 0.059735482175702896, "grad_norm": 1.2409306229431705, "learning_rate": 2.9859894921190897e-06, "loss": 0.7473, "step": 2046 }, { "epoch": 0.059764678403550264, "grad_norm": 0.844184825460674, "learning_rate": 2.987448920023351e-06, "loss": 0.6722, "step": 2047 }, { "epoch": 0.059793874631397625, "grad_norm": 0.8069164931040828, "learning_rate": 2.9889083479276127e-06, "loss": 0.7657, "step": 2048 }, { "epoch": 0.05982307085924499, "grad_norm": 0.8130549086250429, "learning_rate": 2.990367775831874e-06, "loss": 0.8343, "step": 2049 }, { "epoch": 0.05985226708709235, "grad_norm": 0.787170240046197, "learning_rate": 2.9918272037361356e-06, "loss": 0.77, "step": 2050 }, { "epoch": 0.05988146331493971, "grad_norm": 0.7393556192848815, "learning_rate": 2.9932866316403977e-06, "loss": 0.6797, "step": 2051 }, { "epoch": 0.05991065954278707, "grad_norm": 0.7484847945297719, "learning_rate": 2.9947460595446585e-06, "loss": 0.7278, "step": 2052 }, { "epoch": 0.05993985577063443, "grad_norm": 0.9309179725130793, "learning_rate": 2.9962054874489206e-06, "loss": 0.7391, "step": 2053 }, { "epoch": 0.05996905199848179, "grad_norm": 0.8199569909629877, "learning_rate": 2.997664915353182e-06, "loss": 0.7955, "step": 2054 }, { "epoch": 0.05999824822632916, "grad_norm": 0.8420609190063058, "learning_rate": 2.9991243432574435e-06, "loss": 0.8511, "step": 2055 }, { "epoch": 0.06002744445417652, "grad_norm": 0.7839936531057324, "learning_rate": 3.0005837711617047e-06, "loss": 0.779, "step": 2056 }, { "epoch": 0.06005664068202388, "grad_norm": 0.767747683340505, "learning_rate": 3.0020431990659664e-06, "loss": 0.7597, "step": 2057 }, { "epoch": 0.060085836909871244, "grad_norm": 0.7462896412603127, "learning_rate": 3.0035026269702277e-06, "loss": 0.7024, "step": 2058 }, { "epoch": 0.060115033137718606, "grad_norm": 0.8331369485973408, "learning_rate": 3.0049620548744898e-06, "loss": 0.7876, "step": 2059 }, { "epoch": 0.06014422936556597, "grad_norm": 0.962833895082158, "learning_rate": 3.0064214827787506e-06, "loss": 0.7334, "step": 2060 }, { "epoch": 0.06017342559341333, "grad_norm": 0.7677665022508625, "learning_rate": 3.0078809106830127e-06, "loss": 0.735, "step": 2061 }, { "epoch": 0.060202621821260696, "grad_norm": 0.8249521583261521, "learning_rate": 3.009340338587274e-06, "loss": 0.7545, "step": 2062 }, { "epoch": 0.06023181804910806, "grad_norm": 1.2557286765984639, "learning_rate": 3.0107997664915356e-06, "loss": 0.8096, "step": 2063 }, { "epoch": 0.06026101427695542, "grad_norm": 0.9715651874009312, "learning_rate": 3.012259194395797e-06, "loss": 0.7822, "step": 2064 }, { "epoch": 0.06029021050480278, "grad_norm": 0.8902380882925213, "learning_rate": 3.0137186223000585e-06, "loss": 0.8505, "step": 2065 }, { "epoch": 0.06031940673265014, "grad_norm": 0.8664252894476308, "learning_rate": 3.0151780502043206e-06, "loss": 0.7056, "step": 2066 }, { "epoch": 0.0603486029604975, "grad_norm": 0.7785756280378354, "learning_rate": 3.016637478108582e-06, "loss": 0.7402, "step": 2067 }, { "epoch": 0.06037779918834486, "grad_norm": 0.7666414949089821, "learning_rate": 3.0180969060128435e-06, "loss": 0.7097, "step": 2068 }, { "epoch": 0.060406995416192225, "grad_norm": 0.8646446678221158, "learning_rate": 3.0195563339171048e-06, "loss": 0.684, "step": 2069 }, { "epoch": 0.06043619164403959, "grad_norm": 0.7802352814271878, "learning_rate": 3.0210157618213664e-06, "loss": 0.8073, "step": 2070 }, { "epoch": 0.060465387871886954, "grad_norm": 0.7881082808478272, "learning_rate": 3.0224751897256277e-06, "loss": 0.7733, "step": 2071 }, { "epoch": 0.060494584099734315, "grad_norm": 0.8089061018249976, "learning_rate": 3.0239346176298893e-06, "loss": 0.7746, "step": 2072 }, { "epoch": 0.060523780327581676, "grad_norm": 0.9583319119174828, "learning_rate": 3.0253940455341506e-06, "loss": 0.7819, "step": 2073 }, { "epoch": 0.06055297655542904, "grad_norm": 0.8035467477040354, "learning_rate": 3.0268534734384127e-06, "loss": 0.7744, "step": 2074 }, { "epoch": 0.0605821727832764, "grad_norm": 0.8231043897690974, "learning_rate": 3.028312901342674e-06, "loss": 0.7705, "step": 2075 }, { "epoch": 0.06061136901112376, "grad_norm": 0.7979897329121569, "learning_rate": 3.0297723292469356e-06, "loss": 0.702, "step": 2076 }, { "epoch": 0.06064056523897113, "grad_norm": 0.9949182798944003, "learning_rate": 3.031231757151197e-06, "loss": 0.7858, "step": 2077 }, { "epoch": 0.06066976146681849, "grad_norm": 0.7927479209311131, "learning_rate": 3.0326911850554585e-06, "loss": 0.7496, "step": 2078 }, { "epoch": 0.06069895769466585, "grad_norm": 1.4528166951060986, "learning_rate": 3.0341506129597198e-06, "loss": 0.7581, "step": 2079 }, { "epoch": 0.06072815392251321, "grad_norm": 0.917798448465079, "learning_rate": 3.0356100408639814e-06, "loss": 0.7358, "step": 2080 }, { "epoch": 0.06075735015036057, "grad_norm": 0.7023239627184837, "learning_rate": 3.0370694687682427e-06, "loss": 0.6558, "step": 2081 }, { "epoch": 0.060786546378207934, "grad_norm": 0.7589990918242094, "learning_rate": 3.0385288966725048e-06, "loss": 0.7318, "step": 2082 }, { "epoch": 0.060815742606055295, "grad_norm": 0.7671976352893582, "learning_rate": 3.0399883245767664e-06, "loss": 0.7578, "step": 2083 }, { "epoch": 0.060844938833902656, "grad_norm": 0.9217726313633234, "learning_rate": 3.0414477524810277e-06, "loss": 0.7633, "step": 2084 }, { "epoch": 0.060874135061750025, "grad_norm": 0.8105827820217327, "learning_rate": 3.0429071803852894e-06, "loss": 0.7914, "step": 2085 }, { "epoch": 0.060903331289597386, "grad_norm": 0.8355836592280861, "learning_rate": 3.0443666082895506e-06, "loss": 0.8738, "step": 2086 }, { "epoch": 0.06093252751744475, "grad_norm": 0.7959332725760081, "learning_rate": 3.0458260361938123e-06, "loss": 0.7412, "step": 2087 }, { "epoch": 0.06096172374529211, "grad_norm": 0.7757768801186216, "learning_rate": 3.0472854640980735e-06, "loss": 0.6936, "step": 2088 }, { "epoch": 0.06099091997313947, "grad_norm": 0.8768103670570959, "learning_rate": 3.0487448920023356e-06, "loss": 0.8166, "step": 2089 }, { "epoch": 0.06102011620098683, "grad_norm": 0.7488384670816702, "learning_rate": 3.050204319906597e-06, "loss": 0.6787, "step": 2090 }, { "epoch": 0.06104931242883419, "grad_norm": 1.00104256990068, "learning_rate": 3.0516637478108585e-06, "loss": 0.7997, "step": 2091 }, { "epoch": 0.06107850865668156, "grad_norm": 0.8052173510243781, "learning_rate": 3.0531231757151198e-06, "loss": 0.72, "step": 2092 }, { "epoch": 0.06110770488452892, "grad_norm": 0.7678732730165004, "learning_rate": 3.0545826036193815e-06, "loss": 0.6805, "step": 2093 }, { "epoch": 0.06113690111237628, "grad_norm": 0.9469474398692667, "learning_rate": 3.0560420315236427e-06, "loss": 0.7966, "step": 2094 }, { "epoch": 0.061166097340223644, "grad_norm": 0.7929760764159459, "learning_rate": 3.0575014594279044e-06, "loss": 0.7724, "step": 2095 }, { "epoch": 0.061195293568071005, "grad_norm": 1.0091784295298158, "learning_rate": 3.0589608873321656e-06, "loss": 0.8096, "step": 2096 }, { "epoch": 0.061224489795918366, "grad_norm": 1.5750763786950588, "learning_rate": 3.0604203152364277e-06, "loss": 0.8003, "step": 2097 }, { "epoch": 0.06125368602376573, "grad_norm": 0.8004100481593303, "learning_rate": 3.0618797431406894e-06, "loss": 0.8336, "step": 2098 }, { "epoch": 0.06128288225161309, "grad_norm": 0.8680385366662625, "learning_rate": 3.0633391710449506e-06, "loss": 0.8161, "step": 2099 }, { "epoch": 0.061312078479460456, "grad_norm": 0.8962632948066601, "learning_rate": 3.0647985989492123e-06, "loss": 0.7259, "step": 2100 }, { "epoch": 0.06134127470730782, "grad_norm": 0.7953623762695776, "learning_rate": 3.0662580268534735e-06, "loss": 0.7881, "step": 2101 }, { "epoch": 0.06137047093515518, "grad_norm": 0.7972135307024275, "learning_rate": 3.0677174547577352e-06, "loss": 0.788, "step": 2102 }, { "epoch": 0.06139966716300254, "grad_norm": 0.7611504799966364, "learning_rate": 3.0691768826619965e-06, "loss": 0.7256, "step": 2103 }, { "epoch": 0.0614288633908499, "grad_norm": 0.8339172416739642, "learning_rate": 3.0706363105662586e-06, "loss": 0.8273, "step": 2104 }, { "epoch": 0.06145805961869726, "grad_norm": 0.9507249232046121, "learning_rate": 3.07209573847052e-06, "loss": 0.747, "step": 2105 }, { "epoch": 0.061487255846544624, "grad_norm": 0.7867470922159012, "learning_rate": 3.0735551663747815e-06, "loss": 0.6868, "step": 2106 }, { "epoch": 0.06151645207439199, "grad_norm": 0.7841307087782796, "learning_rate": 3.0750145942790427e-06, "loss": 0.7947, "step": 2107 }, { "epoch": 0.06154564830223935, "grad_norm": 0.8131929779987065, "learning_rate": 3.0764740221833044e-06, "loss": 0.8195, "step": 2108 }, { "epoch": 0.061574844530086714, "grad_norm": 0.7388342647858605, "learning_rate": 3.0779334500875656e-06, "loss": 0.6697, "step": 2109 }, { "epoch": 0.061604040757934075, "grad_norm": 0.8358438110042867, "learning_rate": 3.0793928779918273e-06, "loss": 0.8684, "step": 2110 }, { "epoch": 0.06163323698578144, "grad_norm": 0.7713421064785846, "learning_rate": 3.0808523058960885e-06, "loss": 0.7395, "step": 2111 }, { "epoch": 0.0616624332136288, "grad_norm": 1.8858435892742298, "learning_rate": 3.0823117338003506e-06, "loss": 0.9032, "step": 2112 }, { "epoch": 0.06169162944147616, "grad_norm": 0.8787866643396216, "learning_rate": 3.083771161704612e-06, "loss": 0.8893, "step": 2113 }, { "epoch": 0.06172082566932352, "grad_norm": 0.7834680260237836, "learning_rate": 3.0852305896088736e-06, "loss": 0.7972, "step": 2114 }, { "epoch": 0.06175002189717089, "grad_norm": 0.8307260787518996, "learning_rate": 3.0866900175131352e-06, "loss": 0.7235, "step": 2115 }, { "epoch": 0.06177921812501825, "grad_norm": 0.7992099475826628, "learning_rate": 3.0881494454173965e-06, "loss": 0.7215, "step": 2116 }, { "epoch": 0.06180841435286561, "grad_norm": 0.8536811407135342, "learning_rate": 3.0896088733216586e-06, "loss": 0.7416, "step": 2117 }, { "epoch": 0.06183761058071297, "grad_norm": 0.8472202464205391, "learning_rate": 3.09106830122592e-06, "loss": 0.6504, "step": 2118 }, { "epoch": 0.06186680680856033, "grad_norm": 0.8252410252701906, "learning_rate": 3.0925277291301815e-06, "loss": 0.8086, "step": 2119 }, { "epoch": 0.061896003036407694, "grad_norm": 0.8100601760781566, "learning_rate": 3.0939871570344427e-06, "loss": 0.8303, "step": 2120 }, { "epoch": 0.061925199264255056, "grad_norm": 0.7639920344919156, "learning_rate": 3.0954465849387044e-06, "loss": 0.7438, "step": 2121 }, { "epoch": 0.061954395492102424, "grad_norm": 0.8225604321170876, "learning_rate": 3.0969060128429656e-06, "loss": 0.8342, "step": 2122 }, { "epoch": 0.061983591719949785, "grad_norm": 0.8322365118679907, "learning_rate": 3.0983654407472273e-06, "loss": 0.7882, "step": 2123 }, { "epoch": 0.062012787947797146, "grad_norm": 0.7821418170384642, "learning_rate": 3.0998248686514886e-06, "loss": 0.7465, "step": 2124 }, { "epoch": 0.06204198417564451, "grad_norm": 0.8941980887243889, "learning_rate": 3.1012842965557507e-06, "loss": 0.7379, "step": 2125 }, { "epoch": 0.06207118040349187, "grad_norm": 0.8897339729536353, "learning_rate": 3.102743724460012e-06, "loss": 0.8278, "step": 2126 }, { "epoch": 0.06210037663133923, "grad_norm": 0.806304214011121, "learning_rate": 3.1042031523642736e-06, "loss": 0.8155, "step": 2127 }, { "epoch": 0.06212957285918659, "grad_norm": 0.8073986487620033, "learning_rate": 3.105662580268535e-06, "loss": 0.7706, "step": 2128 }, { "epoch": 0.06215876908703395, "grad_norm": 1.2529911196316796, "learning_rate": 3.1071220081727965e-06, "loss": 0.723, "step": 2129 }, { "epoch": 0.06218796531488132, "grad_norm": 0.8152845198458867, "learning_rate": 3.108581436077058e-06, "loss": 0.7554, "step": 2130 }, { "epoch": 0.06221716154272868, "grad_norm": 0.7897450378576354, "learning_rate": 3.1100408639813194e-06, "loss": 0.6619, "step": 2131 }, { "epoch": 0.06224635777057604, "grad_norm": 0.8072356210684944, "learning_rate": 3.1115002918855815e-06, "loss": 0.7359, "step": 2132 }, { "epoch": 0.062275553998423404, "grad_norm": 0.9810837821473003, "learning_rate": 3.1129597197898427e-06, "loss": 0.6856, "step": 2133 }, { "epoch": 0.062304750226270765, "grad_norm": 0.7353897547578206, "learning_rate": 3.1144191476941044e-06, "loss": 0.6923, "step": 2134 }, { "epoch": 0.062333946454118126, "grad_norm": 0.7913928393026185, "learning_rate": 3.1158785755983657e-06, "loss": 0.78, "step": 2135 }, { "epoch": 0.06236314268196549, "grad_norm": 0.7828271321574487, "learning_rate": 3.1173380035026273e-06, "loss": 0.8514, "step": 2136 }, { "epoch": 0.062392338909812856, "grad_norm": 0.8003373385130061, "learning_rate": 3.1187974314068886e-06, "loss": 0.7214, "step": 2137 }, { "epoch": 0.06242153513766022, "grad_norm": 0.7381329478466311, "learning_rate": 3.1202568593111503e-06, "loss": 0.6872, "step": 2138 }, { "epoch": 0.06245073136550758, "grad_norm": 0.8245130681869055, "learning_rate": 3.1217162872154115e-06, "loss": 0.8225, "step": 2139 }, { "epoch": 0.06247992759335494, "grad_norm": 0.858291144803145, "learning_rate": 3.1231757151196736e-06, "loss": 0.7706, "step": 2140 }, { "epoch": 0.0625091238212023, "grad_norm": 0.7325028406286762, "learning_rate": 3.124635143023935e-06, "loss": 0.657, "step": 2141 }, { "epoch": 0.06253832004904966, "grad_norm": 0.7905592786673155, "learning_rate": 3.1260945709281965e-06, "loss": 0.8045, "step": 2142 }, { "epoch": 0.06256751627689702, "grad_norm": 10.407278113048022, "learning_rate": 3.1275539988324578e-06, "loss": 1.3436, "step": 2143 }, { "epoch": 0.06259671250474438, "grad_norm": 0.9184303106972816, "learning_rate": 3.1290134267367194e-06, "loss": 0.842, "step": 2144 }, { "epoch": 0.06262590873259175, "grad_norm": 0.9696931469891981, "learning_rate": 3.1304728546409807e-06, "loss": 0.7753, "step": 2145 }, { "epoch": 0.0626551049604391, "grad_norm": 0.7900989696930522, "learning_rate": 3.1319322825452423e-06, "loss": 0.8054, "step": 2146 }, { "epoch": 0.06268430118828647, "grad_norm": 0.851639066063245, "learning_rate": 3.1333917104495044e-06, "loss": 0.7935, "step": 2147 }, { "epoch": 0.06271349741613383, "grad_norm": 0.7516227620687868, "learning_rate": 3.1348511383537657e-06, "loss": 0.6664, "step": 2148 }, { "epoch": 0.0627426936439812, "grad_norm": 0.9533191743164426, "learning_rate": 3.1363105662580274e-06, "loss": 0.8197, "step": 2149 }, { "epoch": 0.06277188987182857, "grad_norm": 1.0047348586427, "learning_rate": 3.1377699941622886e-06, "loss": 0.8565, "step": 2150 }, { "epoch": 0.06280108609967593, "grad_norm": 0.7710418106388579, "learning_rate": 3.1392294220665503e-06, "loss": 0.7751, "step": 2151 }, { "epoch": 0.06283028232752329, "grad_norm": 0.7307128961310057, "learning_rate": 3.1406888499708115e-06, "loss": 0.6891, "step": 2152 }, { "epoch": 0.06285947855537065, "grad_norm": 0.7773703752760078, "learning_rate": 3.142148277875073e-06, "loss": 0.7378, "step": 2153 }, { "epoch": 0.06288867478321801, "grad_norm": 0.9537393526024365, "learning_rate": 3.1436077057793344e-06, "loss": 0.8766, "step": 2154 }, { "epoch": 0.06291787101106537, "grad_norm": 0.7407723598364079, "learning_rate": 3.1450671336835965e-06, "loss": 0.7396, "step": 2155 }, { "epoch": 0.06294706723891273, "grad_norm": 0.8294580450135344, "learning_rate": 3.1465265615878578e-06, "loss": 0.84, "step": 2156 }, { "epoch": 0.0629762634667601, "grad_norm": 0.8266040604393328, "learning_rate": 3.1479859894921194e-06, "loss": 0.7517, "step": 2157 }, { "epoch": 0.06300545969460745, "grad_norm": 0.7421448496942076, "learning_rate": 3.1494454173963807e-06, "loss": 0.6746, "step": 2158 }, { "epoch": 0.06303465592245482, "grad_norm": 0.7416899830753377, "learning_rate": 3.1509048453006424e-06, "loss": 0.7134, "step": 2159 }, { "epoch": 0.06306385215030218, "grad_norm": 0.8613251928605791, "learning_rate": 3.1523642732049036e-06, "loss": 0.8215, "step": 2160 }, { "epoch": 0.06309304837814954, "grad_norm": 0.8103723841108424, "learning_rate": 3.1538237011091653e-06, "loss": 0.7362, "step": 2161 }, { "epoch": 0.0631222446059969, "grad_norm": 0.8790806236124866, "learning_rate": 3.1552831290134274e-06, "loss": 0.8431, "step": 2162 }, { "epoch": 0.06315144083384426, "grad_norm": 0.7496419741525385, "learning_rate": 3.1567425569176886e-06, "loss": 0.6514, "step": 2163 }, { "epoch": 0.06318063706169164, "grad_norm": 0.7251407373263421, "learning_rate": 3.1582019848219503e-06, "loss": 0.6915, "step": 2164 }, { "epoch": 0.063209833289539, "grad_norm": 0.786158999337505, "learning_rate": 3.1596614127262115e-06, "loss": 0.7703, "step": 2165 }, { "epoch": 0.06323902951738636, "grad_norm": 1.1454385991264482, "learning_rate": 3.161120840630473e-06, "loss": 0.9414, "step": 2166 }, { "epoch": 0.06326822574523372, "grad_norm": 0.949543795307388, "learning_rate": 3.1625802685347344e-06, "loss": 0.8384, "step": 2167 }, { "epoch": 0.06329742197308108, "grad_norm": 0.8317206893766987, "learning_rate": 3.1640396964389965e-06, "loss": 0.7156, "step": 2168 }, { "epoch": 0.06332661820092844, "grad_norm": 0.9848135429048063, "learning_rate": 3.1654991243432574e-06, "loss": 0.8461, "step": 2169 }, { "epoch": 0.0633558144287758, "grad_norm": 0.7277550237627171, "learning_rate": 3.1669585522475195e-06, "loss": 0.6754, "step": 2170 }, { "epoch": 0.06338501065662316, "grad_norm": 0.7717046182019953, "learning_rate": 3.1684179801517807e-06, "loss": 0.7171, "step": 2171 }, { "epoch": 0.06341420688447053, "grad_norm": 0.8500730987528393, "learning_rate": 3.1698774080560424e-06, "loss": 0.8817, "step": 2172 }, { "epoch": 0.06344340311231789, "grad_norm": 0.8348897265519915, "learning_rate": 3.1713368359603036e-06, "loss": 0.7666, "step": 2173 }, { "epoch": 0.06347259934016525, "grad_norm": 0.8319145772351437, "learning_rate": 3.1727962638645653e-06, "loss": 0.7769, "step": 2174 }, { "epoch": 0.06350179556801261, "grad_norm": 0.8418330727938714, "learning_rate": 3.1742556917688265e-06, "loss": 0.8031, "step": 2175 }, { "epoch": 0.06353099179585997, "grad_norm": 0.8788212965556319, "learning_rate": 3.1757151196730886e-06, "loss": 0.7865, "step": 2176 }, { "epoch": 0.06356018802370733, "grad_norm": 0.8645401926549466, "learning_rate": 3.17717454757735e-06, "loss": 0.7678, "step": 2177 }, { "epoch": 0.06358938425155469, "grad_norm": 0.9540637733606296, "learning_rate": 3.1786339754816115e-06, "loss": 0.743, "step": 2178 }, { "epoch": 0.06361858047940207, "grad_norm": 0.8370047783701623, "learning_rate": 3.1800934033858732e-06, "loss": 0.7994, "step": 2179 }, { "epoch": 0.06364777670724943, "grad_norm": 0.8414303611356035, "learning_rate": 3.1815528312901345e-06, "loss": 0.8601, "step": 2180 }, { "epoch": 0.06367697293509679, "grad_norm": 0.7954463492894369, "learning_rate": 3.183012259194396e-06, "loss": 0.6419, "step": 2181 }, { "epoch": 0.06370616916294415, "grad_norm": 0.8967352858782967, "learning_rate": 3.1844716870986574e-06, "loss": 0.8197, "step": 2182 }, { "epoch": 0.06373536539079151, "grad_norm": 0.900316945514521, "learning_rate": 3.1859311150029195e-06, "loss": 0.834, "step": 2183 }, { "epoch": 0.06376456161863887, "grad_norm": 0.7585109043924576, "learning_rate": 3.1873905429071807e-06, "loss": 0.7124, "step": 2184 }, { "epoch": 0.06379375784648623, "grad_norm": 0.825898712743635, "learning_rate": 3.1888499708114424e-06, "loss": 0.8163, "step": 2185 }, { "epoch": 0.0638229540743336, "grad_norm": 1.2693302423352737, "learning_rate": 3.1903093987157036e-06, "loss": 0.8126, "step": 2186 }, { "epoch": 0.06385215030218096, "grad_norm": 0.84218125893767, "learning_rate": 3.1917688266199653e-06, "loss": 0.9105, "step": 2187 }, { "epoch": 0.06388134653002832, "grad_norm": 0.898540582977073, "learning_rate": 3.1932282545242266e-06, "loss": 0.8443, "step": 2188 }, { "epoch": 0.06391054275787568, "grad_norm": 1.0136989026253165, "learning_rate": 3.1946876824284882e-06, "loss": 0.8314, "step": 2189 }, { "epoch": 0.06393973898572304, "grad_norm": 0.7578447002188636, "learning_rate": 3.1961471103327495e-06, "loss": 0.7222, "step": 2190 }, { "epoch": 0.0639689352135704, "grad_norm": 0.9493137767543296, "learning_rate": 3.1976065382370116e-06, "loss": 0.8799, "step": 2191 }, { "epoch": 0.06399813144141776, "grad_norm": 0.7955387678509781, "learning_rate": 3.199065966141273e-06, "loss": 0.7492, "step": 2192 }, { "epoch": 0.06402732766926512, "grad_norm": 0.8084416628960556, "learning_rate": 3.2005253940455345e-06, "loss": 0.8118, "step": 2193 }, { "epoch": 0.0640565238971125, "grad_norm": 0.7986740885245502, "learning_rate": 3.201984821949796e-06, "loss": 0.751, "step": 2194 }, { "epoch": 0.06408572012495986, "grad_norm": 0.9254776769254325, "learning_rate": 3.2034442498540574e-06, "loss": 0.8082, "step": 2195 }, { "epoch": 0.06411491635280722, "grad_norm": 1.1228980791966243, "learning_rate": 3.204903677758319e-06, "loss": 0.8465, "step": 2196 }, { "epoch": 0.06414411258065458, "grad_norm": 0.766977367153264, "learning_rate": 3.2063631056625803e-06, "loss": 0.7522, "step": 2197 }, { "epoch": 0.06417330880850194, "grad_norm": 0.7374747878632973, "learning_rate": 3.2078225335668424e-06, "loss": 0.6364, "step": 2198 }, { "epoch": 0.0642025050363493, "grad_norm": 0.8530437936744241, "learning_rate": 3.2092819614711037e-06, "loss": 0.8429, "step": 2199 }, { "epoch": 0.06423170126419667, "grad_norm": 0.740404307540203, "learning_rate": 3.2107413893753653e-06, "loss": 0.71, "step": 2200 }, { "epoch": 0.06426089749204403, "grad_norm": 0.8146414398463546, "learning_rate": 3.2122008172796266e-06, "loss": 0.8095, "step": 2201 }, { "epoch": 0.06429009371989139, "grad_norm": 0.7910350258993462, "learning_rate": 3.2136602451838882e-06, "loss": 0.7301, "step": 2202 }, { "epoch": 0.06431928994773875, "grad_norm": 0.8288881726319586, "learning_rate": 3.2151196730881495e-06, "loss": 0.7836, "step": 2203 }, { "epoch": 0.06434848617558611, "grad_norm": 0.9042588842292428, "learning_rate": 3.216579100992411e-06, "loss": 0.7918, "step": 2204 }, { "epoch": 0.06437768240343347, "grad_norm": 0.7668983410863529, "learning_rate": 3.2180385288966724e-06, "loss": 0.7126, "step": 2205 }, { "epoch": 0.06440687863128083, "grad_norm": 0.8568103446178251, "learning_rate": 3.2194979568009345e-06, "loss": 0.7519, "step": 2206 }, { "epoch": 0.0644360748591282, "grad_norm": 0.7372467032767159, "learning_rate": 3.2209573847051957e-06, "loss": 0.6702, "step": 2207 }, { "epoch": 0.06446527108697556, "grad_norm": 0.8116288173116573, "learning_rate": 3.2224168126094574e-06, "loss": 0.814, "step": 2208 }, { "epoch": 0.06449446731482293, "grad_norm": 0.7800973580525178, "learning_rate": 3.2238762405137187e-06, "loss": 0.7534, "step": 2209 }, { "epoch": 0.06452366354267029, "grad_norm": 1.0268117075899572, "learning_rate": 3.2253356684179803e-06, "loss": 0.7582, "step": 2210 }, { "epoch": 0.06455285977051765, "grad_norm": 0.8381180832409293, "learning_rate": 3.226795096322242e-06, "loss": 0.8396, "step": 2211 }, { "epoch": 0.06458205599836501, "grad_norm": 0.8240109525343713, "learning_rate": 3.2282545242265032e-06, "loss": 0.7123, "step": 2212 }, { "epoch": 0.06461125222621238, "grad_norm": 0.8142724014013305, "learning_rate": 3.2297139521307653e-06, "loss": 0.7272, "step": 2213 }, { "epoch": 0.06464044845405974, "grad_norm": 0.8660487309264067, "learning_rate": 3.2311733800350266e-06, "loss": 0.8011, "step": 2214 }, { "epoch": 0.0646696446819071, "grad_norm": 0.9934425198716323, "learning_rate": 3.2326328079392883e-06, "loss": 0.8113, "step": 2215 }, { "epoch": 0.06469884090975446, "grad_norm": 0.7154133619437618, "learning_rate": 3.2340922358435495e-06, "loss": 0.6627, "step": 2216 }, { "epoch": 0.06472803713760182, "grad_norm": 0.7874513949685384, "learning_rate": 3.235551663747811e-06, "loss": 0.7263, "step": 2217 }, { "epoch": 0.06475723336544918, "grad_norm": 0.7859018662635269, "learning_rate": 3.2370110916520724e-06, "loss": 0.7514, "step": 2218 }, { "epoch": 0.06478642959329654, "grad_norm": 0.6935195798992786, "learning_rate": 3.238470519556334e-06, "loss": 0.6232, "step": 2219 }, { "epoch": 0.0648156258211439, "grad_norm": 0.9750905564460469, "learning_rate": 3.2399299474605953e-06, "loss": 0.7606, "step": 2220 }, { "epoch": 0.06484482204899127, "grad_norm": 0.833955996850699, "learning_rate": 3.2413893753648574e-06, "loss": 0.7384, "step": 2221 }, { "epoch": 0.06487401827683863, "grad_norm": 0.7659161542676234, "learning_rate": 3.2428488032691187e-06, "loss": 0.7728, "step": 2222 }, { "epoch": 0.06490321450468599, "grad_norm": 0.7573357739512344, "learning_rate": 3.2443082311733803e-06, "loss": 0.7617, "step": 2223 }, { "epoch": 0.06493241073253336, "grad_norm": 0.861628955092318, "learning_rate": 3.2457676590776416e-06, "loss": 0.7727, "step": 2224 }, { "epoch": 0.06496160696038072, "grad_norm": 0.8477240050792814, "learning_rate": 3.2472270869819033e-06, "loss": 0.731, "step": 2225 }, { "epoch": 0.06499080318822809, "grad_norm": 0.8947192432025011, "learning_rate": 3.2486865148861654e-06, "loss": 0.7508, "step": 2226 }, { "epoch": 0.06501999941607545, "grad_norm": 0.8480031299813862, "learning_rate": 3.250145942790426e-06, "loss": 0.7579, "step": 2227 }, { "epoch": 0.06504919564392281, "grad_norm": 1.1227919792260044, "learning_rate": 3.2516053706946883e-06, "loss": 0.828, "step": 2228 }, { "epoch": 0.06507839187177017, "grad_norm": 0.7816343553478095, "learning_rate": 3.2530647985989495e-06, "loss": 0.699, "step": 2229 }, { "epoch": 0.06510758809961753, "grad_norm": 0.8399068170140076, "learning_rate": 3.254524226503211e-06, "loss": 0.8265, "step": 2230 }, { "epoch": 0.06513678432746489, "grad_norm": 0.7571129741544262, "learning_rate": 3.2559836544074724e-06, "loss": 0.7114, "step": 2231 }, { "epoch": 0.06516598055531225, "grad_norm": 0.8638385704319935, "learning_rate": 3.257443082311734e-06, "loss": 0.8345, "step": 2232 }, { "epoch": 0.06519517678315961, "grad_norm": 0.7769154573329937, "learning_rate": 3.2589025102159954e-06, "loss": 0.6786, "step": 2233 }, { "epoch": 0.06522437301100698, "grad_norm": 0.7471856013793786, "learning_rate": 3.2603619381202574e-06, "loss": 0.6794, "step": 2234 }, { "epoch": 0.06525356923885434, "grad_norm": 0.7447362989487513, "learning_rate": 3.2618213660245187e-06, "loss": 0.7164, "step": 2235 }, { "epoch": 0.0652827654667017, "grad_norm": 0.7870072035010044, "learning_rate": 3.2632807939287804e-06, "loss": 0.7152, "step": 2236 }, { "epoch": 0.06531196169454906, "grad_norm": 0.703067281873992, "learning_rate": 3.2647402218330416e-06, "loss": 0.6518, "step": 2237 }, { "epoch": 0.06534115792239642, "grad_norm": 0.8811929807745521, "learning_rate": 3.2661996497373033e-06, "loss": 0.8659, "step": 2238 }, { "epoch": 0.0653703541502438, "grad_norm": 0.853533953573309, "learning_rate": 3.2676590776415645e-06, "loss": 0.8836, "step": 2239 }, { "epoch": 0.06539955037809116, "grad_norm": 0.8047759807168369, "learning_rate": 3.269118505545826e-06, "loss": 0.7425, "step": 2240 }, { "epoch": 0.06542874660593852, "grad_norm": 0.8342438731602984, "learning_rate": 3.2705779334500874e-06, "loss": 0.7783, "step": 2241 }, { "epoch": 0.06545794283378588, "grad_norm": 0.7723353714904588, "learning_rate": 3.2720373613543495e-06, "loss": 0.7281, "step": 2242 }, { "epoch": 0.06548713906163324, "grad_norm": 0.741730286431217, "learning_rate": 3.273496789258611e-06, "loss": 0.6662, "step": 2243 }, { "epoch": 0.0655163352894806, "grad_norm": 0.9041298775226266, "learning_rate": 3.2749562171628725e-06, "loss": 0.8724, "step": 2244 }, { "epoch": 0.06554553151732796, "grad_norm": 0.715771720359587, "learning_rate": 3.276415645067134e-06, "loss": 0.6249, "step": 2245 }, { "epoch": 0.06557472774517532, "grad_norm": 0.8292641141758459, "learning_rate": 3.2778750729713954e-06, "loss": 0.746, "step": 2246 }, { "epoch": 0.06560392397302268, "grad_norm": 0.8728531369946476, "learning_rate": 3.279334500875657e-06, "loss": 0.7377, "step": 2247 }, { "epoch": 0.06563312020087005, "grad_norm": 0.8883339770668746, "learning_rate": 3.2807939287799183e-06, "loss": 0.7437, "step": 2248 }, { "epoch": 0.06566231642871741, "grad_norm": 0.8222505956866405, "learning_rate": 3.2822533566841804e-06, "loss": 0.7861, "step": 2249 }, { "epoch": 0.06569151265656477, "grad_norm": 0.780852391076346, "learning_rate": 3.2837127845884416e-06, "loss": 0.7093, "step": 2250 }, { "epoch": 0.06572070888441213, "grad_norm": 0.7686896475794278, "learning_rate": 3.2851722124927033e-06, "loss": 0.7118, "step": 2251 }, { "epoch": 0.06574990511225949, "grad_norm": 0.8057349593163445, "learning_rate": 3.2866316403969645e-06, "loss": 0.7543, "step": 2252 }, { "epoch": 0.06577910134010685, "grad_norm": 0.7760996015405439, "learning_rate": 3.288091068301226e-06, "loss": 0.7379, "step": 2253 }, { "epoch": 0.06580829756795423, "grad_norm": 0.810498693543381, "learning_rate": 3.2895504962054875e-06, "loss": 0.7979, "step": 2254 }, { "epoch": 0.06583749379580159, "grad_norm": 0.8907014125620287, "learning_rate": 3.291009924109749e-06, "loss": 0.6877, "step": 2255 }, { "epoch": 0.06586669002364895, "grad_norm": 0.8999700620621903, "learning_rate": 3.2924693520140104e-06, "loss": 0.7226, "step": 2256 }, { "epoch": 0.06589588625149631, "grad_norm": 0.8116673176741825, "learning_rate": 3.2939287799182725e-06, "loss": 0.7714, "step": 2257 }, { "epoch": 0.06592508247934367, "grad_norm": 0.785764231167562, "learning_rate": 3.295388207822534e-06, "loss": 0.7189, "step": 2258 }, { "epoch": 0.06595427870719103, "grad_norm": 1.026531492149653, "learning_rate": 3.2968476357267954e-06, "loss": 0.7381, "step": 2259 }, { "epoch": 0.0659834749350384, "grad_norm": 0.7984456903258836, "learning_rate": 3.298307063631057e-06, "loss": 0.789, "step": 2260 }, { "epoch": 0.06601267116288576, "grad_norm": 0.7497773543474596, "learning_rate": 3.2997664915353183e-06, "loss": 0.7294, "step": 2261 }, { "epoch": 0.06604186739073312, "grad_norm": 0.710799481736652, "learning_rate": 3.30122591943958e-06, "loss": 0.6962, "step": 2262 }, { "epoch": 0.06607106361858048, "grad_norm": 0.7395335798802409, "learning_rate": 3.3026853473438412e-06, "loss": 0.642, "step": 2263 }, { "epoch": 0.06610025984642784, "grad_norm": 0.8085393676058092, "learning_rate": 3.3041447752481033e-06, "loss": 0.8254, "step": 2264 }, { "epoch": 0.0661294560742752, "grad_norm": 0.7467820673517099, "learning_rate": 3.3056042031523646e-06, "loss": 0.7048, "step": 2265 }, { "epoch": 0.06615865230212256, "grad_norm": 0.7121871707363242, "learning_rate": 3.3070636310566262e-06, "loss": 0.6251, "step": 2266 }, { "epoch": 0.06618784852996992, "grad_norm": 0.7477258005283883, "learning_rate": 3.3085230589608875e-06, "loss": 0.7009, "step": 2267 }, { "epoch": 0.06621704475781728, "grad_norm": 0.7952699373351632, "learning_rate": 3.309982486865149e-06, "loss": 0.7034, "step": 2268 }, { "epoch": 0.06624624098566466, "grad_norm": 0.7650417153384733, "learning_rate": 3.3114419147694104e-06, "loss": 0.7489, "step": 2269 }, { "epoch": 0.06627543721351202, "grad_norm": 0.7996418815382196, "learning_rate": 3.312901342673672e-06, "loss": 0.8011, "step": 2270 }, { "epoch": 0.06630463344135938, "grad_norm": 0.9909175816369853, "learning_rate": 3.3143607705779333e-06, "loss": 0.767, "step": 2271 }, { "epoch": 0.06633382966920674, "grad_norm": 1.488348426817033, "learning_rate": 3.3158201984821954e-06, "loss": 0.8324, "step": 2272 }, { "epoch": 0.0663630258970541, "grad_norm": 0.8274782423766209, "learning_rate": 3.3172796263864566e-06, "loss": 0.7691, "step": 2273 }, { "epoch": 0.06639222212490146, "grad_norm": 0.8714584466521729, "learning_rate": 3.3187390542907183e-06, "loss": 0.9, "step": 2274 }, { "epoch": 0.06642141835274883, "grad_norm": 0.8059726124504397, "learning_rate": 3.32019848219498e-06, "loss": 0.7081, "step": 2275 }, { "epoch": 0.06645061458059619, "grad_norm": 0.8505589998652965, "learning_rate": 3.3216579100992412e-06, "loss": 0.7724, "step": 2276 }, { "epoch": 0.06647981080844355, "grad_norm": 0.9039851632838215, "learning_rate": 3.323117338003503e-06, "loss": 0.8808, "step": 2277 }, { "epoch": 0.06650900703629091, "grad_norm": 0.734915138637435, "learning_rate": 3.324576765907764e-06, "loss": 0.7311, "step": 2278 }, { "epoch": 0.06653820326413827, "grad_norm": 0.8626971720531478, "learning_rate": 3.3260361938120262e-06, "loss": 0.8162, "step": 2279 }, { "epoch": 0.06656739949198563, "grad_norm": 0.8327437479851668, "learning_rate": 3.3274956217162875e-06, "loss": 0.774, "step": 2280 }, { "epoch": 0.066596595719833, "grad_norm": 0.8142059790814178, "learning_rate": 3.328955049620549e-06, "loss": 0.7478, "step": 2281 }, { "epoch": 0.06662579194768035, "grad_norm": 0.739722013525219, "learning_rate": 3.3304144775248104e-06, "loss": 0.6981, "step": 2282 }, { "epoch": 0.06665498817552772, "grad_norm": 0.8246944233732795, "learning_rate": 3.331873905429072e-06, "loss": 0.7917, "step": 2283 }, { "epoch": 0.06668418440337509, "grad_norm": 1.1604916050762146, "learning_rate": 3.3333333333333333e-06, "loss": 0.7862, "step": 2284 }, { "epoch": 0.06671338063122245, "grad_norm": 0.8110138238367973, "learning_rate": 3.3347927612375954e-06, "loss": 0.7317, "step": 2285 }, { "epoch": 0.06674257685906981, "grad_norm": 0.7183280588780461, "learning_rate": 3.3362521891418562e-06, "loss": 0.6862, "step": 2286 }, { "epoch": 0.06677177308691717, "grad_norm": 0.7308269122614969, "learning_rate": 3.3377116170461183e-06, "loss": 0.6954, "step": 2287 }, { "epoch": 0.06680096931476454, "grad_norm": 0.7699619064530525, "learning_rate": 3.3391710449503796e-06, "loss": 0.6972, "step": 2288 }, { "epoch": 0.0668301655426119, "grad_norm": 0.7987639116358213, "learning_rate": 3.3406304728546413e-06, "loss": 0.745, "step": 2289 }, { "epoch": 0.06685936177045926, "grad_norm": 0.9154296414289567, "learning_rate": 3.342089900758903e-06, "loss": 0.7393, "step": 2290 }, { "epoch": 0.06688855799830662, "grad_norm": 0.8971462732238951, "learning_rate": 3.343549328663164e-06, "loss": 0.8364, "step": 2291 }, { "epoch": 0.06691775422615398, "grad_norm": 1.616741303497687, "learning_rate": 3.3450087565674263e-06, "loss": 0.6456, "step": 2292 }, { "epoch": 0.06694695045400134, "grad_norm": 0.900005083894733, "learning_rate": 3.3464681844716875e-06, "loss": 0.7967, "step": 2293 }, { "epoch": 0.0669761466818487, "grad_norm": 0.8386775940750768, "learning_rate": 3.347927612375949e-06, "loss": 0.7153, "step": 2294 }, { "epoch": 0.06700534290969606, "grad_norm": 1.069901626304546, "learning_rate": 3.3493870402802104e-06, "loss": 0.7923, "step": 2295 }, { "epoch": 0.06703453913754343, "grad_norm": 0.7509677659552474, "learning_rate": 3.350846468184472e-06, "loss": 0.6894, "step": 2296 }, { "epoch": 0.06706373536539079, "grad_norm": 0.8073446699865676, "learning_rate": 3.3523058960887333e-06, "loss": 0.7342, "step": 2297 }, { "epoch": 0.06709293159323815, "grad_norm": 0.8543657672074981, "learning_rate": 3.353765323992995e-06, "loss": 0.8197, "step": 2298 }, { "epoch": 0.06712212782108552, "grad_norm": 0.7723196908333246, "learning_rate": 3.3552247518972563e-06, "loss": 0.7213, "step": 2299 }, { "epoch": 0.06715132404893288, "grad_norm": 0.8042178303069515, "learning_rate": 3.3566841798015184e-06, "loss": 0.7715, "step": 2300 }, { "epoch": 0.06718052027678025, "grad_norm": 1.0299204478985076, "learning_rate": 3.3581436077057796e-06, "loss": 0.8689, "step": 2301 }, { "epoch": 0.0672097165046276, "grad_norm": 0.8031432413830619, "learning_rate": 3.3596030356100413e-06, "loss": 0.7434, "step": 2302 }, { "epoch": 0.06723891273247497, "grad_norm": 0.8475633750377422, "learning_rate": 3.3610624635143025e-06, "loss": 0.7396, "step": 2303 }, { "epoch": 0.06726810896032233, "grad_norm": 0.8071280503205712, "learning_rate": 3.362521891418564e-06, "loss": 0.7752, "step": 2304 }, { "epoch": 0.06729730518816969, "grad_norm": 0.7174992293691508, "learning_rate": 3.3639813193228254e-06, "loss": 0.6786, "step": 2305 }, { "epoch": 0.06732650141601705, "grad_norm": 0.863046828329203, "learning_rate": 3.365440747227087e-06, "loss": 0.8293, "step": 2306 }, { "epoch": 0.06735569764386441, "grad_norm": 0.7875383969705342, "learning_rate": 3.366900175131349e-06, "loss": 0.7435, "step": 2307 }, { "epoch": 0.06738489387171177, "grad_norm": 1.0363067111878226, "learning_rate": 3.3683596030356104e-06, "loss": 0.7725, "step": 2308 }, { "epoch": 0.06741409009955913, "grad_norm": 0.8696912722805842, "learning_rate": 3.369819030939872e-06, "loss": 0.8184, "step": 2309 }, { "epoch": 0.0674432863274065, "grad_norm": 0.9409771772143014, "learning_rate": 3.3712784588441334e-06, "loss": 0.7399, "step": 2310 }, { "epoch": 0.06747248255525386, "grad_norm": 0.7209109934401512, "learning_rate": 3.372737886748395e-06, "loss": 0.6819, "step": 2311 }, { "epoch": 0.06750167878310122, "grad_norm": 0.811435535540055, "learning_rate": 3.3741973146526563e-06, "loss": 0.7995, "step": 2312 }, { "epoch": 0.06753087501094858, "grad_norm": 0.8770441861915148, "learning_rate": 3.375656742556918e-06, "loss": 0.7819, "step": 2313 }, { "epoch": 0.06756007123879594, "grad_norm": 1.0407679719860197, "learning_rate": 3.377116170461179e-06, "loss": 0.9067, "step": 2314 }, { "epoch": 0.06758926746664332, "grad_norm": 0.7085844862081784, "learning_rate": 3.3785755983654413e-06, "loss": 0.6482, "step": 2315 }, { "epoch": 0.06761846369449068, "grad_norm": 0.8664154912891753, "learning_rate": 3.3800350262697025e-06, "loss": 0.7479, "step": 2316 }, { "epoch": 0.06764765992233804, "grad_norm": 0.8101832591407109, "learning_rate": 3.381494454173964e-06, "loss": 0.7396, "step": 2317 }, { "epoch": 0.0676768561501854, "grad_norm": 2.676051527562726, "learning_rate": 3.3829538820782254e-06, "loss": 0.8635, "step": 2318 }, { "epoch": 0.06770605237803276, "grad_norm": 0.8947045478096483, "learning_rate": 3.384413309982487e-06, "loss": 0.912, "step": 2319 }, { "epoch": 0.06773524860588012, "grad_norm": 0.8010575332613727, "learning_rate": 3.3858727378867484e-06, "loss": 0.7537, "step": 2320 }, { "epoch": 0.06776444483372748, "grad_norm": 0.8540670862614477, "learning_rate": 3.38733216579101e-06, "loss": 0.661, "step": 2321 }, { "epoch": 0.06779364106157484, "grad_norm": 0.9076235715672772, "learning_rate": 3.388791593695272e-06, "loss": 0.8265, "step": 2322 }, { "epoch": 0.0678228372894222, "grad_norm": 0.8020238404738047, "learning_rate": 3.3902510215995334e-06, "loss": 0.7929, "step": 2323 }, { "epoch": 0.06785203351726957, "grad_norm": 0.7678930530367103, "learning_rate": 3.391710449503795e-06, "loss": 0.7363, "step": 2324 }, { "epoch": 0.06788122974511693, "grad_norm": 0.8659803928982449, "learning_rate": 3.3931698774080563e-06, "loss": 0.8646, "step": 2325 }, { "epoch": 0.06791042597296429, "grad_norm": 0.7493333130861474, "learning_rate": 3.394629305312318e-06, "loss": 0.7129, "step": 2326 }, { "epoch": 0.06793962220081165, "grad_norm": 0.8414598216779561, "learning_rate": 3.396088733216579e-06, "loss": 0.7683, "step": 2327 }, { "epoch": 0.06796881842865901, "grad_norm": 0.7706604880967328, "learning_rate": 3.397548161120841e-06, "loss": 0.6959, "step": 2328 }, { "epoch": 0.06799801465650637, "grad_norm": 0.7810928024562273, "learning_rate": 3.399007589025102e-06, "loss": 0.7227, "step": 2329 }, { "epoch": 0.06802721088435375, "grad_norm": 0.7451234758906801, "learning_rate": 3.4004670169293642e-06, "loss": 0.6755, "step": 2330 }, { "epoch": 0.06805640711220111, "grad_norm": 0.8103861592049564, "learning_rate": 3.4019264448336255e-06, "loss": 0.7459, "step": 2331 }, { "epoch": 0.06808560334004847, "grad_norm": 0.7835219538076973, "learning_rate": 3.403385872737887e-06, "loss": 0.6807, "step": 2332 }, { "epoch": 0.06811479956789583, "grad_norm": 0.8183851403831505, "learning_rate": 3.4048453006421484e-06, "loss": 0.8204, "step": 2333 }, { "epoch": 0.06814399579574319, "grad_norm": 0.6821987916800382, "learning_rate": 3.40630472854641e-06, "loss": 0.5655, "step": 2334 }, { "epoch": 0.06817319202359055, "grad_norm": 0.7407744241018097, "learning_rate": 3.4077641564506713e-06, "loss": 0.6648, "step": 2335 }, { "epoch": 0.06820238825143791, "grad_norm": 0.7754322337263441, "learning_rate": 3.409223584354933e-06, "loss": 0.7131, "step": 2336 }, { "epoch": 0.06823158447928528, "grad_norm": 0.7643510292159817, "learning_rate": 3.4106830122591942e-06, "loss": 0.7066, "step": 2337 }, { "epoch": 0.06826078070713264, "grad_norm": 0.7727524091153033, "learning_rate": 3.4121424401634563e-06, "loss": 0.7956, "step": 2338 }, { "epoch": 0.06828997693498, "grad_norm": 0.800179166996823, "learning_rate": 3.413601868067718e-06, "loss": 0.6737, "step": 2339 }, { "epoch": 0.06831917316282736, "grad_norm": 0.8196103410731554, "learning_rate": 3.4150612959719792e-06, "loss": 0.8125, "step": 2340 }, { "epoch": 0.06834836939067472, "grad_norm": 0.8092808862723683, "learning_rate": 3.416520723876241e-06, "loss": 0.7754, "step": 2341 }, { "epoch": 0.06837756561852208, "grad_norm": 0.7465861878936882, "learning_rate": 3.417980151780502e-06, "loss": 0.707, "step": 2342 }, { "epoch": 0.06840676184636944, "grad_norm": 0.9335728217235396, "learning_rate": 3.4194395796847642e-06, "loss": 0.7178, "step": 2343 }, { "epoch": 0.0684359580742168, "grad_norm": 0.7921188608290901, "learning_rate": 3.4208990075890255e-06, "loss": 0.766, "step": 2344 }, { "epoch": 0.06846515430206418, "grad_norm": 0.8304871190176573, "learning_rate": 3.422358435493287e-06, "loss": 0.8054, "step": 2345 }, { "epoch": 0.06849435052991154, "grad_norm": 0.8414844064536201, "learning_rate": 3.4238178633975484e-06, "loss": 0.7125, "step": 2346 }, { "epoch": 0.0685235467577589, "grad_norm": 0.7792900062117684, "learning_rate": 3.42527729130181e-06, "loss": 0.6937, "step": 2347 }, { "epoch": 0.06855274298560626, "grad_norm": 0.7533731272271283, "learning_rate": 3.4267367192060713e-06, "loss": 0.6847, "step": 2348 }, { "epoch": 0.06858193921345362, "grad_norm": 0.930608134176028, "learning_rate": 3.428196147110333e-06, "loss": 0.821, "step": 2349 }, { "epoch": 0.06861113544130099, "grad_norm": 0.7697993723941392, "learning_rate": 3.4296555750145942e-06, "loss": 0.7175, "step": 2350 }, { "epoch": 0.06864033166914835, "grad_norm": 0.772489947312238, "learning_rate": 3.4311150029188563e-06, "loss": 0.629, "step": 2351 }, { "epoch": 0.06866952789699571, "grad_norm": 0.8413009697006798, "learning_rate": 3.4325744308231176e-06, "loss": 0.7666, "step": 2352 }, { "epoch": 0.06869872412484307, "grad_norm": 1.0209651811745764, "learning_rate": 3.4340338587273792e-06, "loss": 0.8375, "step": 2353 }, { "epoch": 0.06872792035269043, "grad_norm": 0.7281274419177264, "learning_rate": 3.435493286631641e-06, "loss": 0.66, "step": 2354 }, { "epoch": 0.06875711658053779, "grad_norm": 0.7757606421215851, "learning_rate": 3.436952714535902e-06, "loss": 0.6925, "step": 2355 }, { "epoch": 0.06878631280838515, "grad_norm": 0.8242272641741177, "learning_rate": 3.438412142440164e-06, "loss": 0.7957, "step": 2356 }, { "epoch": 0.06881550903623251, "grad_norm": 0.8506030215420094, "learning_rate": 3.439871570344425e-06, "loss": 0.8492, "step": 2357 }, { "epoch": 0.06884470526407988, "grad_norm": 0.7853547340032085, "learning_rate": 3.441330998248687e-06, "loss": 0.7844, "step": 2358 }, { "epoch": 0.06887390149192724, "grad_norm": 0.7600914474413712, "learning_rate": 3.4427904261529484e-06, "loss": 0.7506, "step": 2359 }, { "epoch": 0.06890309771977461, "grad_norm": 0.7930537557923631, "learning_rate": 3.44424985405721e-06, "loss": 0.7596, "step": 2360 }, { "epoch": 0.06893229394762197, "grad_norm": 0.849852106346067, "learning_rate": 3.4457092819614713e-06, "loss": 0.7032, "step": 2361 }, { "epoch": 0.06896149017546933, "grad_norm": 0.7876365095735589, "learning_rate": 3.447168709865733e-06, "loss": 0.6422, "step": 2362 }, { "epoch": 0.0689906864033167, "grad_norm": 0.7808426299628881, "learning_rate": 3.4486281377699942e-06, "loss": 0.6837, "step": 2363 }, { "epoch": 0.06901988263116406, "grad_norm": 0.8126579975955445, "learning_rate": 3.450087565674256e-06, "loss": 0.8052, "step": 2364 }, { "epoch": 0.06904907885901142, "grad_norm": 0.9541043466935267, "learning_rate": 3.451546993578517e-06, "loss": 0.8481, "step": 2365 }, { "epoch": 0.06907827508685878, "grad_norm": 0.8725624193593916, "learning_rate": 3.4530064214827793e-06, "loss": 0.8127, "step": 2366 }, { "epoch": 0.06910747131470614, "grad_norm": 0.7745302700407238, "learning_rate": 3.4544658493870405e-06, "loss": 0.6966, "step": 2367 }, { "epoch": 0.0691366675425535, "grad_norm": 0.7732749803563176, "learning_rate": 3.455925277291302e-06, "loss": 0.6838, "step": 2368 }, { "epoch": 0.06916586377040086, "grad_norm": 0.7673766995591924, "learning_rate": 3.4573847051955634e-06, "loss": 0.7159, "step": 2369 }, { "epoch": 0.06919505999824822, "grad_norm": 0.9007722000624969, "learning_rate": 3.458844133099825e-06, "loss": 0.7742, "step": 2370 }, { "epoch": 0.06922425622609558, "grad_norm": 0.7673408759000272, "learning_rate": 3.4603035610040868e-06, "loss": 0.7879, "step": 2371 }, { "epoch": 0.06925345245394295, "grad_norm": 0.7620527438655326, "learning_rate": 3.461762988908348e-06, "loss": 0.7005, "step": 2372 }, { "epoch": 0.06928264868179031, "grad_norm": 0.7562756671357401, "learning_rate": 3.46322241681261e-06, "loss": 0.7028, "step": 2373 }, { "epoch": 0.06931184490963767, "grad_norm": 0.8063577987777019, "learning_rate": 3.4646818447168713e-06, "loss": 0.7317, "step": 2374 }, { "epoch": 0.06934104113748504, "grad_norm": 0.8209826671652255, "learning_rate": 3.466141272621133e-06, "loss": 0.8047, "step": 2375 }, { "epoch": 0.0693702373653324, "grad_norm": 0.746382311060313, "learning_rate": 3.4676007005253943e-06, "loss": 0.6301, "step": 2376 }, { "epoch": 0.06939943359317977, "grad_norm": 0.8767058892610315, "learning_rate": 3.469060128429656e-06, "loss": 0.7649, "step": 2377 }, { "epoch": 0.06942862982102713, "grad_norm": 0.800278108122364, "learning_rate": 3.470519556333917e-06, "loss": 0.6804, "step": 2378 }, { "epoch": 0.06945782604887449, "grad_norm": 0.9161281369948026, "learning_rate": 3.471978984238179e-06, "loss": 0.6178, "step": 2379 }, { "epoch": 0.06948702227672185, "grad_norm": 0.8202363479866547, "learning_rate": 3.47343841214244e-06, "loss": 0.7981, "step": 2380 }, { "epoch": 0.06951621850456921, "grad_norm": 0.859037535104036, "learning_rate": 3.474897840046702e-06, "loss": 0.7829, "step": 2381 }, { "epoch": 0.06954541473241657, "grad_norm": 0.8402520788643851, "learning_rate": 3.4763572679509634e-06, "loss": 0.7694, "step": 2382 }, { "epoch": 0.06957461096026393, "grad_norm": 0.803221678598048, "learning_rate": 3.477816695855225e-06, "loss": 0.7543, "step": 2383 }, { "epoch": 0.0696038071881113, "grad_norm": 1.0208131326887988, "learning_rate": 3.4792761237594864e-06, "loss": 0.8441, "step": 2384 }, { "epoch": 0.06963300341595866, "grad_norm": 0.7792391848813327, "learning_rate": 3.480735551663748e-06, "loss": 0.756, "step": 2385 }, { "epoch": 0.06966219964380602, "grad_norm": 0.8315290713672722, "learning_rate": 3.4821949795680097e-06, "loss": 0.8268, "step": 2386 }, { "epoch": 0.06969139587165338, "grad_norm": 0.7984158257031513, "learning_rate": 3.483654407472271e-06, "loss": 0.7582, "step": 2387 }, { "epoch": 0.06972059209950074, "grad_norm": 0.7556536894293288, "learning_rate": 3.485113835376533e-06, "loss": 0.6758, "step": 2388 }, { "epoch": 0.0697497883273481, "grad_norm": 0.7974494150456741, "learning_rate": 3.4865732632807943e-06, "loss": 0.7664, "step": 2389 }, { "epoch": 0.06977898455519548, "grad_norm": 0.8545860720967724, "learning_rate": 3.488032691185056e-06, "loss": 0.7306, "step": 2390 }, { "epoch": 0.06980818078304284, "grad_norm": 0.7878784081294825, "learning_rate": 3.489492119089317e-06, "loss": 0.7368, "step": 2391 }, { "epoch": 0.0698373770108902, "grad_norm": 0.9291443333831112, "learning_rate": 3.490951546993579e-06, "loss": 0.742, "step": 2392 }, { "epoch": 0.06986657323873756, "grad_norm": 0.763750874228277, "learning_rate": 3.49241097489784e-06, "loss": 0.7393, "step": 2393 }, { "epoch": 0.06989576946658492, "grad_norm": 0.8100876567763033, "learning_rate": 3.4938704028021018e-06, "loss": 0.7533, "step": 2394 }, { "epoch": 0.06992496569443228, "grad_norm": 0.7556228058662656, "learning_rate": 3.495329830706363e-06, "loss": 0.667, "step": 2395 }, { "epoch": 0.06995416192227964, "grad_norm": 0.8032418009303709, "learning_rate": 3.496789258610625e-06, "loss": 0.6953, "step": 2396 }, { "epoch": 0.069983358150127, "grad_norm": 0.7630695594609891, "learning_rate": 3.4982486865148864e-06, "loss": 0.6792, "step": 2397 }, { "epoch": 0.07001255437797436, "grad_norm": 0.9930813914345555, "learning_rate": 3.499708114419148e-06, "loss": 0.8046, "step": 2398 }, { "epoch": 0.07004175060582173, "grad_norm": 0.8007741046132977, "learning_rate": 3.5011675423234093e-06, "loss": 0.7297, "step": 2399 }, { "epoch": 0.07007094683366909, "grad_norm": 0.8856626288565542, "learning_rate": 3.502626970227671e-06, "loss": 0.7117, "step": 2400 }, { "epoch": 0.07010014306151645, "grad_norm": 0.7937441359856294, "learning_rate": 3.504086398131932e-06, "loss": 0.7588, "step": 2401 }, { "epoch": 0.07012933928936381, "grad_norm": 0.8185585701809567, "learning_rate": 3.5055458260361943e-06, "loss": 0.7072, "step": 2402 }, { "epoch": 0.07015853551721117, "grad_norm": 0.8388844329519016, "learning_rate": 3.507005253940456e-06, "loss": 0.7591, "step": 2403 }, { "epoch": 0.07018773174505853, "grad_norm": 0.9164250608878068, "learning_rate": 3.508464681844717e-06, "loss": 0.7683, "step": 2404 }, { "epoch": 0.07021692797290591, "grad_norm": 0.7437391957631584, "learning_rate": 3.509924109748979e-06, "loss": 0.6941, "step": 2405 }, { "epoch": 0.07024612420075327, "grad_norm": 0.7261199679895819, "learning_rate": 3.51138353765324e-06, "loss": 0.6921, "step": 2406 }, { "epoch": 0.07027532042860063, "grad_norm": 0.8100599158219052, "learning_rate": 3.512842965557502e-06, "loss": 0.7615, "step": 2407 }, { "epoch": 0.07030451665644799, "grad_norm": 0.8835979992046259, "learning_rate": 3.514302393461763e-06, "loss": 0.8092, "step": 2408 }, { "epoch": 0.07033371288429535, "grad_norm": 0.8607276402911296, "learning_rate": 3.515761821366025e-06, "loss": 0.8048, "step": 2409 }, { "epoch": 0.07036290911214271, "grad_norm": 0.8088021498681774, "learning_rate": 3.5172212492702864e-06, "loss": 0.744, "step": 2410 }, { "epoch": 0.07039210533999007, "grad_norm": 0.7695987833132785, "learning_rate": 3.518680677174548e-06, "loss": 0.748, "step": 2411 }, { "epoch": 0.07042130156783744, "grad_norm": 0.779447457115176, "learning_rate": 3.5201401050788093e-06, "loss": 0.707, "step": 2412 }, { "epoch": 0.0704504977956848, "grad_norm": 0.8631588361588266, "learning_rate": 3.521599532983071e-06, "loss": 0.7996, "step": 2413 }, { "epoch": 0.07047969402353216, "grad_norm": 0.7727748524537572, "learning_rate": 3.5230589608873322e-06, "loss": 0.7274, "step": 2414 }, { "epoch": 0.07050889025137952, "grad_norm": 0.930433392369394, "learning_rate": 3.524518388791594e-06, "loss": 0.8674, "step": 2415 }, { "epoch": 0.07053808647922688, "grad_norm": 0.999906357650361, "learning_rate": 3.525977816695855e-06, "loss": 0.7708, "step": 2416 }, { "epoch": 0.07056728270707424, "grad_norm": 0.7769639155980811, "learning_rate": 3.5274372446001172e-06, "loss": 0.6839, "step": 2417 }, { "epoch": 0.0705964789349216, "grad_norm": 0.9273932637393194, "learning_rate": 3.528896672504379e-06, "loss": 0.7087, "step": 2418 }, { "epoch": 0.07062567516276896, "grad_norm": 0.7838244513057648, "learning_rate": 3.53035610040864e-06, "loss": 0.7553, "step": 2419 }, { "epoch": 0.07065487139061634, "grad_norm": 0.7205484568694611, "learning_rate": 3.531815528312902e-06, "loss": 0.6713, "step": 2420 }, { "epoch": 0.0706840676184637, "grad_norm": 0.8348333271736957, "learning_rate": 3.533274956217163e-06, "loss": 0.7689, "step": 2421 }, { "epoch": 0.07071326384631106, "grad_norm": 0.789349314304428, "learning_rate": 3.5347343841214247e-06, "loss": 0.7712, "step": 2422 }, { "epoch": 0.07074246007415842, "grad_norm": 0.8061767634077339, "learning_rate": 3.536193812025686e-06, "loss": 0.8077, "step": 2423 }, { "epoch": 0.07077165630200578, "grad_norm": 0.8156725481748609, "learning_rate": 3.537653239929948e-06, "loss": 0.8141, "step": 2424 }, { "epoch": 0.07080085252985314, "grad_norm": 0.8338970628013015, "learning_rate": 3.5391126678342093e-06, "loss": 0.8119, "step": 2425 }, { "epoch": 0.0708300487577005, "grad_norm": 0.7734663179210808, "learning_rate": 3.540572095738471e-06, "loss": 0.675, "step": 2426 }, { "epoch": 0.07085924498554787, "grad_norm": 0.7935959882729727, "learning_rate": 3.5420315236427322e-06, "loss": 0.7486, "step": 2427 }, { "epoch": 0.07088844121339523, "grad_norm": 0.7904785524973443, "learning_rate": 3.543490951546994e-06, "loss": 0.7579, "step": 2428 }, { "epoch": 0.07091763744124259, "grad_norm": 0.7794678583234587, "learning_rate": 3.544950379451255e-06, "loss": 0.6232, "step": 2429 }, { "epoch": 0.07094683366908995, "grad_norm": 0.8297455946437827, "learning_rate": 3.546409807355517e-06, "loss": 0.7523, "step": 2430 }, { "epoch": 0.07097602989693731, "grad_norm": 0.7886538709354325, "learning_rate": 3.547869235259778e-06, "loss": 0.7701, "step": 2431 }, { "epoch": 0.07100522612478467, "grad_norm": 0.9210525152020747, "learning_rate": 3.54932866316404e-06, "loss": 0.7716, "step": 2432 }, { "epoch": 0.07103442235263203, "grad_norm": 0.7799943741068219, "learning_rate": 3.5507880910683014e-06, "loss": 0.7108, "step": 2433 }, { "epoch": 0.0710636185804794, "grad_norm": 0.7448200947694126, "learning_rate": 3.552247518972563e-06, "loss": 0.6582, "step": 2434 }, { "epoch": 0.07109281480832677, "grad_norm": 0.7534566100385376, "learning_rate": 3.5537069468768247e-06, "loss": 0.7675, "step": 2435 }, { "epoch": 0.07112201103617413, "grad_norm": 0.7772967297913864, "learning_rate": 3.555166374781086e-06, "loss": 0.7895, "step": 2436 }, { "epoch": 0.0711512072640215, "grad_norm": 0.7553432478255631, "learning_rate": 3.5566258026853477e-06, "loss": 0.7687, "step": 2437 }, { "epoch": 0.07118040349186885, "grad_norm": 0.7607797305880437, "learning_rate": 3.558085230589609e-06, "loss": 0.683, "step": 2438 }, { "epoch": 0.07120959971971622, "grad_norm": 0.7846863938535849, "learning_rate": 3.559544658493871e-06, "loss": 0.7326, "step": 2439 }, { "epoch": 0.07123879594756358, "grad_norm": 0.7950527727030401, "learning_rate": 3.5610040863981322e-06, "loss": 0.7065, "step": 2440 }, { "epoch": 0.07126799217541094, "grad_norm": 1.0824052540349378, "learning_rate": 3.562463514302394e-06, "loss": 0.7997, "step": 2441 }, { "epoch": 0.0712971884032583, "grad_norm": 0.8622084617973299, "learning_rate": 3.563922942206655e-06, "loss": 0.823, "step": 2442 }, { "epoch": 0.07132638463110566, "grad_norm": 0.8164362358662439, "learning_rate": 3.565382370110917e-06, "loss": 0.7242, "step": 2443 }, { "epoch": 0.07135558085895302, "grad_norm": 1.1713841637843019, "learning_rate": 3.566841798015178e-06, "loss": 0.8429, "step": 2444 }, { "epoch": 0.07138477708680038, "grad_norm": 0.8585270395424641, "learning_rate": 3.5683012259194398e-06, "loss": 0.7368, "step": 2445 }, { "epoch": 0.07141397331464774, "grad_norm": 0.7241106891332549, "learning_rate": 3.569760653823701e-06, "loss": 0.6301, "step": 2446 }, { "epoch": 0.0714431695424951, "grad_norm": 0.7596451320112821, "learning_rate": 3.571220081727963e-06, "loss": 0.702, "step": 2447 }, { "epoch": 0.07147236577034247, "grad_norm": 0.8807167506847005, "learning_rate": 3.5726795096322243e-06, "loss": 0.7825, "step": 2448 }, { "epoch": 0.07150156199818983, "grad_norm": 0.9527512962161776, "learning_rate": 3.574138937536486e-06, "loss": 0.7744, "step": 2449 }, { "epoch": 0.0715307582260372, "grad_norm": 0.8044775505664873, "learning_rate": 3.5755983654407477e-06, "loss": 0.7255, "step": 2450 }, { "epoch": 0.07155995445388456, "grad_norm": 0.7571512176629156, "learning_rate": 3.577057793345009e-06, "loss": 0.7018, "step": 2451 }, { "epoch": 0.07158915068173193, "grad_norm": 0.8180557618843425, "learning_rate": 3.578517221249271e-06, "loss": 0.6943, "step": 2452 }, { "epoch": 0.07161834690957929, "grad_norm": 0.8311226129716153, "learning_rate": 3.579976649153532e-06, "loss": 0.8211, "step": 2453 }, { "epoch": 0.07164754313742665, "grad_norm": 0.7534052374161727, "learning_rate": 3.581436077057794e-06, "loss": 0.698, "step": 2454 }, { "epoch": 0.07167673936527401, "grad_norm": 0.7622080793417111, "learning_rate": 3.582895504962055e-06, "loss": 0.7315, "step": 2455 }, { "epoch": 0.07170593559312137, "grad_norm": 0.7806667476403888, "learning_rate": 3.584354932866317e-06, "loss": 0.7056, "step": 2456 }, { "epoch": 0.07173513182096873, "grad_norm": 0.8162132845885868, "learning_rate": 3.585814360770578e-06, "loss": 0.8111, "step": 2457 }, { "epoch": 0.07176432804881609, "grad_norm": 0.8653204664651726, "learning_rate": 3.5872737886748398e-06, "loss": 0.803, "step": 2458 }, { "epoch": 0.07179352427666345, "grad_norm": 0.7927644649577599, "learning_rate": 3.588733216579101e-06, "loss": 0.7226, "step": 2459 }, { "epoch": 0.07182272050451081, "grad_norm": 0.8049574614834516, "learning_rate": 3.590192644483363e-06, "loss": 0.7926, "step": 2460 }, { "epoch": 0.07185191673235818, "grad_norm": 0.7632348331669269, "learning_rate": 3.5916520723876244e-06, "loss": 0.6818, "step": 2461 }, { "epoch": 0.07188111296020554, "grad_norm": 0.7946048221297514, "learning_rate": 3.593111500291886e-06, "loss": 0.7486, "step": 2462 }, { "epoch": 0.0719103091880529, "grad_norm": 0.7926803277228313, "learning_rate": 3.5945709281961473e-06, "loss": 0.6473, "step": 2463 }, { "epoch": 0.07193950541590026, "grad_norm": 0.8621616145258687, "learning_rate": 3.596030356100409e-06, "loss": 0.8027, "step": 2464 }, { "epoch": 0.07196870164374763, "grad_norm": 0.7965560258549924, "learning_rate": 3.59748978400467e-06, "loss": 0.7107, "step": 2465 }, { "epoch": 0.071997897871595, "grad_norm": 0.8417762691968549, "learning_rate": 3.598949211908932e-06, "loss": 0.7138, "step": 2466 }, { "epoch": 0.07202709409944236, "grad_norm": 0.819421540112931, "learning_rate": 3.600408639813194e-06, "loss": 0.8031, "step": 2467 }, { "epoch": 0.07205629032728972, "grad_norm": 1.1440247221280415, "learning_rate": 3.601868067717455e-06, "loss": 0.8265, "step": 2468 }, { "epoch": 0.07208548655513708, "grad_norm": 0.9552576386044856, "learning_rate": 3.603327495621717e-06, "loss": 0.7674, "step": 2469 }, { "epoch": 0.07211468278298444, "grad_norm": 0.7644441676362096, "learning_rate": 3.604786923525978e-06, "loss": 0.7493, "step": 2470 }, { "epoch": 0.0721438790108318, "grad_norm": 0.7731547612492697, "learning_rate": 3.6062463514302398e-06, "loss": 0.7267, "step": 2471 }, { "epoch": 0.07217307523867916, "grad_norm": 0.8645581680105243, "learning_rate": 3.607705779334501e-06, "loss": 0.7631, "step": 2472 }, { "epoch": 0.07220227146652652, "grad_norm": 0.7824933022297009, "learning_rate": 3.6091652072387627e-06, "loss": 0.7596, "step": 2473 }, { "epoch": 0.07223146769437389, "grad_norm": 0.8333954565045679, "learning_rate": 3.610624635143024e-06, "loss": 0.7702, "step": 2474 }, { "epoch": 0.07226066392222125, "grad_norm": 0.7768461427986207, "learning_rate": 3.612084063047286e-06, "loss": 0.6966, "step": 2475 }, { "epoch": 0.07228986015006861, "grad_norm": 0.7719031918917183, "learning_rate": 3.6135434909515473e-06, "loss": 0.713, "step": 2476 }, { "epoch": 0.07231905637791597, "grad_norm": 0.78029739715065, "learning_rate": 3.615002918855809e-06, "loss": 0.7408, "step": 2477 }, { "epoch": 0.07234825260576333, "grad_norm": 0.9145884997630456, "learning_rate": 3.61646234676007e-06, "loss": 0.8522, "step": 2478 }, { "epoch": 0.07237744883361069, "grad_norm": 0.8499652853261099, "learning_rate": 3.617921774664332e-06, "loss": 0.657, "step": 2479 }, { "epoch": 0.07240664506145807, "grad_norm": 0.7851219930598672, "learning_rate": 3.619381202568593e-06, "loss": 0.6875, "step": 2480 }, { "epoch": 0.07243584128930543, "grad_norm": 0.7618713795620721, "learning_rate": 3.620840630472855e-06, "loss": 0.7355, "step": 2481 }, { "epoch": 0.07246503751715279, "grad_norm": 0.8700869754707437, "learning_rate": 3.622300058377117e-06, "loss": 0.8187, "step": 2482 }, { "epoch": 0.07249423374500015, "grad_norm": 0.7901924634359214, "learning_rate": 3.623759486281378e-06, "loss": 0.737, "step": 2483 }, { "epoch": 0.07252342997284751, "grad_norm": 0.8484014921758204, "learning_rate": 3.62521891418564e-06, "loss": 0.8416, "step": 2484 }, { "epoch": 0.07255262620069487, "grad_norm": 0.8887040456038863, "learning_rate": 3.626678342089901e-06, "loss": 0.8251, "step": 2485 }, { "epoch": 0.07258182242854223, "grad_norm": 1.0290841521491618, "learning_rate": 3.6281377699941627e-06, "loss": 0.7252, "step": 2486 }, { "epoch": 0.0726110186563896, "grad_norm": 0.8072778640193041, "learning_rate": 3.629597197898424e-06, "loss": 0.7922, "step": 2487 }, { "epoch": 0.07264021488423696, "grad_norm": 0.7172699736624681, "learning_rate": 3.6310566258026856e-06, "loss": 0.7161, "step": 2488 }, { "epoch": 0.07266941111208432, "grad_norm": 0.83684514681179, "learning_rate": 3.632516053706947e-06, "loss": 0.7976, "step": 2489 }, { "epoch": 0.07269860733993168, "grad_norm": 0.7704811127597668, "learning_rate": 3.633975481611209e-06, "loss": 0.7431, "step": 2490 }, { "epoch": 0.07272780356777904, "grad_norm": 0.77232413738858, "learning_rate": 3.6354349095154702e-06, "loss": 0.6521, "step": 2491 }, { "epoch": 0.0727569997956264, "grad_norm": 0.7993294871784488, "learning_rate": 3.636894337419732e-06, "loss": 0.75, "step": 2492 }, { "epoch": 0.07278619602347376, "grad_norm": 0.7657383553222115, "learning_rate": 3.638353765323993e-06, "loss": 0.7277, "step": 2493 }, { "epoch": 0.07281539225132112, "grad_norm": 0.8186017742527634, "learning_rate": 3.639813193228255e-06, "loss": 0.7753, "step": 2494 }, { "epoch": 0.0728445884791685, "grad_norm": 0.7348759358153081, "learning_rate": 3.641272621132516e-06, "loss": 0.6887, "step": 2495 }, { "epoch": 0.07287378470701586, "grad_norm": 0.7371483469647575, "learning_rate": 3.6427320490367777e-06, "loss": 0.6909, "step": 2496 }, { "epoch": 0.07290298093486322, "grad_norm": 0.7670801820045657, "learning_rate": 3.644191476941039e-06, "loss": 0.6408, "step": 2497 }, { "epoch": 0.07293217716271058, "grad_norm": 0.7663506027868465, "learning_rate": 3.645650904845301e-06, "loss": 0.7113, "step": 2498 }, { "epoch": 0.07296137339055794, "grad_norm": 0.7688850398325403, "learning_rate": 3.6471103327495627e-06, "loss": 0.6744, "step": 2499 }, { "epoch": 0.0729905696184053, "grad_norm": 0.7856392842329614, "learning_rate": 3.648569760653824e-06, "loss": 0.6892, "step": 2500 }, { "epoch": 0.07301976584625267, "grad_norm": 0.7571393130076917, "learning_rate": 3.6500291885580857e-06, "loss": 0.6616, "step": 2501 }, { "epoch": 0.07304896207410003, "grad_norm": 0.9709101605826809, "learning_rate": 3.651488616462347e-06, "loss": 0.8755, "step": 2502 }, { "epoch": 0.07307815830194739, "grad_norm": 0.8823513171113917, "learning_rate": 3.6529480443666086e-06, "loss": 0.7913, "step": 2503 }, { "epoch": 0.07310735452979475, "grad_norm": 0.8384596597154426, "learning_rate": 3.65440747227087e-06, "loss": 0.6682, "step": 2504 }, { "epoch": 0.07313655075764211, "grad_norm": 0.8921491571788377, "learning_rate": 3.655866900175132e-06, "loss": 0.832, "step": 2505 }, { "epoch": 0.07316574698548947, "grad_norm": 0.816411897327985, "learning_rate": 3.657326328079393e-06, "loss": 0.8275, "step": 2506 }, { "epoch": 0.07319494321333683, "grad_norm": 0.7748233211025819, "learning_rate": 3.658785755983655e-06, "loss": 0.7832, "step": 2507 }, { "epoch": 0.0732241394411842, "grad_norm": 0.904903377111384, "learning_rate": 3.660245183887916e-06, "loss": 0.7062, "step": 2508 }, { "epoch": 0.07325333566903156, "grad_norm": 0.8238160196080552, "learning_rate": 3.6617046117921777e-06, "loss": 0.7851, "step": 2509 }, { "epoch": 0.07328253189687892, "grad_norm": 0.7973328215079588, "learning_rate": 3.663164039696439e-06, "loss": 0.8261, "step": 2510 }, { "epoch": 0.07331172812472629, "grad_norm": 0.856395208606886, "learning_rate": 3.6646234676007007e-06, "loss": 0.8292, "step": 2511 }, { "epoch": 0.07334092435257365, "grad_norm": 0.7524488140940597, "learning_rate": 3.666082895504962e-06, "loss": 0.6719, "step": 2512 }, { "epoch": 0.07337012058042101, "grad_norm": 0.8006843114701246, "learning_rate": 3.667542323409224e-06, "loss": 0.71, "step": 2513 }, { "epoch": 0.07339931680826838, "grad_norm": 0.7827791361641061, "learning_rate": 3.6690017513134857e-06, "loss": 0.7066, "step": 2514 }, { "epoch": 0.07342851303611574, "grad_norm": 0.9809711516145369, "learning_rate": 3.670461179217747e-06, "loss": 0.7436, "step": 2515 }, { "epoch": 0.0734577092639631, "grad_norm": 0.7951138561928435, "learning_rate": 3.6719206071220086e-06, "loss": 0.815, "step": 2516 }, { "epoch": 0.07348690549181046, "grad_norm": 0.7635089033678204, "learning_rate": 3.67338003502627e-06, "loss": 0.7295, "step": 2517 }, { "epoch": 0.07351610171965782, "grad_norm": 0.7132416795639427, "learning_rate": 3.674839462930532e-06, "loss": 0.6832, "step": 2518 }, { "epoch": 0.07354529794750518, "grad_norm": 0.7806295794541583, "learning_rate": 3.676298890834793e-06, "loss": 0.7957, "step": 2519 }, { "epoch": 0.07357449417535254, "grad_norm": 0.7720602516543814, "learning_rate": 3.677758318739055e-06, "loss": 0.7266, "step": 2520 }, { "epoch": 0.0736036904031999, "grad_norm": 0.8616153126889593, "learning_rate": 3.679217746643316e-06, "loss": 0.7871, "step": 2521 }, { "epoch": 0.07363288663104726, "grad_norm": 0.7805903928647583, "learning_rate": 3.6806771745475778e-06, "loss": 0.7314, "step": 2522 }, { "epoch": 0.07366208285889463, "grad_norm": 0.8166843714775334, "learning_rate": 3.682136602451839e-06, "loss": 0.7922, "step": 2523 }, { "epoch": 0.07369127908674199, "grad_norm": 0.7918326231596148, "learning_rate": 3.6835960303561007e-06, "loss": 0.772, "step": 2524 }, { "epoch": 0.07372047531458935, "grad_norm": 0.83448404993997, "learning_rate": 3.685055458260362e-06, "loss": 0.8344, "step": 2525 }, { "epoch": 0.07374967154243672, "grad_norm": 0.7972597375265918, "learning_rate": 3.686514886164624e-06, "loss": 0.8117, "step": 2526 }, { "epoch": 0.07377886777028408, "grad_norm": 0.7949917603275444, "learning_rate": 3.6879743140688853e-06, "loss": 0.7557, "step": 2527 }, { "epoch": 0.07380806399813145, "grad_norm": 0.7970782074009233, "learning_rate": 3.689433741973147e-06, "loss": 0.8158, "step": 2528 }, { "epoch": 0.0738372602259788, "grad_norm": 0.7871513158998446, "learning_rate": 3.690893169877408e-06, "loss": 0.7561, "step": 2529 }, { "epoch": 0.07386645645382617, "grad_norm": 0.8134703004167027, "learning_rate": 3.69235259778167e-06, "loss": 0.7301, "step": 2530 }, { "epoch": 0.07389565268167353, "grad_norm": 0.8579117683401863, "learning_rate": 3.6938120256859315e-06, "loss": 0.7787, "step": 2531 }, { "epoch": 0.07392484890952089, "grad_norm": 0.7364459502661175, "learning_rate": 3.6952714535901928e-06, "loss": 0.6471, "step": 2532 }, { "epoch": 0.07395404513736825, "grad_norm": 0.8869825682094455, "learning_rate": 3.696730881494455e-06, "loss": 0.7548, "step": 2533 }, { "epoch": 0.07398324136521561, "grad_norm": 0.7535032347963275, "learning_rate": 3.698190309398716e-06, "loss": 0.6939, "step": 2534 }, { "epoch": 0.07401243759306297, "grad_norm": 0.7258162900847233, "learning_rate": 3.6996497373029778e-06, "loss": 0.656, "step": 2535 }, { "epoch": 0.07404163382091034, "grad_norm": 0.7649775488426818, "learning_rate": 3.701109165207239e-06, "loss": 0.6784, "step": 2536 }, { "epoch": 0.0740708300487577, "grad_norm": 0.8081418953690143, "learning_rate": 3.7025685931115007e-06, "loss": 0.7921, "step": 2537 }, { "epoch": 0.07410002627660506, "grad_norm": 0.806915551940351, "learning_rate": 3.704028021015762e-06, "loss": 0.7942, "step": 2538 }, { "epoch": 0.07412922250445242, "grad_norm": 0.8323642783581691, "learning_rate": 3.7054874489200236e-06, "loss": 0.7766, "step": 2539 }, { "epoch": 0.07415841873229978, "grad_norm": 0.8932613294966165, "learning_rate": 3.706946876824285e-06, "loss": 0.7973, "step": 2540 }, { "epoch": 0.07418761496014716, "grad_norm": 0.7757512225803959, "learning_rate": 3.708406304728547e-06, "loss": 0.7472, "step": 2541 }, { "epoch": 0.07421681118799452, "grad_norm": 0.7558676155784795, "learning_rate": 3.709865732632808e-06, "loss": 0.7155, "step": 2542 }, { "epoch": 0.07424600741584188, "grad_norm": 0.8146854216275371, "learning_rate": 3.71132516053707e-06, "loss": 0.8025, "step": 2543 }, { "epoch": 0.07427520364368924, "grad_norm": 0.7856752955844692, "learning_rate": 3.712784588441331e-06, "loss": 0.7599, "step": 2544 }, { "epoch": 0.0743043998715366, "grad_norm": 0.814396566458274, "learning_rate": 3.7142440163455928e-06, "loss": 0.7989, "step": 2545 }, { "epoch": 0.07433359609938396, "grad_norm": 0.822321387489256, "learning_rate": 3.7157034442498544e-06, "loss": 0.7269, "step": 2546 }, { "epoch": 0.07436279232723132, "grad_norm": 0.7824497731828133, "learning_rate": 3.7171628721541157e-06, "loss": 0.825, "step": 2547 }, { "epoch": 0.07439198855507868, "grad_norm": 0.8029071918201903, "learning_rate": 3.718622300058378e-06, "loss": 0.8599, "step": 2548 }, { "epoch": 0.07442118478292604, "grad_norm": 0.8862322558674137, "learning_rate": 3.720081727962639e-06, "loss": 0.7101, "step": 2549 }, { "epoch": 0.0744503810107734, "grad_norm": 0.7617791191485339, "learning_rate": 3.7215411558669007e-06, "loss": 0.7427, "step": 2550 }, { "epoch": 0.07447957723862077, "grad_norm": 0.7576365482214737, "learning_rate": 3.723000583771162e-06, "loss": 0.7078, "step": 2551 }, { "epoch": 0.07450877346646813, "grad_norm": 0.7194541277902488, "learning_rate": 3.7244600116754236e-06, "loss": 0.6834, "step": 2552 }, { "epoch": 0.07453796969431549, "grad_norm": 0.7454457348176127, "learning_rate": 3.725919439579685e-06, "loss": 0.7213, "step": 2553 }, { "epoch": 0.07456716592216285, "grad_norm": 0.7818229621754624, "learning_rate": 3.7273788674839465e-06, "loss": 0.7443, "step": 2554 }, { "epoch": 0.07459636215001021, "grad_norm": 0.7063199464361346, "learning_rate": 3.7288382953882078e-06, "loss": 0.5533, "step": 2555 }, { "epoch": 0.07462555837785759, "grad_norm": 0.7526265963291429, "learning_rate": 3.73029772329247e-06, "loss": 0.7158, "step": 2556 }, { "epoch": 0.07465475460570495, "grad_norm": 0.8592738524034376, "learning_rate": 3.731757151196731e-06, "loss": 0.8125, "step": 2557 }, { "epoch": 0.07468395083355231, "grad_norm": 0.7997620512673574, "learning_rate": 3.733216579100993e-06, "loss": 0.7524, "step": 2558 }, { "epoch": 0.07471314706139967, "grad_norm": 0.7863210860192918, "learning_rate": 3.734676007005254e-06, "loss": 0.6972, "step": 2559 }, { "epoch": 0.07474234328924703, "grad_norm": 1.0071119666763961, "learning_rate": 3.7361354349095157e-06, "loss": 0.816, "step": 2560 }, { "epoch": 0.07477153951709439, "grad_norm": 0.8430227617562834, "learning_rate": 3.737594862813777e-06, "loss": 0.7798, "step": 2561 }, { "epoch": 0.07480073574494175, "grad_norm": 0.7384334388658756, "learning_rate": 3.7390542907180386e-06, "loss": 0.6343, "step": 2562 }, { "epoch": 0.07482993197278912, "grad_norm": 0.8595210091750072, "learning_rate": 3.7405137186223007e-06, "loss": 0.7663, "step": 2563 }, { "epoch": 0.07485912820063648, "grad_norm": 0.7675214660551074, "learning_rate": 3.741973146526562e-06, "loss": 0.6812, "step": 2564 }, { "epoch": 0.07488832442848384, "grad_norm": 0.8538882398520062, "learning_rate": 3.7434325744308236e-06, "loss": 0.8339, "step": 2565 }, { "epoch": 0.0749175206563312, "grad_norm": 0.7926224850974191, "learning_rate": 3.744892002335085e-06, "loss": 0.7098, "step": 2566 }, { "epoch": 0.07494671688417856, "grad_norm": 0.8002125589137714, "learning_rate": 3.7463514302393466e-06, "loss": 0.7237, "step": 2567 }, { "epoch": 0.07497591311202592, "grad_norm": 0.795097235124565, "learning_rate": 3.747810858143608e-06, "loss": 0.7112, "step": 2568 }, { "epoch": 0.07500510933987328, "grad_norm": 0.8266123065253247, "learning_rate": 3.74927028604787e-06, "loss": 0.8146, "step": 2569 }, { "epoch": 0.07503430556772064, "grad_norm": 0.7330057024772132, "learning_rate": 3.7507297139521307e-06, "loss": 0.6616, "step": 2570 }, { "epoch": 0.07506350179556802, "grad_norm": 0.8076841737954075, "learning_rate": 3.752189141856393e-06, "loss": 0.8068, "step": 2571 }, { "epoch": 0.07509269802341538, "grad_norm": 0.7687961845451378, "learning_rate": 3.753648569760654e-06, "loss": 0.7413, "step": 2572 }, { "epoch": 0.07512189425126274, "grad_norm": 0.7922691721127841, "learning_rate": 3.7551079976649157e-06, "loss": 0.7393, "step": 2573 }, { "epoch": 0.0751510904791101, "grad_norm": 0.8829381293756442, "learning_rate": 3.756567425569177e-06, "loss": 0.6313, "step": 2574 }, { "epoch": 0.07518028670695746, "grad_norm": 0.753970959061499, "learning_rate": 3.7580268534734386e-06, "loss": 0.7023, "step": 2575 }, { "epoch": 0.07520948293480482, "grad_norm": 0.7784443377557299, "learning_rate": 3.7594862813777e-06, "loss": 0.7331, "step": 2576 }, { "epoch": 0.07523867916265219, "grad_norm": 1.3341430483416197, "learning_rate": 3.760945709281962e-06, "loss": 0.7119, "step": 2577 }, { "epoch": 0.07526787539049955, "grad_norm": 0.6802269480958486, "learning_rate": 3.7624051371862237e-06, "loss": 0.5858, "step": 2578 }, { "epoch": 0.07529707161834691, "grad_norm": 0.8446099454463886, "learning_rate": 3.763864565090485e-06, "loss": 0.8282, "step": 2579 }, { "epoch": 0.07532626784619427, "grad_norm": 0.9752011152591332, "learning_rate": 3.7653239929947466e-06, "loss": 0.8429, "step": 2580 }, { "epoch": 0.07535546407404163, "grad_norm": 1.0201109855807335, "learning_rate": 3.766783420899008e-06, "loss": 0.8727, "step": 2581 }, { "epoch": 0.07538466030188899, "grad_norm": 0.7729125754215799, "learning_rate": 3.7682428488032695e-06, "loss": 0.8042, "step": 2582 }, { "epoch": 0.07541385652973635, "grad_norm": 0.7924364415782873, "learning_rate": 3.7697022767075307e-06, "loss": 0.7759, "step": 2583 }, { "epoch": 0.07544305275758371, "grad_norm": 0.7425207040814277, "learning_rate": 3.771161704611793e-06, "loss": 0.6793, "step": 2584 }, { "epoch": 0.07547224898543108, "grad_norm": 0.7346679270043577, "learning_rate": 3.772621132516054e-06, "loss": 0.6721, "step": 2585 }, { "epoch": 0.07550144521327845, "grad_norm": 0.9301972589282301, "learning_rate": 3.7740805604203157e-06, "loss": 0.8361, "step": 2586 }, { "epoch": 0.07553064144112581, "grad_norm": 1.4218577135366013, "learning_rate": 3.775539988324577e-06, "loss": 0.7549, "step": 2587 }, { "epoch": 0.07555983766897317, "grad_norm": 0.7588202646105561, "learning_rate": 3.7769994162288387e-06, "loss": 0.7469, "step": 2588 }, { "epoch": 0.07558903389682053, "grad_norm": 0.7655628626552708, "learning_rate": 3.7784588441331e-06, "loss": 0.7026, "step": 2589 }, { "epoch": 0.0756182301246679, "grad_norm": 0.7535277145009197, "learning_rate": 3.7799182720373616e-06, "loss": 0.7237, "step": 2590 }, { "epoch": 0.07564742635251526, "grad_norm": 0.8531545179890861, "learning_rate": 3.781377699941623e-06, "loss": 0.7331, "step": 2591 }, { "epoch": 0.07567662258036262, "grad_norm": 0.8304847727563798, "learning_rate": 3.782837127845885e-06, "loss": 0.8401, "step": 2592 }, { "epoch": 0.07570581880820998, "grad_norm": 0.9456283431313933, "learning_rate": 3.784296555750146e-06, "loss": 0.7788, "step": 2593 }, { "epoch": 0.07573501503605734, "grad_norm": 0.7341597723483784, "learning_rate": 3.785755983654408e-06, "loss": 0.6187, "step": 2594 }, { "epoch": 0.0757642112639047, "grad_norm": 0.7349796137084504, "learning_rate": 3.7872154115586695e-06, "loss": 0.7085, "step": 2595 }, { "epoch": 0.07579340749175206, "grad_norm": 0.7210684377399422, "learning_rate": 3.7886748394629308e-06, "loss": 0.6387, "step": 2596 }, { "epoch": 0.07582260371959942, "grad_norm": 0.9721678970023706, "learning_rate": 3.7901342673671924e-06, "loss": 0.7816, "step": 2597 }, { "epoch": 0.07585179994744679, "grad_norm": 0.7879501219290963, "learning_rate": 3.7915936952714537e-06, "loss": 0.7537, "step": 2598 }, { "epoch": 0.07588099617529415, "grad_norm": 0.7947205979403573, "learning_rate": 3.7930531231757158e-06, "loss": 0.7079, "step": 2599 }, { "epoch": 0.07591019240314151, "grad_norm": 0.7899935100793372, "learning_rate": 3.794512551079977e-06, "loss": 0.7329, "step": 2600 }, { "epoch": 0.07593938863098888, "grad_norm": 0.8381826780797386, "learning_rate": 3.7959719789842387e-06, "loss": 0.7757, "step": 2601 }, { "epoch": 0.07596858485883624, "grad_norm": 0.7741190608336577, "learning_rate": 3.7974314068885e-06, "loss": 0.7576, "step": 2602 }, { "epoch": 0.0759977810866836, "grad_norm": 0.7941459280898043, "learning_rate": 3.7988908347927616e-06, "loss": 0.7451, "step": 2603 }, { "epoch": 0.07602697731453097, "grad_norm": 0.8587107796251147, "learning_rate": 3.800350262697023e-06, "loss": 0.8562, "step": 2604 }, { "epoch": 0.07605617354237833, "grad_norm": 0.847322914749792, "learning_rate": 3.8018096906012845e-06, "loss": 0.8266, "step": 2605 }, { "epoch": 0.07608536977022569, "grad_norm": 0.7666706520416662, "learning_rate": 3.8032691185055458e-06, "loss": 0.6685, "step": 2606 }, { "epoch": 0.07611456599807305, "grad_norm": 0.7890577788990921, "learning_rate": 3.804728546409808e-06, "loss": 0.6917, "step": 2607 }, { "epoch": 0.07614376222592041, "grad_norm": 0.7664722742842509, "learning_rate": 3.806187974314069e-06, "loss": 0.7123, "step": 2608 }, { "epoch": 0.07617295845376777, "grad_norm": 0.852608981595455, "learning_rate": 3.8076474022183308e-06, "loss": 0.7981, "step": 2609 }, { "epoch": 0.07620215468161513, "grad_norm": 0.82734078870851, "learning_rate": 3.8091068301225924e-06, "loss": 0.7741, "step": 2610 }, { "epoch": 0.0762313509094625, "grad_norm": 0.8117142612847378, "learning_rate": 3.8105662580268537e-06, "loss": 0.7211, "step": 2611 }, { "epoch": 0.07626054713730986, "grad_norm": 0.8487781980157296, "learning_rate": 3.8120256859311154e-06, "loss": 0.799, "step": 2612 }, { "epoch": 0.07628974336515722, "grad_norm": 0.8632978435875507, "learning_rate": 3.8134851138353766e-06, "loss": 0.8464, "step": 2613 }, { "epoch": 0.07631893959300458, "grad_norm": 0.778395763263276, "learning_rate": 3.814944541739639e-06, "loss": 0.7142, "step": 2614 }, { "epoch": 0.07634813582085194, "grad_norm": 0.8261096840504659, "learning_rate": 3.8164039696439e-06, "loss": 0.6399, "step": 2615 }, { "epoch": 0.07637733204869931, "grad_norm": 0.8072197721299414, "learning_rate": 3.817863397548161e-06, "loss": 0.7955, "step": 2616 }, { "epoch": 0.07640652827654668, "grad_norm": 0.9013716139706762, "learning_rate": 3.8193228254524224e-06, "loss": 0.7772, "step": 2617 }, { "epoch": 0.07643572450439404, "grad_norm": 0.769151553713528, "learning_rate": 3.8207822533566845e-06, "loss": 0.706, "step": 2618 }, { "epoch": 0.0764649207322414, "grad_norm": 0.8497288574682924, "learning_rate": 3.822241681260946e-06, "loss": 0.8311, "step": 2619 }, { "epoch": 0.07649411696008876, "grad_norm": 0.8535860891462765, "learning_rate": 3.823701109165208e-06, "loss": 0.8392, "step": 2620 }, { "epoch": 0.07652331318793612, "grad_norm": 0.8222872948290963, "learning_rate": 3.825160537069469e-06, "loss": 0.7932, "step": 2621 }, { "epoch": 0.07655250941578348, "grad_norm": 0.7166161927178163, "learning_rate": 3.82661996497373e-06, "loss": 0.6278, "step": 2622 }, { "epoch": 0.07658170564363084, "grad_norm": 0.8695170979747571, "learning_rate": 3.828079392877992e-06, "loss": 0.8152, "step": 2623 }, { "epoch": 0.0766109018714782, "grad_norm": 0.8664645077762717, "learning_rate": 3.829538820782254e-06, "loss": 0.8221, "step": 2624 }, { "epoch": 0.07664009809932557, "grad_norm": 0.817373256016829, "learning_rate": 3.830998248686515e-06, "loss": 0.7499, "step": 2625 }, { "epoch": 0.07666929432717293, "grad_norm": 0.8136633086873247, "learning_rate": 3.832457676590777e-06, "loss": 0.8103, "step": 2626 }, { "epoch": 0.07669849055502029, "grad_norm": 0.8272144417046358, "learning_rate": 3.833917104495038e-06, "loss": 0.7234, "step": 2627 }, { "epoch": 0.07672768678286765, "grad_norm": 0.8161557681226631, "learning_rate": 3.8353765323992995e-06, "loss": 0.7407, "step": 2628 }, { "epoch": 0.07675688301071501, "grad_norm": 0.8838009319529164, "learning_rate": 3.836835960303562e-06, "loss": 0.8062, "step": 2629 }, { "epoch": 0.07678607923856237, "grad_norm": 0.9097060139034218, "learning_rate": 3.838295388207823e-06, "loss": 0.8444, "step": 2630 }, { "epoch": 0.07681527546640975, "grad_norm": 0.7559537111566085, "learning_rate": 3.839754816112085e-06, "loss": 0.6483, "step": 2631 }, { "epoch": 0.07684447169425711, "grad_norm": 0.8586859464495843, "learning_rate": 3.841214244016346e-06, "loss": 0.7546, "step": 2632 }, { "epoch": 0.07687366792210447, "grad_norm": 0.7665065012472837, "learning_rate": 3.8426736719206075e-06, "loss": 0.7417, "step": 2633 }, { "epoch": 0.07690286414995183, "grad_norm": 0.7893329371404533, "learning_rate": 3.844133099824869e-06, "loss": 0.7507, "step": 2634 }, { "epoch": 0.07693206037779919, "grad_norm": 0.7915078347635203, "learning_rate": 3.845592527729131e-06, "loss": 0.7109, "step": 2635 }, { "epoch": 0.07696125660564655, "grad_norm": 0.7639591711904336, "learning_rate": 3.847051955633392e-06, "loss": 0.7625, "step": 2636 }, { "epoch": 0.07699045283349391, "grad_norm": 1.043805511869859, "learning_rate": 3.848511383537653e-06, "loss": 0.8776, "step": 2637 }, { "epoch": 0.07701964906134127, "grad_norm": 0.825791362280053, "learning_rate": 3.8499708114419145e-06, "loss": 0.7002, "step": 2638 }, { "epoch": 0.07704884528918864, "grad_norm": 0.8327707988608414, "learning_rate": 3.851430239346177e-06, "loss": 0.807, "step": 2639 }, { "epoch": 0.077078041517036, "grad_norm": 0.9066138713036959, "learning_rate": 3.852889667250438e-06, "loss": 0.87, "step": 2640 }, { "epoch": 0.07710723774488336, "grad_norm": 0.7519931501274439, "learning_rate": 3.8543490951547e-06, "loss": 0.6661, "step": 2641 }, { "epoch": 0.07713643397273072, "grad_norm": 0.7918531458273113, "learning_rate": 3.855808523058961e-06, "loss": 0.8303, "step": 2642 }, { "epoch": 0.07716563020057808, "grad_norm": 0.7437902728133963, "learning_rate": 3.8572679509632225e-06, "loss": 0.6917, "step": 2643 }, { "epoch": 0.07719482642842544, "grad_norm": 0.811639011871154, "learning_rate": 3.8587273788674846e-06, "loss": 0.7685, "step": 2644 }, { "epoch": 0.0772240226562728, "grad_norm": 0.8690234664745509, "learning_rate": 3.860186806771746e-06, "loss": 0.7959, "step": 2645 }, { "epoch": 0.07725321888412018, "grad_norm": 0.9136873722838977, "learning_rate": 3.861646234676008e-06, "loss": 0.7673, "step": 2646 }, { "epoch": 0.07728241511196754, "grad_norm": 1.0419920646981993, "learning_rate": 3.863105662580269e-06, "loss": 0.7307, "step": 2647 }, { "epoch": 0.0773116113398149, "grad_norm": 0.9290021276441242, "learning_rate": 3.86456509048453e-06, "loss": 0.7687, "step": 2648 }, { "epoch": 0.07734080756766226, "grad_norm": 0.9085437011721632, "learning_rate": 3.866024518388792e-06, "loss": 0.875, "step": 2649 }, { "epoch": 0.07737000379550962, "grad_norm": 0.8449602762923699, "learning_rate": 3.867483946293054e-06, "loss": 0.7562, "step": 2650 }, { "epoch": 0.07739920002335698, "grad_norm": 0.8248040500447399, "learning_rate": 3.868943374197315e-06, "loss": 0.8441, "step": 2651 }, { "epoch": 0.07742839625120435, "grad_norm": 0.781933794625144, "learning_rate": 3.870402802101576e-06, "loss": 0.7445, "step": 2652 }, { "epoch": 0.0774575924790517, "grad_norm": 0.7151466138590499, "learning_rate": 3.8718622300058375e-06, "loss": 0.6642, "step": 2653 }, { "epoch": 0.07748678870689907, "grad_norm": 0.8462758697746685, "learning_rate": 3.8733216579100996e-06, "loss": 0.8077, "step": 2654 }, { "epoch": 0.07751598493474643, "grad_norm": 0.7771767978004643, "learning_rate": 3.874781085814361e-06, "loss": 0.6706, "step": 2655 }, { "epoch": 0.07754518116259379, "grad_norm": 0.8315864770035021, "learning_rate": 3.876240513718623e-06, "loss": 0.7573, "step": 2656 }, { "epoch": 0.07757437739044115, "grad_norm": 0.7331315954755352, "learning_rate": 3.877699941622884e-06, "loss": 0.6806, "step": 2657 }, { "epoch": 0.07760357361828851, "grad_norm": 0.7757789416747909, "learning_rate": 3.879159369527145e-06, "loss": 0.7479, "step": 2658 }, { "epoch": 0.07763276984613587, "grad_norm": 0.8614214625719825, "learning_rate": 3.8806187974314075e-06, "loss": 0.8042, "step": 2659 }, { "epoch": 0.07766196607398324, "grad_norm": 0.7741440611242434, "learning_rate": 3.882078225335669e-06, "loss": 0.6646, "step": 2660 }, { "epoch": 0.07769116230183061, "grad_norm": 0.7652728159819396, "learning_rate": 3.883537653239931e-06, "loss": 0.7064, "step": 2661 }, { "epoch": 0.07772035852967797, "grad_norm": 0.9104052034543206, "learning_rate": 3.884997081144192e-06, "loss": 0.955, "step": 2662 }, { "epoch": 0.07774955475752533, "grad_norm": 0.7652637863056916, "learning_rate": 3.886456509048453e-06, "loss": 0.6474, "step": 2663 }, { "epoch": 0.0777787509853727, "grad_norm": 0.7735705469156472, "learning_rate": 3.8879159369527146e-06, "loss": 0.7414, "step": 2664 }, { "epoch": 0.07780794721322006, "grad_norm": 1.1535843665611405, "learning_rate": 3.889375364856977e-06, "loss": 0.7671, "step": 2665 }, { "epoch": 0.07783714344106742, "grad_norm": 0.8415649559313162, "learning_rate": 3.890834792761238e-06, "loss": 0.6622, "step": 2666 }, { "epoch": 0.07786633966891478, "grad_norm": 0.8119672082993196, "learning_rate": 3.892294220665499e-06, "loss": 0.8181, "step": 2667 }, { "epoch": 0.07789553589676214, "grad_norm": 1.285587540929004, "learning_rate": 3.89375364856976e-06, "loss": 0.7513, "step": 2668 }, { "epoch": 0.0779247321246095, "grad_norm": 0.8308139919153427, "learning_rate": 3.8952130764740225e-06, "loss": 0.7705, "step": 2669 }, { "epoch": 0.07795392835245686, "grad_norm": 0.9218744831535016, "learning_rate": 3.896672504378284e-06, "loss": 0.7708, "step": 2670 }, { "epoch": 0.07798312458030422, "grad_norm": 0.764736638542426, "learning_rate": 3.898131932282546e-06, "loss": 0.7166, "step": 2671 }, { "epoch": 0.07801232080815158, "grad_norm": 0.7743521541991547, "learning_rate": 3.899591360186807e-06, "loss": 0.7144, "step": 2672 }, { "epoch": 0.07804151703599894, "grad_norm": 0.7835832635951282, "learning_rate": 3.901050788091068e-06, "loss": 0.6454, "step": 2673 }, { "epoch": 0.0780707132638463, "grad_norm": 0.7704670342225235, "learning_rate": 3.9025102159953304e-06, "loss": 0.7378, "step": 2674 }, { "epoch": 0.07809990949169367, "grad_norm": 0.855817276379843, "learning_rate": 3.903969643899592e-06, "loss": 0.8327, "step": 2675 }, { "epoch": 0.07812910571954104, "grad_norm": 0.835788915807515, "learning_rate": 3.905429071803854e-06, "loss": 0.7291, "step": 2676 }, { "epoch": 0.0781583019473884, "grad_norm": 0.86569241905961, "learning_rate": 3.906888499708115e-06, "loss": 0.8017, "step": 2677 }, { "epoch": 0.07818749817523576, "grad_norm": 0.7397870214096962, "learning_rate": 3.908347927612376e-06, "loss": 0.6997, "step": 2678 }, { "epoch": 0.07821669440308313, "grad_norm": 0.7763573279017233, "learning_rate": 3.9098073555166375e-06, "loss": 0.6006, "step": 2679 }, { "epoch": 0.07824589063093049, "grad_norm": 0.9747974620587028, "learning_rate": 3.9112667834209e-06, "loss": 0.7998, "step": 2680 }, { "epoch": 0.07827508685877785, "grad_norm": 0.8014213393455569, "learning_rate": 3.912726211325161e-06, "loss": 0.8084, "step": 2681 }, { "epoch": 0.07830428308662521, "grad_norm": 0.7436396732773312, "learning_rate": 3.914185639229423e-06, "loss": 0.6879, "step": 2682 }, { "epoch": 0.07833347931447257, "grad_norm": 0.7297467275706815, "learning_rate": 3.915645067133683e-06, "loss": 0.7271, "step": 2683 }, { "epoch": 0.07836267554231993, "grad_norm": 0.8640750570283585, "learning_rate": 3.9171044950379454e-06, "loss": 0.8247, "step": 2684 }, { "epoch": 0.07839187177016729, "grad_norm": 0.7896950446081454, "learning_rate": 3.918563922942207e-06, "loss": 0.7387, "step": 2685 }, { "epoch": 0.07842106799801465, "grad_norm": 0.856065761430611, "learning_rate": 3.920023350846469e-06, "loss": 0.8292, "step": 2686 }, { "epoch": 0.07845026422586202, "grad_norm": 0.8686116103298251, "learning_rate": 3.92148277875073e-06, "loss": 0.8777, "step": 2687 }, { "epoch": 0.07847946045370938, "grad_norm": 0.764604275334446, "learning_rate": 3.922942206654991e-06, "loss": 0.6697, "step": 2688 }, { "epoch": 0.07850865668155674, "grad_norm": 0.8022947286917893, "learning_rate": 3.9244016345592525e-06, "loss": 0.7884, "step": 2689 }, { "epoch": 0.0785378529094041, "grad_norm": 0.8338315962937554, "learning_rate": 3.925861062463515e-06, "loss": 0.8342, "step": 2690 }, { "epoch": 0.07856704913725147, "grad_norm": 0.6986798242426829, "learning_rate": 3.927320490367777e-06, "loss": 0.6188, "step": 2691 }, { "epoch": 0.07859624536509884, "grad_norm": 0.7774663282129077, "learning_rate": 3.928779918272038e-06, "loss": 0.7465, "step": 2692 }, { "epoch": 0.0786254415929462, "grad_norm": 0.8296695545097051, "learning_rate": 3.930239346176299e-06, "loss": 0.8348, "step": 2693 }, { "epoch": 0.07865463782079356, "grad_norm": 0.8090880668558406, "learning_rate": 3.9316987740805604e-06, "loss": 0.8086, "step": 2694 }, { "epoch": 0.07868383404864092, "grad_norm": 0.828117692471158, "learning_rate": 3.9331582019848225e-06, "loss": 0.7347, "step": 2695 }, { "epoch": 0.07871303027648828, "grad_norm": 0.8447434944585553, "learning_rate": 3.934617629889084e-06, "loss": 0.7782, "step": 2696 }, { "epoch": 0.07874222650433564, "grad_norm": 0.7602030179171458, "learning_rate": 3.936077057793346e-06, "loss": 0.6928, "step": 2697 }, { "epoch": 0.078771422732183, "grad_norm": 0.7576105476981226, "learning_rate": 3.937536485697607e-06, "loss": 0.7551, "step": 2698 }, { "epoch": 0.07880061896003036, "grad_norm": 0.7690500225877556, "learning_rate": 3.938995913601868e-06, "loss": 0.7123, "step": 2699 }, { "epoch": 0.07882981518787772, "grad_norm": 0.7445867415614662, "learning_rate": 3.94045534150613e-06, "loss": 0.6825, "step": 2700 }, { "epoch": 0.07885901141572509, "grad_norm": 0.9158160377891728, "learning_rate": 3.941914769410392e-06, "loss": 0.7138, "step": 2701 }, { "epoch": 0.07888820764357245, "grad_norm": 0.8056282041164811, "learning_rate": 3.943374197314653e-06, "loss": 0.7754, "step": 2702 }, { "epoch": 0.07891740387141981, "grad_norm": 0.8001528595693581, "learning_rate": 3.944833625218914e-06, "loss": 0.7424, "step": 2703 }, { "epoch": 0.07894660009926717, "grad_norm": 0.8887076403030345, "learning_rate": 3.9462930531231754e-06, "loss": 0.7782, "step": 2704 }, { "epoch": 0.07897579632711453, "grad_norm": 0.780677313792861, "learning_rate": 3.9477524810274375e-06, "loss": 0.6846, "step": 2705 }, { "epoch": 0.07900499255496189, "grad_norm": 0.8424041464003529, "learning_rate": 3.9492119089317e-06, "loss": 0.7122, "step": 2706 }, { "epoch": 0.07903418878280927, "grad_norm": 0.7261600553076935, "learning_rate": 3.950671336835961e-06, "loss": 0.6466, "step": 2707 }, { "epoch": 0.07906338501065663, "grad_norm": 0.75677081086416, "learning_rate": 3.952130764740222e-06, "loss": 0.6963, "step": 2708 }, { "epoch": 0.07909258123850399, "grad_norm": 0.7292476629826775, "learning_rate": 3.953590192644483e-06, "loss": 0.6809, "step": 2709 }, { "epoch": 0.07912177746635135, "grad_norm": 0.7106520742514215, "learning_rate": 3.9550496205487455e-06, "loss": 0.6422, "step": 2710 }, { "epoch": 0.07915097369419871, "grad_norm": 0.7539230279095981, "learning_rate": 3.956509048453007e-06, "loss": 0.6892, "step": 2711 }, { "epoch": 0.07918016992204607, "grad_norm": 0.7893663530884696, "learning_rate": 3.957968476357269e-06, "loss": 0.7252, "step": 2712 }, { "epoch": 0.07920936614989343, "grad_norm": 0.79070447733689, "learning_rate": 3.95942790426153e-06, "loss": 0.7441, "step": 2713 }, { "epoch": 0.0792385623777408, "grad_norm": 0.8436062244463531, "learning_rate": 3.960887332165791e-06, "loss": 0.8507, "step": 2714 }, { "epoch": 0.07926775860558816, "grad_norm": 0.7624805678499502, "learning_rate": 3.9623467600700525e-06, "loss": 0.7233, "step": 2715 }, { "epoch": 0.07929695483343552, "grad_norm": 0.8252472518188688, "learning_rate": 3.963806187974315e-06, "loss": 0.7509, "step": 2716 }, { "epoch": 0.07932615106128288, "grad_norm": 0.7580859811247127, "learning_rate": 3.965265615878576e-06, "loss": 0.7635, "step": 2717 }, { "epoch": 0.07935534728913024, "grad_norm": 0.8843084410806465, "learning_rate": 3.966725043782837e-06, "loss": 0.7038, "step": 2718 }, { "epoch": 0.0793845435169776, "grad_norm": 0.8927462746519639, "learning_rate": 3.968184471687098e-06, "loss": 0.7886, "step": 2719 }, { "epoch": 0.07941373974482496, "grad_norm": 0.8621309346929045, "learning_rate": 3.9696438995913605e-06, "loss": 0.8365, "step": 2720 }, { "epoch": 0.07944293597267232, "grad_norm": 0.8851132554663591, "learning_rate": 3.971103327495622e-06, "loss": 0.7275, "step": 2721 }, { "epoch": 0.0794721322005197, "grad_norm": 0.781180862697955, "learning_rate": 3.972562755399884e-06, "loss": 0.781, "step": 2722 }, { "epoch": 0.07950132842836706, "grad_norm": 0.8264253791193911, "learning_rate": 3.974022183304145e-06, "loss": 0.822, "step": 2723 }, { "epoch": 0.07953052465621442, "grad_norm": 0.7566770410922177, "learning_rate": 3.975481611208406e-06, "loss": 0.7176, "step": 2724 }, { "epoch": 0.07955972088406178, "grad_norm": 0.8190680089183721, "learning_rate": 3.976941039112668e-06, "loss": 0.8091, "step": 2725 }, { "epoch": 0.07958891711190914, "grad_norm": 0.8505463294534202, "learning_rate": 3.97840046701693e-06, "loss": 0.7189, "step": 2726 }, { "epoch": 0.0796181133397565, "grad_norm": 0.8705379956701106, "learning_rate": 3.979859894921192e-06, "loss": 0.7658, "step": 2727 }, { "epoch": 0.07964730956760387, "grad_norm": 0.993290545366167, "learning_rate": 3.981319322825453e-06, "loss": 0.9145, "step": 2728 }, { "epoch": 0.07967650579545123, "grad_norm": 0.8707718672084396, "learning_rate": 3.982778750729714e-06, "loss": 0.8298, "step": 2729 }, { "epoch": 0.07970570202329859, "grad_norm": 0.9174146453701102, "learning_rate": 3.9842381786339755e-06, "loss": 0.8146, "step": 2730 }, { "epoch": 0.07973489825114595, "grad_norm": 0.7154334467024113, "learning_rate": 3.9856976065382376e-06, "loss": 0.5992, "step": 2731 }, { "epoch": 0.07976409447899331, "grad_norm": 0.6883343256539177, "learning_rate": 3.987157034442499e-06, "loss": 0.5902, "step": 2732 }, { "epoch": 0.07979329070684067, "grad_norm": 0.7801385583277407, "learning_rate": 3.98861646234676e-06, "loss": 0.7041, "step": 2733 }, { "epoch": 0.07982248693468803, "grad_norm": 0.7994262790913389, "learning_rate": 3.990075890251021e-06, "loss": 0.7397, "step": 2734 }, { "epoch": 0.0798516831625354, "grad_norm": 0.7964270763680493, "learning_rate": 3.991535318155283e-06, "loss": 0.7246, "step": 2735 }, { "epoch": 0.07988087939038276, "grad_norm": 0.7403180670806138, "learning_rate": 3.992994746059545e-06, "loss": 0.7269, "step": 2736 }, { "epoch": 0.07991007561823013, "grad_norm": 0.8366428481690293, "learning_rate": 3.994454173963807e-06, "loss": 0.7932, "step": 2737 }, { "epoch": 0.07993927184607749, "grad_norm": 0.7921640447376435, "learning_rate": 3.995913601868068e-06, "loss": 0.7705, "step": 2738 }, { "epoch": 0.07996846807392485, "grad_norm": 0.7724684903329945, "learning_rate": 3.997373029772329e-06, "loss": 0.7292, "step": 2739 }, { "epoch": 0.07999766430177221, "grad_norm": 0.8242738462610022, "learning_rate": 3.998832457676591e-06, "loss": 0.8001, "step": 2740 }, { "epoch": 0.08002686052961958, "grad_norm": 0.7740443929064637, "learning_rate": 4.000291885580853e-06, "loss": 0.7293, "step": 2741 }, { "epoch": 0.08005605675746694, "grad_norm": 0.8329106837599846, "learning_rate": 4.001751313485115e-06, "loss": 0.8475, "step": 2742 }, { "epoch": 0.0800852529853143, "grad_norm": 0.854686953510131, "learning_rate": 4.003210741389376e-06, "loss": 0.76, "step": 2743 }, { "epoch": 0.08011444921316166, "grad_norm": 0.7754740706362254, "learning_rate": 4.004670169293637e-06, "loss": 0.7613, "step": 2744 }, { "epoch": 0.08014364544100902, "grad_norm": 0.9066857900372992, "learning_rate": 4.006129597197898e-06, "loss": 0.7774, "step": 2745 }, { "epoch": 0.08017284166885638, "grad_norm": 0.8707751473894718, "learning_rate": 4.0075890251021605e-06, "loss": 0.7885, "step": 2746 }, { "epoch": 0.08020203789670374, "grad_norm": 0.748767587234184, "learning_rate": 4.009048453006422e-06, "loss": 0.7327, "step": 2747 }, { "epoch": 0.0802312341245511, "grad_norm": 0.7743826318158668, "learning_rate": 4.010507880910684e-06, "loss": 0.7512, "step": 2748 }, { "epoch": 0.08026043035239847, "grad_norm": 0.7599367298260494, "learning_rate": 4.011967308814945e-06, "loss": 0.7033, "step": 2749 }, { "epoch": 0.08028962658024583, "grad_norm": 0.8108338380450811, "learning_rate": 4.013426736719206e-06, "loss": 0.7791, "step": 2750 }, { "epoch": 0.08031882280809319, "grad_norm": 0.816630248857688, "learning_rate": 4.014886164623468e-06, "loss": 0.7895, "step": 2751 }, { "epoch": 0.08034801903594056, "grad_norm": 0.7985394621014668, "learning_rate": 4.01634559252773e-06, "loss": 0.7496, "step": 2752 }, { "epoch": 0.08037721526378792, "grad_norm": 0.7670442640523414, "learning_rate": 4.017805020431991e-06, "loss": 0.6781, "step": 2753 }, { "epoch": 0.08040641149163529, "grad_norm": 0.8168405794104093, "learning_rate": 4.019264448336252e-06, "loss": 0.745, "step": 2754 }, { "epoch": 0.08043560771948265, "grad_norm": 0.7480348422262767, "learning_rate": 4.020723876240514e-06, "loss": 0.6853, "step": 2755 }, { "epoch": 0.08046480394733001, "grad_norm": 0.8098358991319748, "learning_rate": 4.0221833041447755e-06, "loss": 0.7695, "step": 2756 }, { "epoch": 0.08049400017517737, "grad_norm": 0.8063412477968838, "learning_rate": 4.023642732049038e-06, "loss": 0.734, "step": 2757 }, { "epoch": 0.08052319640302473, "grad_norm": 0.8251549092405244, "learning_rate": 4.025102159953299e-06, "loss": 0.7375, "step": 2758 }, { "epoch": 0.08055239263087209, "grad_norm": 0.7704055156121414, "learning_rate": 4.02656158785756e-06, "loss": 0.6622, "step": 2759 }, { "epoch": 0.08058158885871945, "grad_norm": 0.8146817574042182, "learning_rate": 4.028021015761821e-06, "loss": 0.8274, "step": 2760 }, { "epoch": 0.08061078508656681, "grad_norm": 0.8350935386082208, "learning_rate": 4.0294804436660834e-06, "loss": 0.7091, "step": 2761 }, { "epoch": 0.08063998131441417, "grad_norm": 0.7693071662800847, "learning_rate": 4.030939871570345e-06, "loss": 0.7064, "step": 2762 }, { "epoch": 0.08066917754226154, "grad_norm": 1.01355049416034, "learning_rate": 4.032399299474607e-06, "loss": 0.7703, "step": 2763 }, { "epoch": 0.0806983737701089, "grad_norm": 0.7741006922385514, "learning_rate": 4.033858727378868e-06, "loss": 0.7452, "step": 2764 }, { "epoch": 0.08072756999795626, "grad_norm": 0.7354891303493752, "learning_rate": 4.035318155283129e-06, "loss": 0.7013, "step": 2765 }, { "epoch": 0.08075676622580362, "grad_norm": 0.8001903626097492, "learning_rate": 4.0367775831873905e-06, "loss": 0.8348, "step": 2766 }, { "epoch": 0.080785962453651, "grad_norm": 0.68841194283078, "learning_rate": 4.038237011091653e-06, "loss": 0.6107, "step": 2767 }, { "epoch": 0.08081515868149836, "grad_norm": 0.7832570268983274, "learning_rate": 4.039696438995914e-06, "loss": 0.7552, "step": 2768 }, { "epoch": 0.08084435490934572, "grad_norm": 0.7662735094937398, "learning_rate": 4.041155866900175e-06, "loss": 0.717, "step": 2769 }, { "epoch": 0.08087355113719308, "grad_norm": 0.825673435705426, "learning_rate": 4.042615294804437e-06, "loss": 0.6953, "step": 2770 }, { "epoch": 0.08090274736504044, "grad_norm": 0.8417162786745177, "learning_rate": 4.0440747227086984e-06, "loss": 0.7656, "step": 2771 }, { "epoch": 0.0809319435928878, "grad_norm": 0.8327234943443267, "learning_rate": 4.0455341506129605e-06, "loss": 0.7383, "step": 2772 }, { "epoch": 0.08096113982073516, "grad_norm": 0.7784704674040936, "learning_rate": 4.046993578517222e-06, "loss": 0.7405, "step": 2773 }, { "epoch": 0.08099033604858252, "grad_norm": 0.9204222382754257, "learning_rate": 4.048453006421483e-06, "loss": 0.8627, "step": 2774 }, { "epoch": 0.08101953227642988, "grad_norm": 0.7707038127884949, "learning_rate": 4.049912434325744e-06, "loss": 0.7383, "step": 2775 }, { "epoch": 0.08104872850427725, "grad_norm": 0.8252706747839509, "learning_rate": 4.051371862230006e-06, "loss": 0.8023, "step": 2776 }, { "epoch": 0.0810779247321246, "grad_norm": 0.8357164164487174, "learning_rate": 4.052831290134268e-06, "loss": 0.7922, "step": 2777 }, { "epoch": 0.08110712095997197, "grad_norm": 0.9844547104523209, "learning_rate": 4.05429071803853e-06, "loss": 0.807, "step": 2778 }, { "epoch": 0.08113631718781933, "grad_norm": 0.780510063112636, "learning_rate": 4.055750145942791e-06, "loss": 0.7598, "step": 2779 }, { "epoch": 0.08116551341566669, "grad_norm": 0.8438395197544096, "learning_rate": 4.057209573847052e-06, "loss": 0.7436, "step": 2780 }, { "epoch": 0.08119470964351405, "grad_norm": 0.772865036854416, "learning_rate": 4.0586690017513134e-06, "loss": 0.7436, "step": 2781 }, { "epoch": 0.08122390587136143, "grad_norm": 1.004787410819991, "learning_rate": 4.0601284296555755e-06, "loss": 0.7407, "step": 2782 }, { "epoch": 0.08125310209920879, "grad_norm": 0.9483577769984697, "learning_rate": 4.061587857559837e-06, "loss": 0.8147, "step": 2783 }, { "epoch": 0.08128229832705615, "grad_norm": 0.7502792061859277, "learning_rate": 4.063047285464098e-06, "loss": 0.7103, "step": 2784 }, { "epoch": 0.08131149455490351, "grad_norm": 0.8238793696127071, "learning_rate": 4.064506713368359e-06, "loss": 0.727, "step": 2785 }, { "epoch": 0.08134069078275087, "grad_norm": 0.7661573473507501, "learning_rate": 4.065966141272621e-06, "loss": 0.6429, "step": 2786 }, { "epoch": 0.08136988701059823, "grad_norm": 0.8592492167548099, "learning_rate": 4.0674255691768835e-06, "loss": 0.782, "step": 2787 }, { "epoch": 0.0813990832384456, "grad_norm": 0.8119569221328151, "learning_rate": 4.068884997081145e-06, "loss": 0.7466, "step": 2788 }, { "epoch": 0.08142827946629295, "grad_norm": 0.7880374121572159, "learning_rate": 4.070344424985406e-06, "loss": 0.8198, "step": 2789 }, { "epoch": 0.08145747569414032, "grad_norm": 1.1610752528032757, "learning_rate": 4.071803852889667e-06, "loss": 0.7979, "step": 2790 }, { "epoch": 0.08148667192198768, "grad_norm": 0.7736666944613118, "learning_rate": 4.073263280793929e-06, "loss": 0.7329, "step": 2791 }, { "epoch": 0.08151586814983504, "grad_norm": 0.8074193057311195, "learning_rate": 4.0747227086981905e-06, "loss": 0.7975, "step": 2792 }, { "epoch": 0.0815450643776824, "grad_norm": 0.8123876659879767, "learning_rate": 4.076182136602453e-06, "loss": 0.7229, "step": 2793 }, { "epoch": 0.08157426060552976, "grad_norm": 0.8328179752618715, "learning_rate": 4.077641564506714e-06, "loss": 0.7157, "step": 2794 }, { "epoch": 0.08160345683337712, "grad_norm": 1.1934162478165846, "learning_rate": 4.079100992410975e-06, "loss": 0.6885, "step": 2795 }, { "epoch": 0.08163265306122448, "grad_norm": 0.7729445049728889, "learning_rate": 4.080560420315236e-06, "loss": 0.768, "step": 2796 }, { "epoch": 0.08166184928907186, "grad_norm": 0.824029304781512, "learning_rate": 4.0820198482194985e-06, "loss": 0.672, "step": 2797 }, { "epoch": 0.08169104551691922, "grad_norm": 0.9245957636571743, "learning_rate": 4.08347927612376e-06, "loss": 0.8275, "step": 2798 }, { "epoch": 0.08172024174476658, "grad_norm": 0.8313610490324657, "learning_rate": 4.084938704028022e-06, "loss": 0.6975, "step": 2799 }, { "epoch": 0.08174943797261394, "grad_norm": 0.7442192363768553, "learning_rate": 4.086398131932282e-06, "loss": 0.7497, "step": 2800 }, { "epoch": 0.0817786342004613, "grad_norm": 0.7817296493652974, "learning_rate": 4.087857559836544e-06, "loss": 0.7779, "step": 2801 }, { "epoch": 0.08180783042830866, "grad_norm": 0.7578828419971075, "learning_rate": 4.089316987740806e-06, "loss": 0.7293, "step": 2802 }, { "epoch": 0.08183702665615603, "grad_norm": 0.7412674113580962, "learning_rate": 4.090776415645068e-06, "loss": 0.6437, "step": 2803 }, { "epoch": 0.08186622288400339, "grad_norm": 0.7945512972925919, "learning_rate": 4.092235843549329e-06, "loss": 0.7367, "step": 2804 }, { "epoch": 0.08189541911185075, "grad_norm": 0.9868034435253866, "learning_rate": 4.09369527145359e-06, "loss": 0.7125, "step": 2805 }, { "epoch": 0.08192461533969811, "grad_norm": 0.9930401315565074, "learning_rate": 4.095154699357852e-06, "loss": 0.8395, "step": 2806 }, { "epoch": 0.08195381156754547, "grad_norm": 0.7943337882082877, "learning_rate": 4.0966141272621135e-06, "loss": 0.7741, "step": 2807 }, { "epoch": 0.08198300779539283, "grad_norm": 0.8407441703574219, "learning_rate": 4.0980735551663756e-06, "loss": 0.6493, "step": 2808 }, { "epoch": 0.08201220402324019, "grad_norm": 0.9129543851316338, "learning_rate": 4.099532983070637e-06, "loss": 0.7691, "step": 2809 }, { "epoch": 0.08204140025108755, "grad_norm": 1.49794412662418, "learning_rate": 4.100992410974898e-06, "loss": 0.6922, "step": 2810 }, { "epoch": 0.08207059647893492, "grad_norm": 0.7903501986761972, "learning_rate": 4.102451838879159e-06, "loss": 0.7213, "step": 2811 }, { "epoch": 0.08209979270678229, "grad_norm": 0.9157830383958291, "learning_rate": 4.103911266783421e-06, "loss": 0.7765, "step": 2812 }, { "epoch": 0.08212898893462965, "grad_norm": 0.8030121600016615, "learning_rate": 4.105370694687683e-06, "loss": 0.6897, "step": 2813 }, { "epoch": 0.08215818516247701, "grad_norm": 0.79247635039252, "learning_rate": 4.106830122591945e-06, "loss": 0.7473, "step": 2814 }, { "epoch": 0.08218738139032437, "grad_norm": 0.7843873804702112, "learning_rate": 4.108289550496206e-06, "loss": 0.7843, "step": 2815 }, { "epoch": 0.08221657761817174, "grad_norm": 0.8284329713620119, "learning_rate": 4.109748978400467e-06, "loss": 0.6292, "step": 2816 }, { "epoch": 0.0822457738460191, "grad_norm": 0.7377158026375482, "learning_rate": 4.1112084063047285e-06, "loss": 0.6951, "step": 2817 }, { "epoch": 0.08227497007386646, "grad_norm": 0.8791450407480023, "learning_rate": 4.112667834208991e-06, "loss": 0.7801, "step": 2818 }, { "epoch": 0.08230416630171382, "grad_norm": 0.8353047184757709, "learning_rate": 4.114127262113252e-06, "loss": 0.8497, "step": 2819 }, { "epoch": 0.08233336252956118, "grad_norm": 0.7632861290657128, "learning_rate": 4.115586690017513e-06, "loss": 0.6719, "step": 2820 }, { "epoch": 0.08236255875740854, "grad_norm": 0.7183884415193397, "learning_rate": 4.117046117921775e-06, "loss": 0.6283, "step": 2821 }, { "epoch": 0.0823917549852559, "grad_norm": 0.7268824118232212, "learning_rate": 4.118505545826036e-06, "loss": 0.6456, "step": 2822 }, { "epoch": 0.08242095121310326, "grad_norm": 0.820941622558524, "learning_rate": 4.1199649737302985e-06, "loss": 0.7728, "step": 2823 }, { "epoch": 0.08245014744095062, "grad_norm": 0.8597130604963642, "learning_rate": 4.12142440163456e-06, "loss": 0.7935, "step": 2824 }, { "epoch": 0.08247934366879799, "grad_norm": 0.7550626770034448, "learning_rate": 4.122883829538821e-06, "loss": 0.6516, "step": 2825 }, { "epoch": 0.08250853989664535, "grad_norm": 0.8553487392094977, "learning_rate": 4.124343257443082e-06, "loss": 0.7301, "step": 2826 }, { "epoch": 0.08253773612449272, "grad_norm": 0.8936355945776636, "learning_rate": 4.125802685347344e-06, "loss": 0.8249, "step": 2827 }, { "epoch": 0.08256693235234008, "grad_norm": 1.0957425315204872, "learning_rate": 4.127262113251606e-06, "loss": 0.7783, "step": 2828 }, { "epoch": 0.08259612858018744, "grad_norm": 0.7691664399244512, "learning_rate": 4.128721541155868e-06, "loss": 0.7485, "step": 2829 }, { "epoch": 0.0826253248080348, "grad_norm": 0.7098967524526639, "learning_rate": 4.130180969060129e-06, "loss": 0.6902, "step": 2830 }, { "epoch": 0.08265452103588217, "grad_norm": 0.89315483556809, "learning_rate": 4.13164039696439e-06, "loss": 0.7504, "step": 2831 }, { "epoch": 0.08268371726372953, "grad_norm": 0.7768021510536001, "learning_rate": 4.133099824868651e-06, "loss": 0.6513, "step": 2832 }, { "epoch": 0.08271291349157689, "grad_norm": 0.7851660616276781, "learning_rate": 4.1345592527729135e-06, "loss": 0.7189, "step": 2833 }, { "epoch": 0.08274210971942425, "grad_norm": 0.7447437388763976, "learning_rate": 4.136018680677175e-06, "loss": 0.7381, "step": 2834 }, { "epoch": 0.08277130594727161, "grad_norm": 0.8551229484390357, "learning_rate": 4.137478108581436e-06, "loss": 0.8183, "step": 2835 }, { "epoch": 0.08280050217511897, "grad_norm": 0.8329764696379581, "learning_rate": 4.138937536485698e-06, "loss": 0.7089, "step": 2836 }, { "epoch": 0.08282969840296633, "grad_norm": 0.8697248698940035, "learning_rate": 4.140396964389959e-06, "loss": 0.8217, "step": 2837 }, { "epoch": 0.0828588946308137, "grad_norm": 0.7734560127459804, "learning_rate": 4.1418563922942214e-06, "loss": 0.6871, "step": 2838 }, { "epoch": 0.08288809085866106, "grad_norm": 0.7611335555094118, "learning_rate": 4.143315820198483e-06, "loss": 0.6473, "step": 2839 }, { "epoch": 0.08291728708650842, "grad_norm": 0.7970637877713581, "learning_rate": 4.144775248102744e-06, "loss": 0.6437, "step": 2840 }, { "epoch": 0.08294648331435578, "grad_norm": 0.7458171978340182, "learning_rate": 4.146234676007005e-06, "loss": 0.6693, "step": 2841 }, { "epoch": 0.08297567954220315, "grad_norm": 0.8154989959128897, "learning_rate": 4.147694103911267e-06, "loss": 0.8098, "step": 2842 }, { "epoch": 0.08300487577005052, "grad_norm": 0.7473682354868317, "learning_rate": 4.1491535318155285e-06, "loss": 0.6666, "step": 2843 }, { "epoch": 0.08303407199789788, "grad_norm": 0.8239444609316185, "learning_rate": 4.150612959719791e-06, "loss": 0.7322, "step": 2844 }, { "epoch": 0.08306326822574524, "grad_norm": 0.8241522280461252, "learning_rate": 4.152072387624052e-06, "loss": 0.7149, "step": 2845 }, { "epoch": 0.0830924644535926, "grad_norm": 0.7411385270788943, "learning_rate": 4.153531815528313e-06, "loss": 0.678, "step": 2846 }, { "epoch": 0.08312166068143996, "grad_norm": 0.8155830284493161, "learning_rate": 4.154991243432574e-06, "loss": 0.7544, "step": 2847 }, { "epoch": 0.08315085690928732, "grad_norm": 0.809901581815689, "learning_rate": 4.1564506713368364e-06, "loss": 0.7477, "step": 2848 }, { "epoch": 0.08318005313713468, "grad_norm": 0.8666277406135116, "learning_rate": 4.157910099241098e-06, "loss": 0.8577, "step": 2849 }, { "epoch": 0.08320924936498204, "grad_norm": 0.8468801876700128, "learning_rate": 4.159369527145359e-06, "loss": 0.7709, "step": 2850 }, { "epoch": 0.0832384455928294, "grad_norm": 0.7431547533507608, "learning_rate": 4.160828955049621e-06, "loss": 0.6635, "step": 2851 }, { "epoch": 0.08326764182067677, "grad_norm": 0.7324412716663675, "learning_rate": 4.162288382953882e-06, "loss": 0.6586, "step": 2852 }, { "epoch": 0.08329683804852413, "grad_norm": 1.5660430313906295, "learning_rate": 4.163747810858144e-06, "loss": 0.7806, "step": 2853 }, { "epoch": 0.08332603427637149, "grad_norm": 0.84398712915663, "learning_rate": 4.165207238762406e-06, "loss": 0.8489, "step": 2854 }, { "epoch": 0.08335523050421885, "grad_norm": 0.8126756634243157, "learning_rate": 4.166666666666667e-06, "loss": 0.7428, "step": 2855 }, { "epoch": 0.08338442673206621, "grad_norm": 0.8396845702030462, "learning_rate": 4.168126094570928e-06, "loss": 0.7818, "step": 2856 }, { "epoch": 0.08341362295991359, "grad_norm": 0.8406560915940026, "learning_rate": 4.16958552247519e-06, "loss": 0.8498, "step": 2857 }, { "epoch": 0.08344281918776095, "grad_norm": 0.7360170183028145, "learning_rate": 4.1710449503794515e-06, "loss": 0.6501, "step": 2858 }, { "epoch": 0.08347201541560831, "grad_norm": 0.7593818947268579, "learning_rate": 4.1725043782837135e-06, "loss": 0.6926, "step": 2859 }, { "epoch": 0.08350121164345567, "grad_norm": 0.8281792542277767, "learning_rate": 4.173963806187975e-06, "loss": 0.7819, "step": 2860 }, { "epoch": 0.08353040787130303, "grad_norm": 0.8908636418304879, "learning_rate": 4.175423234092236e-06, "loss": 0.6765, "step": 2861 }, { "epoch": 0.08355960409915039, "grad_norm": 0.7326883962524098, "learning_rate": 4.176882661996497e-06, "loss": 0.694, "step": 2862 }, { "epoch": 0.08358880032699775, "grad_norm": 0.7522441056913837, "learning_rate": 4.178342089900759e-06, "loss": 0.6416, "step": 2863 }, { "epoch": 0.08361799655484511, "grad_norm": 0.7676183525033186, "learning_rate": 4.179801517805021e-06, "loss": 0.6763, "step": 2864 }, { "epoch": 0.08364719278269248, "grad_norm": 0.8875627787202264, "learning_rate": 4.181260945709283e-06, "loss": 0.7354, "step": 2865 }, { "epoch": 0.08367638901053984, "grad_norm": 0.7425935849580136, "learning_rate": 4.182720373613544e-06, "loss": 0.6973, "step": 2866 }, { "epoch": 0.0837055852383872, "grad_norm": 0.817188883593217, "learning_rate": 4.184179801517805e-06, "loss": 0.7336, "step": 2867 }, { "epoch": 0.08373478146623456, "grad_norm": 0.8221430662783835, "learning_rate": 4.185639229422067e-06, "loss": 0.7648, "step": 2868 }, { "epoch": 0.08376397769408192, "grad_norm": 0.7436679955664246, "learning_rate": 4.1870986573263286e-06, "loss": 0.6913, "step": 2869 }, { "epoch": 0.08379317392192928, "grad_norm": 0.8610491332952234, "learning_rate": 4.18855808523059e-06, "loss": 0.7627, "step": 2870 }, { "epoch": 0.08382237014977664, "grad_norm": 0.7895708353205185, "learning_rate": 4.190017513134851e-06, "loss": 0.7503, "step": 2871 }, { "epoch": 0.08385156637762402, "grad_norm": 0.8369633085491058, "learning_rate": 4.191476941039113e-06, "loss": 0.7194, "step": 2872 }, { "epoch": 0.08388076260547138, "grad_norm": 0.8458920376087022, "learning_rate": 4.192936368943374e-06, "loss": 0.7105, "step": 2873 }, { "epoch": 0.08390995883331874, "grad_norm": 0.7203686836029728, "learning_rate": 4.1943957968476365e-06, "loss": 0.6763, "step": 2874 }, { "epoch": 0.0839391550611661, "grad_norm": 0.7676174732359147, "learning_rate": 4.195855224751898e-06, "loss": 0.7348, "step": 2875 }, { "epoch": 0.08396835128901346, "grad_norm": 0.7953630746691283, "learning_rate": 4.197314652656159e-06, "loss": 0.7339, "step": 2876 }, { "epoch": 0.08399754751686082, "grad_norm": 0.7947233407431717, "learning_rate": 4.19877408056042e-06, "loss": 0.7083, "step": 2877 }, { "epoch": 0.08402674374470818, "grad_norm": 0.7866867254044073, "learning_rate": 4.200233508464682e-06, "loss": 0.6851, "step": 2878 }, { "epoch": 0.08405593997255555, "grad_norm": 0.7232559887298501, "learning_rate": 4.2016929363689436e-06, "loss": 0.6674, "step": 2879 }, { "epoch": 0.08408513620040291, "grad_norm": 0.8185353966053084, "learning_rate": 4.203152364273206e-06, "loss": 0.7243, "step": 2880 }, { "epoch": 0.08411433242825027, "grad_norm": 1.0503973237127202, "learning_rate": 4.204611792177467e-06, "loss": 0.8041, "step": 2881 }, { "epoch": 0.08414352865609763, "grad_norm": 0.7294181069454089, "learning_rate": 4.206071220081728e-06, "loss": 0.6983, "step": 2882 }, { "epoch": 0.08417272488394499, "grad_norm": 0.9744953502728212, "learning_rate": 4.20753064798599e-06, "loss": 0.7506, "step": 2883 }, { "epoch": 0.08420192111179235, "grad_norm": 0.7526031327294571, "learning_rate": 4.2089900758902515e-06, "loss": 0.6221, "step": 2884 }, { "epoch": 0.08423111733963971, "grad_norm": 0.816108059014555, "learning_rate": 4.210449503794513e-06, "loss": 0.7905, "step": 2885 }, { "epoch": 0.08426031356748707, "grad_norm": 0.7817291254251068, "learning_rate": 4.211908931698774e-06, "loss": 0.695, "step": 2886 }, { "epoch": 0.08428950979533445, "grad_norm": 0.7417883375478338, "learning_rate": 4.213368359603036e-06, "loss": 0.7218, "step": 2887 }, { "epoch": 0.08431870602318181, "grad_norm": 1.0096457553620475, "learning_rate": 4.214827787507297e-06, "loss": 0.8672, "step": 2888 }, { "epoch": 0.08434790225102917, "grad_norm": 0.7858343754285535, "learning_rate": 4.216287215411559e-06, "loss": 0.7504, "step": 2889 }, { "epoch": 0.08437709847887653, "grad_norm": 0.8866179667196693, "learning_rate": 4.217746643315821e-06, "loss": 0.8166, "step": 2890 }, { "epoch": 0.0844062947067239, "grad_norm": 0.7776299107847646, "learning_rate": 4.219206071220082e-06, "loss": 0.7765, "step": 2891 }, { "epoch": 0.08443549093457126, "grad_norm": 0.7588921772403938, "learning_rate": 4.220665499124343e-06, "loss": 0.6998, "step": 2892 }, { "epoch": 0.08446468716241862, "grad_norm": 0.7647455425955002, "learning_rate": 4.222124927028605e-06, "loss": 0.7104, "step": 2893 }, { "epoch": 0.08449388339026598, "grad_norm": 0.9036037244133587, "learning_rate": 4.2235843549328665e-06, "loss": 0.7245, "step": 2894 }, { "epoch": 0.08452307961811334, "grad_norm": 0.7723601613293576, "learning_rate": 4.225043782837129e-06, "loss": 0.6171, "step": 2895 }, { "epoch": 0.0845522758459607, "grad_norm": 0.8074564987057474, "learning_rate": 4.22650321074139e-06, "loss": 0.7172, "step": 2896 }, { "epoch": 0.08458147207380806, "grad_norm": 1.0742238295754891, "learning_rate": 4.227962638645651e-06, "loss": 0.7285, "step": 2897 }, { "epoch": 0.08461066830165542, "grad_norm": 0.8595970263879046, "learning_rate": 4.229422066549913e-06, "loss": 0.8503, "step": 2898 }, { "epoch": 0.08463986452950278, "grad_norm": 0.737709372276743, "learning_rate": 4.230881494454174e-06, "loss": 0.6695, "step": 2899 }, { "epoch": 0.08466906075735015, "grad_norm": 0.85583418767821, "learning_rate": 4.232340922358436e-06, "loss": 0.7901, "step": 2900 }, { "epoch": 0.0846982569851975, "grad_norm": 0.8018632275903903, "learning_rate": 4.233800350262697e-06, "loss": 0.7368, "step": 2901 }, { "epoch": 0.08472745321304488, "grad_norm": 0.7572084214138431, "learning_rate": 4.235259778166959e-06, "loss": 0.7162, "step": 2902 }, { "epoch": 0.08475664944089224, "grad_norm": 0.8362023713549784, "learning_rate": 4.23671920607122e-06, "loss": 0.7979, "step": 2903 }, { "epoch": 0.0847858456687396, "grad_norm": 0.7541809217670243, "learning_rate": 4.238178633975482e-06, "loss": 0.6964, "step": 2904 }, { "epoch": 0.08481504189658697, "grad_norm": 0.7956837310653823, "learning_rate": 4.239638061879744e-06, "loss": 0.7772, "step": 2905 }, { "epoch": 0.08484423812443433, "grad_norm": 0.8702682824371184, "learning_rate": 4.241097489784005e-06, "loss": 0.7899, "step": 2906 }, { "epoch": 0.08487343435228169, "grad_norm": 1.0472788184628556, "learning_rate": 4.242556917688266e-06, "loss": 0.8281, "step": 2907 }, { "epoch": 0.08490263058012905, "grad_norm": 0.8594811569003229, "learning_rate": 4.244016345592528e-06, "loss": 0.8158, "step": 2908 }, { "epoch": 0.08493182680797641, "grad_norm": 0.8008500508540215, "learning_rate": 4.2454757734967894e-06, "loss": 0.7057, "step": 2909 }, { "epoch": 0.08496102303582377, "grad_norm": 0.7793905167632259, "learning_rate": 4.2469352014010515e-06, "loss": 0.7531, "step": 2910 }, { "epoch": 0.08499021926367113, "grad_norm": 0.7642838854728727, "learning_rate": 4.248394629305313e-06, "loss": 0.6826, "step": 2911 }, { "epoch": 0.0850194154915185, "grad_norm": 0.7488512416235968, "learning_rate": 4.249854057209574e-06, "loss": 0.717, "step": 2912 }, { "epoch": 0.08504861171936585, "grad_norm": 0.789168832682813, "learning_rate": 4.251313485113835e-06, "loss": 0.7736, "step": 2913 }, { "epoch": 0.08507780794721322, "grad_norm": 0.8952608553466094, "learning_rate": 4.252772913018097e-06, "loss": 0.7646, "step": 2914 }, { "epoch": 0.08510700417506058, "grad_norm": 0.7397423984084701, "learning_rate": 4.2542323409223594e-06, "loss": 0.6632, "step": 2915 }, { "epoch": 0.08513620040290794, "grad_norm": 0.7461721439387359, "learning_rate": 4.255691768826621e-06, "loss": 0.6049, "step": 2916 }, { "epoch": 0.0851653966307553, "grad_norm": 0.8989498435659729, "learning_rate": 4.257151196730882e-06, "loss": 0.6774, "step": 2917 }, { "epoch": 0.08519459285860267, "grad_norm": 0.7464862305608463, "learning_rate": 4.258610624635143e-06, "loss": 0.6473, "step": 2918 }, { "epoch": 0.08522378908645004, "grad_norm": 0.7332934214623528, "learning_rate": 4.260070052539405e-06, "loss": 0.7033, "step": 2919 }, { "epoch": 0.0852529853142974, "grad_norm": 0.785081315893928, "learning_rate": 4.2615294804436665e-06, "loss": 0.6829, "step": 2920 }, { "epoch": 0.08528218154214476, "grad_norm": 1.307678104528306, "learning_rate": 4.262988908347928e-06, "loss": 0.7506, "step": 2921 }, { "epoch": 0.08531137776999212, "grad_norm": 0.7864035089295869, "learning_rate": 4.264448336252189e-06, "loss": 0.6658, "step": 2922 }, { "epoch": 0.08534057399783948, "grad_norm": 0.7287991387918741, "learning_rate": 4.265907764156451e-06, "loss": 0.7136, "step": 2923 }, { "epoch": 0.08536977022568684, "grad_norm": 0.742264460885428, "learning_rate": 4.267367192060712e-06, "loss": 0.6319, "step": 2924 }, { "epoch": 0.0853989664535342, "grad_norm": 1.2525933893718826, "learning_rate": 4.2688266199649745e-06, "loss": 0.72, "step": 2925 }, { "epoch": 0.08542816268138156, "grad_norm": 0.7480990469691836, "learning_rate": 4.270286047869236e-06, "loss": 0.7315, "step": 2926 }, { "epoch": 0.08545735890922893, "grad_norm": 0.7560872255808898, "learning_rate": 4.271745475773497e-06, "loss": 0.6371, "step": 2927 }, { "epoch": 0.08548655513707629, "grad_norm": 0.8421770462819426, "learning_rate": 4.273204903677758e-06, "loss": 0.8342, "step": 2928 }, { "epoch": 0.08551575136492365, "grad_norm": 0.8865669171150576, "learning_rate": 4.27466433158202e-06, "loss": 0.8322, "step": 2929 }, { "epoch": 0.08554494759277101, "grad_norm": 0.7405810171191448, "learning_rate": 4.276123759486282e-06, "loss": 0.7188, "step": 2930 }, { "epoch": 0.08557414382061837, "grad_norm": 0.8459636450569678, "learning_rate": 4.277583187390544e-06, "loss": 0.798, "step": 2931 }, { "epoch": 0.08560334004846573, "grad_norm": 0.8450801012107537, "learning_rate": 4.279042615294805e-06, "loss": 0.7051, "step": 2932 }, { "epoch": 0.0856325362763131, "grad_norm": 0.8567194507480413, "learning_rate": 4.280502043199066e-06, "loss": 0.7954, "step": 2933 }, { "epoch": 0.08566173250416047, "grad_norm": 0.7654501624199962, "learning_rate": 4.281961471103328e-06, "loss": 0.727, "step": 2934 }, { "epoch": 0.08569092873200783, "grad_norm": 0.7570200572097615, "learning_rate": 4.2834208990075895e-06, "loss": 0.6776, "step": 2935 }, { "epoch": 0.08572012495985519, "grad_norm": 0.7194344060743137, "learning_rate": 4.284880326911851e-06, "loss": 0.6643, "step": 2936 }, { "epoch": 0.08574932118770255, "grad_norm": 0.7442727639082579, "learning_rate": 4.286339754816112e-06, "loss": 0.6239, "step": 2937 }, { "epoch": 0.08577851741554991, "grad_norm": 0.8769566888889216, "learning_rate": 4.287799182720374e-06, "loss": 0.7874, "step": 2938 }, { "epoch": 0.08580771364339727, "grad_norm": 0.8324777993168206, "learning_rate": 4.289258610624635e-06, "loss": 0.7284, "step": 2939 }, { "epoch": 0.08583690987124463, "grad_norm": 1.085124316837513, "learning_rate": 4.290718038528897e-06, "loss": 0.6384, "step": 2940 }, { "epoch": 0.085866106099092, "grad_norm": 0.7841100621229239, "learning_rate": 4.292177466433159e-06, "loss": 0.6808, "step": 2941 }, { "epoch": 0.08589530232693936, "grad_norm": 0.7749664745115711, "learning_rate": 4.29363689433742e-06, "loss": 0.7827, "step": 2942 }, { "epoch": 0.08592449855478672, "grad_norm": 0.8340363743654691, "learning_rate": 4.295096322241681e-06, "loss": 0.7217, "step": 2943 }, { "epoch": 0.08595369478263408, "grad_norm": 0.7665345210837787, "learning_rate": 4.296555750145943e-06, "loss": 0.688, "step": 2944 }, { "epoch": 0.08598289101048144, "grad_norm": 0.746474438470436, "learning_rate": 4.2980151780502045e-06, "loss": 0.671, "step": 2945 }, { "epoch": 0.0860120872383288, "grad_norm": 0.9213318244554295, "learning_rate": 4.2994746059544666e-06, "loss": 0.7583, "step": 2946 }, { "epoch": 0.08604128346617616, "grad_norm": 0.8371299207063205, "learning_rate": 4.300934033858728e-06, "loss": 0.7602, "step": 2947 }, { "epoch": 0.08607047969402354, "grad_norm": 0.7834145875507659, "learning_rate": 4.302393461762989e-06, "loss": 0.7562, "step": 2948 }, { "epoch": 0.0860996759218709, "grad_norm": 0.8229202751996142, "learning_rate": 4.303852889667251e-06, "loss": 0.7984, "step": 2949 }, { "epoch": 0.08612887214971826, "grad_norm": 0.8109962195939185, "learning_rate": 4.305312317571512e-06, "loss": 0.7272, "step": 2950 }, { "epoch": 0.08615806837756562, "grad_norm": 0.773230807658091, "learning_rate": 4.306771745475774e-06, "loss": 0.6707, "step": 2951 }, { "epoch": 0.08618726460541298, "grad_norm": 0.7310275255490358, "learning_rate": 4.308231173380035e-06, "loss": 0.6555, "step": 2952 }, { "epoch": 0.08621646083326034, "grad_norm": 0.7915648879054636, "learning_rate": 4.309690601284297e-06, "loss": 0.7217, "step": 2953 }, { "epoch": 0.0862456570611077, "grad_norm": 0.762467000531794, "learning_rate": 4.311150029188558e-06, "loss": 0.6352, "step": 2954 }, { "epoch": 0.08627485328895507, "grad_norm": 0.7925416352884259, "learning_rate": 4.31260945709282e-06, "loss": 0.802, "step": 2955 }, { "epoch": 0.08630404951680243, "grad_norm": 0.9469246564113956, "learning_rate": 4.3140688849970816e-06, "loss": 0.8302, "step": 2956 }, { "epoch": 0.08633324574464979, "grad_norm": 0.9485986631651798, "learning_rate": 4.315528312901343e-06, "loss": 0.7894, "step": 2957 }, { "epoch": 0.08636244197249715, "grad_norm": 0.8957845328125822, "learning_rate": 4.316987740805604e-06, "loss": 0.6851, "step": 2958 }, { "epoch": 0.08639163820034451, "grad_norm": 0.7983093052562988, "learning_rate": 4.318447168709866e-06, "loss": 0.8328, "step": 2959 }, { "epoch": 0.08642083442819187, "grad_norm": 0.8162229540587513, "learning_rate": 4.319906596614127e-06, "loss": 0.8005, "step": 2960 }, { "epoch": 0.08645003065603923, "grad_norm": 0.7234313940239815, "learning_rate": 4.3213660245183895e-06, "loss": 0.7211, "step": 2961 }, { "epoch": 0.0864792268838866, "grad_norm": 0.7631404513546085, "learning_rate": 4.322825452422651e-06, "loss": 0.8022, "step": 2962 }, { "epoch": 0.08650842311173397, "grad_norm": 0.8085599894134015, "learning_rate": 4.324284880326912e-06, "loss": 0.7644, "step": 2963 }, { "epoch": 0.08653761933958133, "grad_norm": 0.7804725394635708, "learning_rate": 4.325744308231174e-06, "loss": 0.6905, "step": 2964 }, { "epoch": 0.08656681556742869, "grad_norm": 0.7732107732118593, "learning_rate": 4.327203736135435e-06, "loss": 0.666, "step": 2965 }, { "epoch": 0.08659601179527605, "grad_norm": 0.7906104220059949, "learning_rate": 4.328663164039697e-06, "loss": 0.7124, "step": 2966 }, { "epoch": 0.08662520802312342, "grad_norm": 1.0446934618976575, "learning_rate": 4.330122591943958e-06, "loss": 0.6588, "step": 2967 }, { "epoch": 0.08665440425097078, "grad_norm": 0.7923512442176774, "learning_rate": 4.33158201984822e-06, "loss": 0.745, "step": 2968 }, { "epoch": 0.08668360047881814, "grad_norm": 0.7542018079235133, "learning_rate": 4.333041447752481e-06, "loss": 0.7041, "step": 2969 }, { "epoch": 0.0867127967066655, "grad_norm": 0.7701219960481185, "learning_rate": 4.334500875656743e-06, "loss": 0.7005, "step": 2970 }, { "epoch": 0.08674199293451286, "grad_norm": 0.731826807977221, "learning_rate": 4.3359603035610045e-06, "loss": 0.7141, "step": 2971 }, { "epoch": 0.08677118916236022, "grad_norm": 0.7929699501037004, "learning_rate": 4.337419731465266e-06, "loss": 0.6147, "step": 2972 }, { "epoch": 0.08680038539020758, "grad_norm": 1.1447231838431868, "learning_rate": 4.338879159369527e-06, "loss": 0.7938, "step": 2973 }, { "epoch": 0.08682958161805494, "grad_norm": 0.791493764259998, "learning_rate": 4.340338587273789e-06, "loss": 0.74, "step": 2974 }, { "epoch": 0.0868587778459023, "grad_norm": 0.7814013652088847, "learning_rate": 4.34179801517805e-06, "loss": 0.782, "step": 2975 }, { "epoch": 0.08688797407374967, "grad_norm": 0.7753750946938055, "learning_rate": 4.343257443082312e-06, "loss": 0.7363, "step": 2976 }, { "epoch": 0.08691717030159703, "grad_norm": 0.7716381485978621, "learning_rate": 4.344716870986574e-06, "loss": 0.7105, "step": 2977 }, { "epoch": 0.0869463665294444, "grad_norm": 0.8104013646084568, "learning_rate": 4.346176298890835e-06, "loss": 0.7645, "step": 2978 }, { "epoch": 0.08697556275729176, "grad_norm": 0.8124047133744822, "learning_rate": 4.347635726795097e-06, "loss": 0.7489, "step": 2979 }, { "epoch": 0.08700475898513912, "grad_norm": 0.7519864186827345, "learning_rate": 4.349095154699358e-06, "loss": 0.7341, "step": 2980 }, { "epoch": 0.08703395521298649, "grad_norm": 0.7669373977090511, "learning_rate": 4.35055458260362e-06, "loss": 0.7151, "step": 2981 }, { "epoch": 0.08706315144083385, "grad_norm": 0.7527153134289547, "learning_rate": 4.352014010507882e-06, "loss": 0.6636, "step": 2982 }, { "epoch": 0.08709234766868121, "grad_norm": 0.7581164360665557, "learning_rate": 4.353473438412143e-06, "loss": 0.7124, "step": 2983 }, { "epoch": 0.08712154389652857, "grad_norm": 0.8195128320644824, "learning_rate": 4.354932866316404e-06, "loss": 0.7585, "step": 2984 }, { "epoch": 0.08715074012437593, "grad_norm": 0.8528477076747318, "learning_rate": 4.356392294220666e-06, "loss": 0.7959, "step": 2985 }, { "epoch": 0.08717993635222329, "grad_norm": 0.7022294595705576, "learning_rate": 4.3578517221249274e-06, "loss": 0.623, "step": 2986 }, { "epoch": 0.08720913258007065, "grad_norm": 0.7994506213223871, "learning_rate": 4.359311150029189e-06, "loss": 0.7753, "step": 2987 }, { "epoch": 0.08723832880791801, "grad_norm": 0.7733509322977183, "learning_rate": 4.36077057793345e-06, "loss": 0.6576, "step": 2988 }, { "epoch": 0.08726752503576538, "grad_norm": 0.8388599245460867, "learning_rate": 4.362230005837712e-06, "loss": 0.6797, "step": 2989 }, { "epoch": 0.08729672126361274, "grad_norm": 0.7459566616090935, "learning_rate": 4.363689433741973e-06, "loss": 0.6863, "step": 2990 }, { "epoch": 0.0873259174914601, "grad_norm": 0.7861829884519738, "learning_rate": 4.365148861646235e-06, "loss": 0.7151, "step": 2991 }, { "epoch": 0.08735511371930746, "grad_norm": 0.8421238600897213, "learning_rate": 4.366608289550497e-06, "loss": 0.8189, "step": 2992 }, { "epoch": 0.08738430994715483, "grad_norm": 0.7751349281549357, "learning_rate": 4.368067717454758e-06, "loss": 0.7358, "step": 2993 }, { "epoch": 0.0874135061750022, "grad_norm": 0.756635261803099, "learning_rate": 4.36952714535902e-06, "loss": 0.6835, "step": 2994 }, { "epoch": 0.08744270240284956, "grad_norm": 0.8231739455618136, "learning_rate": 4.370986573263281e-06, "loss": 0.7551, "step": 2995 }, { "epoch": 0.08747189863069692, "grad_norm": 0.7939510349442171, "learning_rate": 4.372446001167543e-06, "loss": 0.6807, "step": 2996 }, { "epoch": 0.08750109485854428, "grad_norm": 0.7489725343853404, "learning_rate": 4.3739054290718045e-06, "loss": 0.684, "step": 2997 }, { "epoch": 0.08753029108639164, "grad_norm": 0.8282108413417275, "learning_rate": 4.375364856976066e-06, "loss": 0.7897, "step": 2998 }, { "epoch": 0.087559487314239, "grad_norm": 1.0103147931392276, "learning_rate": 4.376824284880327e-06, "loss": 0.9268, "step": 2999 }, { "epoch": 0.08758868354208636, "grad_norm": 0.8446148510746291, "learning_rate": 4.378283712784589e-06, "loss": 0.7709, "step": 3000 }, { "epoch": 0.08761787976993372, "grad_norm": 0.866335089961784, "learning_rate": 4.37974314068885e-06, "loss": 0.8008, "step": 3001 }, { "epoch": 0.08764707599778108, "grad_norm": 0.772659176896776, "learning_rate": 4.381202568593112e-06, "loss": 0.719, "step": 3002 }, { "epoch": 0.08767627222562845, "grad_norm": 0.7663249866585345, "learning_rate": 4.382661996497373e-06, "loss": 0.7268, "step": 3003 }, { "epoch": 0.08770546845347581, "grad_norm": 0.9072570345332672, "learning_rate": 4.384121424401635e-06, "loss": 0.8179, "step": 3004 }, { "epoch": 0.08773466468132317, "grad_norm": 0.8320511091322649, "learning_rate": 4.385580852305896e-06, "loss": 0.8031, "step": 3005 }, { "epoch": 0.08776386090917053, "grad_norm": 0.7806470900548952, "learning_rate": 4.387040280210158e-06, "loss": 0.6557, "step": 3006 }, { "epoch": 0.08779305713701789, "grad_norm": 0.7835973899799916, "learning_rate": 4.3884997081144195e-06, "loss": 0.7758, "step": 3007 }, { "epoch": 0.08782225336486527, "grad_norm": 0.9555107286716474, "learning_rate": 4.389959136018681e-06, "loss": 0.768, "step": 3008 }, { "epoch": 0.08785144959271263, "grad_norm": 0.7999085348343433, "learning_rate": 4.391418563922942e-06, "loss": 0.7447, "step": 3009 }, { "epoch": 0.08788064582055999, "grad_norm": 0.877245957383222, "learning_rate": 4.392877991827204e-06, "loss": 0.6938, "step": 3010 }, { "epoch": 0.08790984204840735, "grad_norm": 0.7499352835943665, "learning_rate": 4.394337419731466e-06, "loss": 0.706, "step": 3011 }, { "epoch": 0.08793903827625471, "grad_norm": 0.9405507600410163, "learning_rate": 4.3957968476357275e-06, "loss": 0.8253, "step": 3012 }, { "epoch": 0.08796823450410207, "grad_norm": 0.7757397849944113, "learning_rate": 4.397256275539989e-06, "loss": 0.7516, "step": 3013 }, { "epoch": 0.08799743073194943, "grad_norm": 0.7878363942480582, "learning_rate": 4.39871570344425e-06, "loss": 0.7573, "step": 3014 }, { "epoch": 0.0880266269597968, "grad_norm": 0.8353120782667762, "learning_rate": 4.400175131348512e-06, "loss": 0.8215, "step": 3015 }, { "epoch": 0.08805582318764416, "grad_norm": 0.769210614190021, "learning_rate": 4.401634559252773e-06, "loss": 0.6812, "step": 3016 }, { "epoch": 0.08808501941549152, "grad_norm": 0.7755515604371673, "learning_rate": 4.4030939871570345e-06, "loss": 0.7464, "step": 3017 }, { "epoch": 0.08811421564333888, "grad_norm": 0.8270570056401307, "learning_rate": 4.404553415061296e-06, "loss": 0.755, "step": 3018 }, { "epoch": 0.08814341187118624, "grad_norm": 0.7662040922067823, "learning_rate": 4.406012842965558e-06, "loss": 0.7324, "step": 3019 }, { "epoch": 0.0881726080990336, "grad_norm": 0.7682243116428925, "learning_rate": 4.407472270869819e-06, "loss": 0.7408, "step": 3020 }, { "epoch": 0.08820180432688096, "grad_norm": 0.8322922863520712, "learning_rate": 4.408931698774081e-06, "loss": 0.8135, "step": 3021 }, { "epoch": 0.08823100055472832, "grad_norm": 0.7701345853699161, "learning_rate": 4.4103911266783425e-06, "loss": 0.7228, "step": 3022 }, { "epoch": 0.0882601967825757, "grad_norm": 0.7424827555215132, "learning_rate": 4.411850554582604e-06, "loss": 0.7037, "step": 3023 }, { "epoch": 0.08828939301042306, "grad_norm": 0.7908688599443063, "learning_rate": 4.413309982486865e-06, "loss": 0.7389, "step": 3024 }, { "epoch": 0.08831858923827042, "grad_norm": 0.8313663225770865, "learning_rate": 4.414769410391127e-06, "loss": 0.6517, "step": 3025 }, { "epoch": 0.08834778546611778, "grad_norm": 0.7493593675638314, "learning_rate": 4.416228838295389e-06, "loss": 0.6624, "step": 3026 }, { "epoch": 0.08837698169396514, "grad_norm": 0.7311133581827208, "learning_rate": 4.41768826619965e-06, "loss": 0.6758, "step": 3027 }, { "epoch": 0.0884061779218125, "grad_norm": 0.8332282635411709, "learning_rate": 4.419147694103912e-06, "loss": 0.8238, "step": 3028 }, { "epoch": 0.08843537414965986, "grad_norm": 0.8642863306104758, "learning_rate": 4.420607122008173e-06, "loss": 0.8317, "step": 3029 }, { "epoch": 0.08846457037750723, "grad_norm": 0.8003012147453015, "learning_rate": 4.422066549912435e-06, "loss": 0.796, "step": 3030 }, { "epoch": 0.08849376660535459, "grad_norm": 0.7576173168218338, "learning_rate": 4.423525977816696e-06, "loss": 0.7352, "step": 3031 }, { "epoch": 0.08852296283320195, "grad_norm": 0.8039363440389894, "learning_rate": 4.424985405720958e-06, "loss": 0.7582, "step": 3032 }, { "epoch": 0.08855215906104931, "grad_norm": 0.7912793060110357, "learning_rate": 4.4264448336252196e-06, "loss": 0.7622, "step": 3033 }, { "epoch": 0.08858135528889667, "grad_norm": 0.7583113196965853, "learning_rate": 4.427904261529481e-06, "loss": 0.657, "step": 3034 }, { "epoch": 0.08861055151674403, "grad_norm": 0.7980185487258492, "learning_rate": 4.429363689433742e-06, "loss": 0.7758, "step": 3035 }, { "epoch": 0.0886397477445914, "grad_norm": 0.9278524735662026, "learning_rate": 4.430823117338004e-06, "loss": 0.7629, "step": 3036 }, { "epoch": 0.08866894397243875, "grad_norm": 0.7511459351790274, "learning_rate": 4.432282545242265e-06, "loss": 0.6849, "step": 3037 }, { "epoch": 0.08869814020028613, "grad_norm": 0.7593327286203182, "learning_rate": 4.433741973146527e-06, "loss": 0.7208, "step": 3038 }, { "epoch": 0.08872733642813349, "grad_norm": 0.7959846315106213, "learning_rate": 4.435201401050788e-06, "loss": 0.7277, "step": 3039 }, { "epoch": 0.08875653265598085, "grad_norm": 0.8419446847726476, "learning_rate": 4.43666082895505e-06, "loss": 0.7647, "step": 3040 }, { "epoch": 0.08878572888382821, "grad_norm": 0.8246552342655882, "learning_rate": 4.438120256859311e-06, "loss": 0.877, "step": 3041 }, { "epoch": 0.08881492511167557, "grad_norm": 0.8054267874250225, "learning_rate": 4.439579684763573e-06, "loss": 0.729, "step": 3042 }, { "epoch": 0.08884412133952294, "grad_norm": 0.7909275547081193, "learning_rate": 4.4410391126678346e-06, "loss": 0.7296, "step": 3043 }, { "epoch": 0.0888733175673703, "grad_norm": 0.8191320202595951, "learning_rate": 4.442498540572096e-06, "loss": 0.7655, "step": 3044 }, { "epoch": 0.08890251379521766, "grad_norm": 0.7773419410598352, "learning_rate": 4.443957968476358e-06, "loss": 0.7841, "step": 3045 }, { "epoch": 0.08893171002306502, "grad_norm": 0.7830232691258118, "learning_rate": 4.445417396380619e-06, "loss": 0.7794, "step": 3046 }, { "epoch": 0.08896090625091238, "grad_norm": 0.7196469660843126, "learning_rate": 4.446876824284881e-06, "loss": 0.6605, "step": 3047 }, { "epoch": 0.08899010247875974, "grad_norm": 0.8340773055970797, "learning_rate": 4.4483362521891425e-06, "loss": 0.7376, "step": 3048 }, { "epoch": 0.0890192987066071, "grad_norm": 0.9232783457903272, "learning_rate": 4.449795680093404e-06, "loss": 0.756, "step": 3049 }, { "epoch": 0.08904849493445446, "grad_norm": 0.8320475985336012, "learning_rate": 4.451255107997665e-06, "loss": 0.8111, "step": 3050 }, { "epoch": 0.08907769116230183, "grad_norm": 0.7727866255443657, "learning_rate": 4.452714535901927e-06, "loss": 0.7621, "step": 3051 }, { "epoch": 0.08910688739014919, "grad_norm": 0.89751397544966, "learning_rate": 4.454173963806188e-06, "loss": 0.7702, "step": 3052 }, { "epoch": 0.08913608361799656, "grad_norm": 0.7679619968401905, "learning_rate": 4.45563339171045e-06, "loss": 0.6931, "step": 3053 }, { "epoch": 0.08916527984584392, "grad_norm": 0.7728049296839885, "learning_rate": 4.457092819614711e-06, "loss": 0.7114, "step": 3054 }, { "epoch": 0.08919447607369128, "grad_norm": 0.8516097601917247, "learning_rate": 4.458552247518973e-06, "loss": 0.7258, "step": 3055 }, { "epoch": 0.08922367230153865, "grad_norm": 0.7393340935576915, "learning_rate": 4.460011675423234e-06, "loss": 0.6273, "step": 3056 }, { "epoch": 0.089252868529386, "grad_norm": 0.7472281439939831, "learning_rate": 4.461471103327496e-06, "loss": 0.711, "step": 3057 }, { "epoch": 0.08928206475723337, "grad_norm": 0.7702528929950113, "learning_rate": 4.4629305312317575e-06, "loss": 0.6946, "step": 3058 }, { "epoch": 0.08931126098508073, "grad_norm": 0.9357372695485864, "learning_rate": 4.464389959136019e-06, "loss": 0.819, "step": 3059 }, { "epoch": 0.08934045721292809, "grad_norm": 0.901793076667923, "learning_rate": 4.465849387040281e-06, "loss": 0.7739, "step": 3060 }, { "epoch": 0.08936965344077545, "grad_norm": 0.7565241415017142, "learning_rate": 4.467308814944542e-06, "loss": 0.7358, "step": 3061 }, { "epoch": 0.08939884966862281, "grad_norm": 0.809655175359519, "learning_rate": 4.468768242848804e-06, "loss": 0.7791, "step": 3062 }, { "epoch": 0.08942804589647017, "grad_norm": 0.746515728758124, "learning_rate": 4.4702276707530654e-06, "loss": 0.6641, "step": 3063 }, { "epoch": 0.08945724212431753, "grad_norm": 0.7483097753820321, "learning_rate": 4.471687098657327e-06, "loss": 0.6883, "step": 3064 }, { "epoch": 0.0894864383521649, "grad_norm": 0.8647895895801571, "learning_rate": 4.473146526561588e-06, "loss": 0.7506, "step": 3065 }, { "epoch": 0.08951563458001226, "grad_norm": 0.7595958937099037, "learning_rate": 4.47460595446585e-06, "loss": 0.6623, "step": 3066 }, { "epoch": 0.08954483080785962, "grad_norm": 0.7210931235848967, "learning_rate": 4.476065382370111e-06, "loss": 0.6148, "step": 3067 }, { "epoch": 0.089574027035707, "grad_norm": 0.7540762498613555, "learning_rate": 4.4775248102743725e-06, "loss": 0.736, "step": 3068 }, { "epoch": 0.08960322326355435, "grad_norm": 0.8084673063291618, "learning_rate": 4.478984238178634e-06, "loss": 0.7441, "step": 3069 }, { "epoch": 0.08963241949140172, "grad_norm": 0.8904493693861038, "learning_rate": 4.480443666082896e-06, "loss": 0.7677, "step": 3070 }, { "epoch": 0.08966161571924908, "grad_norm": 0.7941836328419966, "learning_rate": 4.481903093987157e-06, "loss": 0.771, "step": 3071 }, { "epoch": 0.08969081194709644, "grad_norm": 0.8153529016695317, "learning_rate": 4.483362521891419e-06, "loss": 0.7459, "step": 3072 }, { "epoch": 0.0897200081749438, "grad_norm": 0.711623036467451, "learning_rate": 4.4848219497956804e-06, "loss": 0.688, "step": 3073 }, { "epoch": 0.08974920440279116, "grad_norm": 0.7668111082851499, "learning_rate": 4.486281377699942e-06, "loss": 0.7075, "step": 3074 }, { "epoch": 0.08977840063063852, "grad_norm": 0.7999570884150686, "learning_rate": 4.487740805604204e-06, "loss": 0.6953, "step": 3075 }, { "epoch": 0.08980759685848588, "grad_norm": 0.7901559230525126, "learning_rate": 4.489200233508465e-06, "loss": 0.7025, "step": 3076 }, { "epoch": 0.08983679308633324, "grad_norm": 0.8108852193879283, "learning_rate": 4.490659661412727e-06, "loss": 0.8162, "step": 3077 }, { "epoch": 0.0898659893141806, "grad_norm": 0.7861790466828182, "learning_rate": 4.492119089316988e-06, "loss": 0.7942, "step": 3078 }, { "epoch": 0.08989518554202797, "grad_norm": 1.3881726738562514, "learning_rate": 4.49357851722125e-06, "loss": 0.8447, "step": 3079 }, { "epoch": 0.08992438176987533, "grad_norm": 0.8417854605933804, "learning_rate": 4.495037945125511e-06, "loss": 0.7354, "step": 3080 }, { "epoch": 0.08995357799772269, "grad_norm": 0.793498467771498, "learning_rate": 4.496497373029773e-06, "loss": 0.7487, "step": 3081 }, { "epoch": 0.08998277422557005, "grad_norm": 0.8359051525705032, "learning_rate": 4.497956800934034e-06, "loss": 0.8236, "step": 3082 }, { "epoch": 0.09001197045341743, "grad_norm": 1.0229868612175133, "learning_rate": 4.499416228838296e-06, "loss": 0.8075, "step": 3083 }, { "epoch": 0.09004116668126479, "grad_norm": 0.8287410232170397, "learning_rate": 4.500875656742557e-06, "loss": 0.7722, "step": 3084 }, { "epoch": 0.09007036290911215, "grad_norm": 0.8087366609654438, "learning_rate": 4.502335084646819e-06, "loss": 0.7935, "step": 3085 }, { "epoch": 0.09009955913695951, "grad_norm": 0.7782844694860894, "learning_rate": 4.50379451255108e-06, "loss": 0.6822, "step": 3086 }, { "epoch": 0.09012875536480687, "grad_norm": 0.7630577054878334, "learning_rate": 4.505253940455342e-06, "loss": 0.6065, "step": 3087 }, { "epoch": 0.09015795159265423, "grad_norm": 0.7714686600128146, "learning_rate": 4.506713368359603e-06, "loss": 0.7308, "step": 3088 }, { "epoch": 0.09018714782050159, "grad_norm": 0.8551932675327271, "learning_rate": 4.508172796263865e-06, "loss": 0.7673, "step": 3089 }, { "epoch": 0.09021634404834895, "grad_norm": 0.8517716475548994, "learning_rate": 4.509632224168127e-06, "loss": 0.7448, "step": 3090 }, { "epoch": 0.09024554027619631, "grad_norm": 1.0005817110442412, "learning_rate": 4.511091652072388e-06, "loss": 0.7557, "step": 3091 }, { "epoch": 0.09027473650404368, "grad_norm": 0.7138909494686511, "learning_rate": 4.51255107997665e-06, "loss": 0.6611, "step": 3092 }, { "epoch": 0.09030393273189104, "grad_norm": 0.8724208012050298, "learning_rate": 4.514010507880911e-06, "loss": 0.7324, "step": 3093 }, { "epoch": 0.0903331289597384, "grad_norm": 0.8030480703095888, "learning_rate": 4.5154699357851725e-06, "loss": 0.7252, "step": 3094 }, { "epoch": 0.09036232518758576, "grad_norm": 0.7637648922876796, "learning_rate": 4.516929363689434e-06, "loss": 0.7219, "step": 3095 }, { "epoch": 0.09039152141543312, "grad_norm": 0.7605376193104855, "learning_rate": 4.518388791593696e-06, "loss": 0.6535, "step": 3096 }, { "epoch": 0.09042071764328048, "grad_norm": 0.8112152051968368, "learning_rate": 4.519848219497957e-06, "loss": 0.7835, "step": 3097 }, { "epoch": 0.09044991387112786, "grad_norm": 0.8733404137696076, "learning_rate": 4.521307647402219e-06, "loss": 0.7642, "step": 3098 }, { "epoch": 0.09047911009897522, "grad_norm": 0.7777455008344768, "learning_rate": 4.5227670753064805e-06, "loss": 0.7648, "step": 3099 }, { "epoch": 0.09050830632682258, "grad_norm": 0.8083847247150705, "learning_rate": 4.524226503210742e-06, "loss": 0.7649, "step": 3100 }, { "epoch": 0.09053750255466994, "grad_norm": 0.9035139642790329, "learning_rate": 4.525685931115003e-06, "loss": 0.8123, "step": 3101 }, { "epoch": 0.0905666987825173, "grad_norm": 0.7585333640578703, "learning_rate": 4.527145359019265e-06, "loss": 0.6759, "step": 3102 }, { "epoch": 0.09059589501036466, "grad_norm": 0.779682681396995, "learning_rate": 4.528604786923526e-06, "loss": 0.7411, "step": 3103 }, { "epoch": 0.09062509123821202, "grad_norm": 0.7660419675841643, "learning_rate": 4.5300642148277876e-06, "loss": 0.6714, "step": 3104 }, { "epoch": 0.09065428746605939, "grad_norm": 0.8167784843874972, "learning_rate": 4.531523642732049e-06, "loss": 0.7239, "step": 3105 }, { "epoch": 0.09068348369390675, "grad_norm": 0.8306460878803877, "learning_rate": 4.532983070636311e-06, "loss": 0.7282, "step": 3106 }, { "epoch": 0.09071267992175411, "grad_norm": 0.756450901932458, "learning_rate": 4.534442498540573e-06, "loss": 0.7295, "step": 3107 }, { "epoch": 0.09074187614960147, "grad_norm": 0.9687624111078256, "learning_rate": 4.535901926444834e-06, "loss": 0.7431, "step": 3108 }, { "epoch": 0.09077107237744883, "grad_norm": 0.7554001212436436, "learning_rate": 4.5373613543490955e-06, "loss": 0.6546, "step": 3109 }, { "epoch": 0.09080026860529619, "grad_norm": 0.7319646560827175, "learning_rate": 4.538820782253357e-06, "loss": 0.6284, "step": 3110 }, { "epoch": 0.09082946483314355, "grad_norm": 0.7827590223625377, "learning_rate": 4.540280210157619e-06, "loss": 0.6968, "step": 3111 }, { "epoch": 0.09085866106099091, "grad_norm": 0.8052951602834962, "learning_rate": 4.54173963806188e-06, "loss": 0.7336, "step": 3112 }, { "epoch": 0.09088785728883828, "grad_norm": 0.8448087434454382, "learning_rate": 4.543199065966142e-06, "loss": 0.7965, "step": 3113 }, { "epoch": 0.09091705351668565, "grad_norm": 0.8854416457295361, "learning_rate": 4.544658493870403e-06, "loss": 0.7814, "step": 3114 }, { "epoch": 0.09094624974453301, "grad_norm": 2.5578139404305498, "learning_rate": 4.546117921774665e-06, "loss": 0.7348, "step": 3115 }, { "epoch": 0.09097544597238037, "grad_norm": 0.9088581820905752, "learning_rate": 4.547577349678926e-06, "loss": 0.7987, "step": 3116 }, { "epoch": 0.09100464220022773, "grad_norm": 0.7574977097068474, "learning_rate": 4.549036777583188e-06, "loss": 0.7665, "step": 3117 }, { "epoch": 0.0910338384280751, "grad_norm": 0.7816994551987179, "learning_rate": 4.550496205487449e-06, "loss": 0.7667, "step": 3118 }, { "epoch": 0.09106303465592246, "grad_norm": 0.9220816207809516, "learning_rate": 4.5519556333917105e-06, "loss": 0.7863, "step": 3119 }, { "epoch": 0.09109223088376982, "grad_norm": 0.8781594913486419, "learning_rate": 4.553415061295972e-06, "loss": 0.7257, "step": 3120 }, { "epoch": 0.09112142711161718, "grad_norm": 0.8420932448887103, "learning_rate": 4.554874489200234e-06, "loss": 0.8366, "step": 3121 }, { "epoch": 0.09115062333946454, "grad_norm": 0.8176992682747917, "learning_rate": 4.556333917104496e-06, "loss": 0.7824, "step": 3122 }, { "epoch": 0.0911798195673119, "grad_norm": 0.7796578529675134, "learning_rate": 4.557793345008757e-06, "loss": 0.7285, "step": 3123 }, { "epoch": 0.09120901579515926, "grad_norm": 0.7860756250505629, "learning_rate": 4.559252772913018e-06, "loss": 0.7216, "step": 3124 }, { "epoch": 0.09123821202300662, "grad_norm": 0.7746029642501042, "learning_rate": 4.56071220081728e-06, "loss": 0.7951, "step": 3125 }, { "epoch": 0.09126740825085398, "grad_norm": 0.7242642683914648, "learning_rate": 4.562171628721542e-06, "loss": 0.6659, "step": 3126 }, { "epoch": 0.09129660447870135, "grad_norm": 0.7350580830597042, "learning_rate": 4.563631056625803e-06, "loss": 0.7075, "step": 3127 }, { "epoch": 0.09132580070654871, "grad_norm": 0.7813975444523964, "learning_rate": 4.565090484530065e-06, "loss": 0.7243, "step": 3128 }, { "epoch": 0.09135499693439608, "grad_norm": 0.731570479668782, "learning_rate": 4.566549912434326e-06, "loss": 0.6797, "step": 3129 }, { "epoch": 0.09138419316224344, "grad_norm": 0.8311472230024414, "learning_rate": 4.568009340338588e-06, "loss": 0.7688, "step": 3130 }, { "epoch": 0.0914133893900908, "grad_norm": 0.7930325413282617, "learning_rate": 4.569468768242849e-06, "loss": 0.7606, "step": 3131 }, { "epoch": 0.09144258561793817, "grad_norm": 0.7778432320252843, "learning_rate": 4.570928196147111e-06, "loss": 0.7528, "step": 3132 }, { "epoch": 0.09147178184578553, "grad_norm": 0.8248130281202626, "learning_rate": 4.572387624051372e-06, "loss": 0.7227, "step": 3133 }, { "epoch": 0.09150097807363289, "grad_norm": 0.8520221206543783, "learning_rate": 4.573847051955633e-06, "loss": 0.82, "step": 3134 }, { "epoch": 0.09153017430148025, "grad_norm": 0.7692690315134613, "learning_rate": 4.575306479859895e-06, "loss": 0.7024, "step": 3135 }, { "epoch": 0.09155937052932761, "grad_norm": 0.7996514833890473, "learning_rate": 4.576765907764157e-06, "loss": 0.747, "step": 3136 }, { "epoch": 0.09158856675717497, "grad_norm": 0.8591956374421145, "learning_rate": 4.578225335668418e-06, "loss": 0.7843, "step": 3137 }, { "epoch": 0.09161776298502233, "grad_norm": 0.8003249942001879, "learning_rate": 4.57968476357268e-06, "loss": 0.8036, "step": 3138 }, { "epoch": 0.0916469592128697, "grad_norm": 0.7628973500798281, "learning_rate": 4.581144191476941e-06, "loss": 0.6856, "step": 3139 }, { "epoch": 0.09167615544071706, "grad_norm": 0.7897665143920849, "learning_rate": 4.582603619381203e-06, "loss": 0.7007, "step": 3140 }, { "epoch": 0.09170535166856442, "grad_norm": 0.8154635494294492, "learning_rate": 4.584063047285465e-06, "loss": 0.7673, "step": 3141 }, { "epoch": 0.09173454789641178, "grad_norm": 0.7435668631101165, "learning_rate": 4.585522475189726e-06, "loss": 0.6814, "step": 3142 }, { "epoch": 0.09176374412425914, "grad_norm": 0.7695848564045699, "learning_rate": 4.586981903093988e-06, "loss": 0.6964, "step": 3143 }, { "epoch": 0.09179294035210651, "grad_norm": 0.8863953744848674, "learning_rate": 4.588441330998249e-06, "loss": 0.8256, "step": 3144 }, { "epoch": 0.09182213657995388, "grad_norm": 0.7781075941127367, "learning_rate": 4.5899007589025105e-06, "loss": 0.7104, "step": 3145 }, { "epoch": 0.09185133280780124, "grad_norm": 0.824963814614948, "learning_rate": 4.591360186806772e-06, "loss": 0.74, "step": 3146 }, { "epoch": 0.0918805290356486, "grad_norm": 0.8057708910549309, "learning_rate": 4.592819614711034e-06, "loss": 0.8247, "step": 3147 }, { "epoch": 0.09190972526349596, "grad_norm": 0.7768004935676478, "learning_rate": 4.594279042615295e-06, "loss": 0.7487, "step": 3148 }, { "epoch": 0.09193892149134332, "grad_norm": 0.827556009924197, "learning_rate": 4.595738470519557e-06, "loss": 0.7705, "step": 3149 }, { "epoch": 0.09196811771919068, "grad_norm": 0.7916601498212738, "learning_rate": 4.5971978984238184e-06, "loss": 0.7325, "step": 3150 }, { "epoch": 0.09199731394703804, "grad_norm": 0.8687820298675888, "learning_rate": 4.59865732632808e-06, "loss": 0.8443, "step": 3151 }, { "epoch": 0.0920265101748854, "grad_norm": 0.7513242067138038, "learning_rate": 4.600116754232341e-06, "loss": 0.689, "step": 3152 }, { "epoch": 0.09205570640273276, "grad_norm": 0.7560395776166738, "learning_rate": 4.601576182136603e-06, "loss": 0.7192, "step": 3153 }, { "epoch": 0.09208490263058013, "grad_norm": 0.7283236298390714, "learning_rate": 4.603035610040864e-06, "loss": 0.6859, "step": 3154 }, { "epoch": 0.09211409885842749, "grad_norm": 0.7903489207848793, "learning_rate": 4.6044950379451255e-06, "loss": 0.6641, "step": 3155 }, { "epoch": 0.09214329508627485, "grad_norm": 0.8288216187421621, "learning_rate": 4.605954465849388e-06, "loss": 0.6801, "step": 3156 }, { "epoch": 0.09217249131412221, "grad_norm": 0.8446580280290965, "learning_rate": 4.607413893753649e-06, "loss": 0.8044, "step": 3157 }, { "epoch": 0.09220168754196957, "grad_norm": 0.8720061725481718, "learning_rate": 4.608873321657911e-06, "loss": 0.7921, "step": 3158 }, { "epoch": 0.09223088376981695, "grad_norm": 0.802949529374486, "learning_rate": 4.610332749562172e-06, "loss": 0.7827, "step": 3159 }, { "epoch": 0.09226007999766431, "grad_norm": 0.7292755630228416, "learning_rate": 4.6117921774664335e-06, "loss": 0.7086, "step": 3160 }, { "epoch": 0.09228927622551167, "grad_norm": 0.9533299670281454, "learning_rate": 4.613251605370695e-06, "loss": 0.5701, "step": 3161 }, { "epoch": 0.09231847245335903, "grad_norm": 0.7848660037187076, "learning_rate": 4.614711033274957e-06, "loss": 0.767, "step": 3162 }, { "epoch": 0.09234766868120639, "grad_norm": 0.9717179186748707, "learning_rate": 4.616170461179218e-06, "loss": 0.8936, "step": 3163 }, { "epoch": 0.09237686490905375, "grad_norm": 0.6985466296895361, "learning_rate": 4.61762988908348e-06, "loss": 0.6399, "step": 3164 }, { "epoch": 0.09240606113690111, "grad_norm": 0.7811956479027873, "learning_rate": 4.619089316987741e-06, "loss": 0.7191, "step": 3165 }, { "epoch": 0.09243525736474847, "grad_norm": 1.140142323570078, "learning_rate": 4.620548744892003e-06, "loss": 0.8408, "step": 3166 }, { "epoch": 0.09246445359259584, "grad_norm": 0.7956678997896243, "learning_rate": 4.622008172796264e-06, "loss": 0.7457, "step": 3167 }, { "epoch": 0.0924936498204432, "grad_norm": 0.8072219004830589, "learning_rate": 4.623467600700526e-06, "loss": 0.6574, "step": 3168 }, { "epoch": 0.09252284604829056, "grad_norm": 1.734117572161096, "learning_rate": 4.624927028604787e-06, "loss": 0.8009, "step": 3169 }, { "epoch": 0.09255204227613792, "grad_norm": 0.7773920568392271, "learning_rate": 4.6263864565090485e-06, "loss": 0.6994, "step": 3170 }, { "epoch": 0.09258123850398528, "grad_norm": 0.7050383470028888, "learning_rate": 4.6278458844133106e-06, "loss": 0.6347, "step": 3171 }, { "epoch": 0.09261043473183264, "grad_norm": 0.7923943158844597, "learning_rate": 4.629305312317572e-06, "loss": 0.7573, "step": 3172 }, { "epoch": 0.09263963095968, "grad_norm": 0.7708782534379669, "learning_rate": 4.630764740221834e-06, "loss": 0.7313, "step": 3173 }, { "epoch": 0.09266882718752738, "grad_norm": 0.7564068742024741, "learning_rate": 4.632224168126095e-06, "loss": 0.7272, "step": 3174 }, { "epoch": 0.09269802341537474, "grad_norm": 1.1775826696126293, "learning_rate": 4.633683596030356e-06, "loss": 0.7896, "step": 3175 }, { "epoch": 0.0927272196432221, "grad_norm": 0.7244145424758732, "learning_rate": 4.635143023934618e-06, "loss": 0.6926, "step": 3176 }, { "epoch": 0.09275641587106946, "grad_norm": 0.731806862038673, "learning_rate": 4.63660245183888e-06, "loss": 0.6419, "step": 3177 }, { "epoch": 0.09278561209891682, "grad_norm": 1.655881683892565, "learning_rate": 4.638061879743141e-06, "loss": 0.7995, "step": 3178 }, { "epoch": 0.09281480832676418, "grad_norm": 0.8263184556763007, "learning_rate": 4.639521307647403e-06, "loss": 0.7559, "step": 3179 }, { "epoch": 0.09284400455461154, "grad_norm": 0.8445664876312011, "learning_rate": 4.640980735551664e-06, "loss": 0.7386, "step": 3180 }, { "epoch": 0.0928732007824589, "grad_norm": 0.8875868452507413, "learning_rate": 4.6424401634559256e-06, "loss": 0.8169, "step": 3181 }, { "epoch": 0.09290239701030627, "grad_norm": 0.920414551025705, "learning_rate": 4.643899591360187e-06, "loss": 0.7318, "step": 3182 }, { "epoch": 0.09293159323815363, "grad_norm": 0.7657768032387149, "learning_rate": 4.645359019264449e-06, "loss": 0.6651, "step": 3183 }, { "epoch": 0.09296078946600099, "grad_norm": 0.8748419058942508, "learning_rate": 4.64681844716871e-06, "loss": 0.7621, "step": 3184 }, { "epoch": 0.09298998569384835, "grad_norm": 0.7032509009176476, "learning_rate": 4.648277875072971e-06, "loss": 0.6298, "step": 3185 }, { "epoch": 0.09301918192169571, "grad_norm": 0.8188295394681318, "learning_rate": 4.6497373029772335e-06, "loss": 0.7938, "step": 3186 }, { "epoch": 0.09304837814954307, "grad_norm": 1.191927575092044, "learning_rate": 4.651196730881495e-06, "loss": 0.8023, "step": 3187 }, { "epoch": 0.09307757437739043, "grad_norm": 0.7797444990285921, "learning_rate": 4.652656158785757e-06, "loss": 0.7421, "step": 3188 }, { "epoch": 0.09310677060523781, "grad_norm": 0.7890334044399134, "learning_rate": 4.654115586690018e-06, "loss": 0.7838, "step": 3189 }, { "epoch": 0.09313596683308517, "grad_norm": 0.7778240212625201, "learning_rate": 4.655575014594279e-06, "loss": 0.7436, "step": 3190 }, { "epoch": 0.09316516306093253, "grad_norm": 0.8175183119878096, "learning_rate": 4.6570344424985406e-06, "loss": 0.7582, "step": 3191 }, { "epoch": 0.0931943592887799, "grad_norm": 0.8475710156361028, "learning_rate": 4.658493870402803e-06, "loss": 0.8012, "step": 3192 }, { "epoch": 0.09322355551662725, "grad_norm": 0.8306123841034927, "learning_rate": 4.659953298307064e-06, "loss": 0.7697, "step": 3193 }, { "epoch": 0.09325275174447462, "grad_norm": 0.8203859318062784, "learning_rate": 4.661412726211326e-06, "loss": 0.8149, "step": 3194 }, { "epoch": 0.09328194797232198, "grad_norm": 0.7616658287801026, "learning_rate": 4.662872154115587e-06, "loss": 0.7122, "step": 3195 }, { "epoch": 0.09331114420016934, "grad_norm": 0.7913157234888947, "learning_rate": 4.6643315820198485e-06, "loss": 0.718, "step": 3196 }, { "epoch": 0.0933403404280167, "grad_norm": 0.7759069035278501, "learning_rate": 4.66579100992411e-06, "loss": 0.6925, "step": 3197 }, { "epoch": 0.09336953665586406, "grad_norm": 0.7677150190354768, "learning_rate": 4.667250437828372e-06, "loss": 0.7588, "step": 3198 }, { "epoch": 0.09339873288371142, "grad_norm": 0.7551166030256866, "learning_rate": 4.668709865732633e-06, "loss": 0.6813, "step": 3199 }, { "epoch": 0.09342792911155878, "grad_norm": 0.8323321318545558, "learning_rate": 4.670169293636895e-06, "loss": 0.7191, "step": 3200 }, { "epoch": 0.09345712533940614, "grad_norm": 0.7698002249784345, "learning_rate": 4.6716287215411556e-06, "loss": 0.7119, "step": 3201 }, { "epoch": 0.0934863215672535, "grad_norm": 0.7455702197700952, "learning_rate": 4.673088149445418e-06, "loss": 0.7094, "step": 3202 }, { "epoch": 0.09351551779510087, "grad_norm": 0.7981299816422786, "learning_rate": 4.67454757734968e-06, "loss": 0.7363, "step": 3203 }, { "epoch": 0.09354471402294824, "grad_norm": 0.8170381778017796, "learning_rate": 4.676007005253941e-06, "loss": 0.7641, "step": 3204 }, { "epoch": 0.0935739102507956, "grad_norm": 0.8245960817607135, "learning_rate": 4.677466433158202e-06, "loss": 0.6907, "step": 3205 }, { "epoch": 0.09360310647864296, "grad_norm": 0.8261971315682611, "learning_rate": 4.6789258610624635e-06, "loss": 0.6852, "step": 3206 }, { "epoch": 0.09363230270649033, "grad_norm": 0.8034991096809678, "learning_rate": 4.680385288966726e-06, "loss": 0.7497, "step": 3207 }, { "epoch": 0.09366149893433769, "grad_norm": 0.7488014812300825, "learning_rate": 4.681844716870987e-06, "loss": 0.6887, "step": 3208 }, { "epoch": 0.09369069516218505, "grad_norm": 0.8157070019906532, "learning_rate": 4.683304144775249e-06, "loss": 0.7776, "step": 3209 }, { "epoch": 0.09371989139003241, "grad_norm": 0.7879535213922985, "learning_rate": 4.68476357267951e-06, "loss": 0.7237, "step": 3210 }, { "epoch": 0.09374908761787977, "grad_norm": 0.771101839983947, "learning_rate": 4.686223000583771e-06, "loss": 0.7364, "step": 3211 }, { "epoch": 0.09377828384572713, "grad_norm": 0.8114979753812388, "learning_rate": 4.687682428488033e-06, "loss": 0.7173, "step": 3212 }, { "epoch": 0.09380748007357449, "grad_norm": 0.7269741417274744, "learning_rate": 4.689141856392295e-06, "loss": 0.6618, "step": 3213 }, { "epoch": 0.09383667630142185, "grad_norm": 0.7279562679992372, "learning_rate": 4.690601284296556e-06, "loss": 0.6364, "step": 3214 }, { "epoch": 0.09386587252926921, "grad_norm": 0.7540598683287094, "learning_rate": 4.692060712200818e-06, "loss": 0.7276, "step": 3215 }, { "epoch": 0.09389506875711658, "grad_norm": 0.7888738503400131, "learning_rate": 4.693520140105079e-06, "loss": 0.6629, "step": 3216 }, { "epoch": 0.09392426498496394, "grad_norm": 0.7761801847014848, "learning_rate": 4.694979568009341e-06, "loss": 0.7285, "step": 3217 }, { "epoch": 0.0939534612128113, "grad_norm": 0.8198237790429346, "learning_rate": 4.696438995913603e-06, "loss": 0.808, "step": 3218 }, { "epoch": 0.09398265744065867, "grad_norm": 0.7758195719504027, "learning_rate": 4.697898423817864e-06, "loss": 0.762, "step": 3219 }, { "epoch": 0.09401185366850603, "grad_norm": 0.8046215053401947, "learning_rate": 4.699357851722125e-06, "loss": 0.7945, "step": 3220 }, { "epoch": 0.0940410498963534, "grad_norm": 0.7907590055101351, "learning_rate": 4.7008172796263864e-06, "loss": 0.7254, "step": 3221 }, { "epoch": 0.09407024612420076, "grad_norm": 0.8881723315427238, "learning_rate": 4.7022767075306485e-06, "loss": 0.6431, "step": 3222 }, { "epoch": 0.09409944235204812, "grad_norm": 0.898594440288765, "learning_rate": 4.70373613543491e-06, "loss": 0.8254, "step": 3223 }, { "epoch": 0.09412863857989548, "grad_norm": 0.7414695364308851, "learning_rate": 4.705195563339172e-06, "loss": 0.6687, "step": 3224 }, { "epoch": 0.09415783480774284, "grad_norm": 0.817119912972345, "learning_rate": 4.706654991243433e-06, "loss": 0.7332, "step": 3225 }, { "epoch": 0.0941870310355902, "grad_norm": 0.7786574552038008, "learning_rate": 4.708114419147694e-06, "loss": 0.6737, "step": 3226 }, { "epoch": 0.09421622726343756, "grad_norm": 1.061880567558424, "learning_rate": 4.709573847051956e-06, "loss": 0.8127, "step": 3227 }, { "epoch": 0.09424542349128492, "grad_norm": 0.7396239159301884, "learning_rate": 4.711033274956218e-06, "loss": 0.6474, "step": 3228 }, { "epoch": 0.09427461971913229, "grad_norm": 0.7917932729650162, "learning_rate": 4.712492702860479e-06, "loss": 0.7863, "step": 3229 }, { "epoch": 0.09430381594697965, "grad_norm": 0.8191879132999887, "learning_rate": 4.713952130764741e-06, "loss": 0.7943, "step": 3230 }, { "epoch": 0.09433301217482701, "grad_norm": 0.9216540905802287, "learning_rate": 4.715411558669002e-06, "loss": 0.7762, "step": 3231 }, { "epoch": 0.09436220840267437, "grad_norm": 1.1485600152274855, "learning_rate": 4.7168709865732635e-06, "loss": 0.831, "step": 3232 }, { "epoch": 0.09439140463052173, "grad_norm": 0.8454628586155656, "learning_rate": 4.718330414477525e-06, "loss": 0.787, "step": 3233 }, { "epoch": 0.0944206008583691, "grad_norm": 0.8643413892809888, "learning_rate": 4.719789842381787e-06, "loss": 0.7013, "step": 3234 }, { "epoch": 0.09444979708621647, "grad_norm": 0.8595234467309384, "learning_rate": 4.721249270286048e-06, "loss": 0.7337, "step": 3235 }, { "epoch": 0.09447899331406383, "grad_norm": 0.8123711524394956, "learning_rate": 4.722708698190309e-06, "loss": 0.7444, "step": 3236 }, { "epoch": 0.09450818954191119, "grad_norm": 0.8747382046683038, "learning_rate": 4.7241681260945715e-06, "loss": 0.7361, "step": 3237 }, { "epoch": 0.09453738576975855, "grad_norm": 0.7535872577402097, "learning_rate": 4.725627553998833e-06, "loss": 0.7282, "step": 3238 }, { "epoch": 0.09456658199760591, "grad_norm": 0.7705394174863932, "learning_rate": 4.727086981903095e-06, "loss": 0.7145, "step": 3239 }, { "epoch": 0.09459577822545327, "grad_norm": 0.8002921679800226, "learning_rate": 4.728546409807356e-06, "loss": 0.7538, "step": 3240 }, { "epoch": 0.09462497445330063, "grad_norm": 0.7461663524454935, "learning_rate": 4.730005837711617e-06, "loss": 0.6743, "step": 3241 }, { "epoch": 0.094654170681148, "grad_norm": 0.7142682965978561, "learning_rate": 4.7314652656158785e-06, "loss": 0.693, "step": 3242 }, { "epoch": 0.09468336690899536, "grad_norm": 0.7591156534424482, "learning_rate": 4.732924693520141e-06, "loss": 0.7004, "step": 3243 }, { "epoch": 0.09471256313684272, "grad_norm": 0.8474915695415932, "learning_rate": 4.734384121424402e-06, "loss": 0.7856, "step": 3244 }, { "epoch": 0.09474175936469008, "grad_norm": 0.7301998729222254, "learning_rate": 4.735843549328664e-06, "loss": 0.6943, "step": 3245 }, { "epoch": 0.09477095559253744, "grad_norm": 0.8260437266169238, "learning_rate": 4.737302977232925e-06, "loss": 0.7593, "step": 3246 }, { "epoch": 0.0948001518203848, "grad_norm": 0.7283060282855701, "learning_rate": 4.7387624051371865e-06, "loss": 0.7022, "step": 3247 }, { "epoch": 0.09482934804823216, "grad_norm": 0.7604797867801898, "learning_rate": 4.740221833041448e-06, "loss": 0.673, "step": 3248 }, { "epoch": 0.09485854427607954, "grad_norm": 0.7384286796292225, "learning_rate": 4.74168126094571e-06, "loss": 0.6318, "step": 3249 }, { "epoch": 0.0948877405039269, "grad_norm": 0.716468767903225, "learning_rate": 4.743140688849972e-06, "loss": 0.6736, "step": 3250 }, { "epoch": 0.09491693673177426, "grad_norm": 0.8353961288587395, "learning_rate": 4.744600116754232e-06, "loss": 0.8129, "step": 3251 }, { "epoch": 0.09494613295962162, "grad_norm": 0.7272053174772145, "learning_rate": 4.746059544658494e-06, "loss": 0.6624, "step": 3252 }, { "epoch": 0.09497532918746898, "grad_norm": 0.7899916539426076, "learning_rate": 4.747518972562756e-06, "loss": 0.7597, "step": 3253 }, { "epoch": 0.09500452541531634, "grad_norm": 0.7481986765327264, "learning_rate": 4.748978400467018e-06, "loss": 0.6851, "step": 3254 }, { "epoch": 0.0950337216431637, "grad_norm": 0.8143203027144773, "learning_rate": 4.750437828371279e-06, "loss": 0.7362, "step": 3255 }, { "epoch": 0.09506291787101107, "grad_norm": 0.8242025288759652, "learning_rate": 4.75189725627554e-06, "loss": 0.88, "step": 3256 }, { "epoch": 0.09509211409885843, "grad_norm": 0.7576605349997071, "learning_rate": 4.7533566841798015e-06, "loss": 0.7017, "step": 3257 }, { "epoch": 0.09512131032670579, "grad_norm": 1.0009234989001807, "learning_rate": 4.7548161120840636e-06, "loss": 0.8011, "step": 3258 }, { "epoch": 0.09515050655455315, "grad_norm": 0.796080844920878, "learning_rate": 4.756275539988325e-06, "loss": 0.7674, "step": 3259 }, { "epoch": 0.09517970278240051, "grad_norm": 0.7554511705906852, "learning_rate": 4.757734967892587e-06, "loss": 0.6491, "step": 3260 }, { "epoch": 0.09520889901024787, "grad_norm": 0.9677130725367434, "learning_rate": 4.759194395796848e-06, "loss": 0.7594, "step": 3261 }, { "epoch": 0.09523809523809523, "grad_norm": 0.7861229387102572, "learning_rate": 4.760653823701109e-06, "loss": 0.7227, "step": 3262 }, { "epoch": 0.0952672914659426, "grad_norm": 0.7948427259034726, "learning_rate": 4.762113251605371e-06, "loss": 0.7136, "step": 3263 }, { "epoch": 0.09529648769378997, "grad_norm": 0.749625711642646, "learning_rate": 4.763572679509633e-06, "loss": 0.6919, "step": 3264 }, { "epoch": 0.09532568392163733, "grad_norm": 0.792929616700946, "learning_rate": 4.765032107413894e-06, "loss": 0.7624, "step": 3265 }, { "epoch": 0.09535488014948469, "grad_norm": 0.8280105488151608, "learning_rate": 4.766491535318156e-06, "loss": 0.7512, "step": 3266 }, { "epoch": 0.09538407637733205, "grad_norm": 0.8713611057687117, "learning_rate": 4.767950963222417e-06, "loss": 0.7164, "step": 3267 }, { "epoch": 0.09541327260517941, "grad_norm": 1.1935964538527881, "learning_rate": 4.7694103911266786e-06, "loss": 0.6973, "step": 3268 }, { "epoch": 0.09544246883302678, "grad_norm": 0.7894029754907119, "learning_rate": 4.770869819030941e-06, "loss": 0.7242, "step": 3269 }, { "epoch": 0.09547166506087414, "grad_norm": 0.8070327818411634, "learning_rate": 4.772329246935202e-06, "loss": 0.6401, "step": 3270 }, { "epoch": 0.0955008612887215, "grad_norm": 0.7851418555643351, "learning_rate": 4.773788674839463e-06, "loss": 0.7334, "step": 3271 }, { "epoch": 0.09553005751656886, "grad_norm": 0.8155148607489646, "learning_rate": 4.775248102743724e-06, "loss": 0.789, "step": 3272 }, { "epoch": 0.09555925374441622, "grad_norm": 0.750007281839435, "learning_rate": 4.7767075306479865e-06, "loss": 0.7192, "step": 3273 }, { "epoch": 0.09558844997226358, "grad_norm": 0.8181893614990441, "learning_rate": 4.778166958552248e-06, "loss": 0.6703, "step": 3274 }, { "epoch": 0.09561764620011094, "grad_norm": 0.7491847693734668, "learning_rate": 4.77962638645651e-06, "loss": 0.7217, "step": 3275 }, { "epoch": 0.0956468424279583, "grad_norm": 0.7223954518357771, "learning_rate": 4.781085814360771e-06, "loss": 0.6598, "step": 3276 }, { "epoch": 0.09567603865580566, "grad_norm": 0.8419731401361185, "learning_rate": 4.782545242265032e-06, "loss": 0.8492, "step": 3277 }, { "epoch": 0.09570523488365303, "grad_norm": 0.8147348040951576, "learning_rate": 4.7840046701692936e-06, "loss": 0.7568, "step": 3278 }, { "epoch": 0.0957344311115004, "grad_norm": 0.7930675107053253, "learning_rate": 4.785464098073556e-06, "loss": 0.6828, "step": 3279 }, { "epoch": 0.09576362733934776, "grad_norm": 0.7203053798689071, "learning_rate": 4.786923525977817e-06, "loss": 0.6295, "step": 3280 }, { "epoch": 0.09579282356719512, "grad_norm": 0.7221064373455561, "learning_rate": 4.788382953882079e-06, "loss": 0.6563, "step": 3281 }, { "epoch": 0.09582201979504248, "grad_norm": 0.8597894411448403, "learning_rate": 4.78984238178634e-06, "loss": 0.6983, "step": 3282 }, { "epoch": 0.09585121602288985, "grad_norm": 0.8007902647213347, "learning_rate": 4.7913018096906015e-06, "loss": 0.75, "step": 3283 }, { "epoch": 0.09588041225073721, "grad_norm": 0.7708660026387488, "learning_rate": 4.792761237594864e-06, "loss": 0.7472, "step": 3284 }, { "epoch": 0.09590960847858457, "grad_norm": 1.058682908386878, "learning_rate": 4.794220665499125e-06, "loss": 0.6767, "step": 3285 }, { "epoch": 0.09593880470643193, "grad_norm": 0.7578774892549495, "learning_rate": 4.795680093403386e-06, "loss": 0.7275, "step": 3286 }, { "epoch": 0.09596800093427929, "grad_norm": 0.7904660104977748, "learning_rate": 4.797139521307647e-06, "loss": 0.7611, "step": 3287 }, { "epoch": 0.09599719716212665, "grad_norm": 0.7979453080984771, "learning_rate": 4.7985989492119094e-06, "loss": 0.7456, "step": 3288 }, { "epoch": 0.09602639338997401, "grad_norm": 0.7939329743727422, "learning_rate": 4.800058377116171e-06, "loss": 0.6477, "step": 3289 }, { "epoch": 0.09605558961782137, "grad_norm": 1.0005578749698167, "learning_rate": 4.801517805020433e-06, "loss": 0.8116, "step": 3290 }, { "epoch": 0.09608478584566874, "grad_norm": 0.8264725752792057, "learning_rate": 4.802977232924694e-06, "loss": 0.7975, "step": 3291 }, { "epoch": 0.0961139820735161, "grad_norm": 0.6649294888761157, "learning_rate": 4.804436660828955e-06, "loss": 0.5326, "step": 3292 }, { "epoch": 0.09614317830136346, "grad_norm": 0.7556941297784588, "learning_rate": 4.8058960887332165e-06, "loss": 0.7375, "step": 3293 }, { "epoch": 0.09617237452921083, "grad_norm": 0.7390015190083694, "learning_rate": 4.807355516637479e-06, "loss": 0.626, "step": 3294 }, { "epoch": 0.0962015707570582, "grad_norm": 0.6855054816263124, "learning_rate": 4.80881494454174e-06, "loss": 0.5996, "step": 3295 }, { "epoch": 0.09623076698490556, "grad_norm": 0.7241555746013242, "learning_rate": 4.810274372446002e-06, "loss": 0.668, "step": 3296 }, { "epoch": 0.09625996321275292, "grad_norm": 0.7703034545197676, "learning_rate": 4.811733800350263e-06, "loss": 0.7629, "step": 3297 }, { "epoch": 0.09628915944060028, "grad_norm": 0.7513428766751158, "learning_rate": 4.8131932282545244e-06, "loss": 0.6861, "step": 3298 }, { "epoch": 0.09631835566844764, "grad_norm": 0.794558677410127, "learning_rate": 4.8146526561587865e-06, "loss": 0.7382, "step": 3299 }, { "epoch": 0.096347551896295, "grad_norm": 0.713500406738096, "learning_rate": 4.816112084063048e-06, "loss": 0.6611, "step": 3300 }, { "epoch": 0.09637674812414236, "grad_norm": 0.795100783069535, "learning_rate": 4.817571511967309e-06, "loss": 0.6624, "step": 3301 }, { "epoch": 0.09640594435198972, "grad_norm": 0.8494432886941176, "learning_rate": 4.81903093987157e-06, "loss": 0.8356, "step": 3302 }, { "epoch": 0.09643514057983708, "grad_norm": 0.7786631053043227, "learning_rate": 4.820490367775832e-06, "loss": 0.7639, "step": 3303 }, { "epoch": 0.09646433680768444, "grad_norm": 0.8052581133008418, "learning_rate": 4.821949795680094e-06, "loss": 0.7852, "step": 3304 }, { "epoch": 0.0964935330355318, "grad_norm": 0.7772244914124946, "learning_rate": 4.823409223584356e-06, "loss": 0.7284, "step": 3305 }, { "epoch": 0.09652272926337917, "grad_norm": 0.8911789608537976, "learning_rate": 4.824868651488617e-06, "loss": 0.843, "step": 3306 }, { "epoch": 0.09655192549122653, "grad_norm": 1.7928798718708028, "learning_rate": 4.826328079392878e-06, "loss": 0.778, "step": 3307 }, { "epoch": 0.09658112171907389, "grad_norm": 0.763612764902764, "learning_rate": 4.8277875072971394e-06, "loss": 0.6491, "step": 3308 }, { "epoch": 0.09661031794692125, "grad_norm": 0.7685058925985058, "learning_rate": 4.8292469352014015e-06, "loss": 0.7386, "step": 3309 }, { "epoch": 0.09663951417476863, "grad_norm": 0.7291398698510531, "learning_rate": 4.830706363105663e-06, "loss": 0.6807, "step": 3310 }, { "epoch": 0.09666871040261599, "grad_norm": 0.7987012873298297, "learning_rate": 4.832165791009925e-06, "loss": 0.7791, "step": 3311 }, { "epoch": 0.09669790663046335, "grad_norm": 0.8172865155332291, "learning_rate": 4.833625218914186e-06, "loss": 0.7017, "step": 3312 }, { "epoch": 0.09672710285831071, "grad_norm": 0.7075633842027281, "learning_rate": 4.835084646818447e-06, "loss": 0.6223, "step": 3313 }, { "epoch": 0.09675629908615807, "grad_norm": 0.9103014510832654, "learning_rate": 4.8365440747227095e-06, "loss": 0.8127, "step": 3314 }, { "epoch": 0.09678549531400543, "grad_norm": 0.7713847855346331, "learning_rate": 4.838003502626971e-06, "loss": 0.7303, "step": 3315 }, { "epoch": 0.0968146915418528, "grad_norm": 0.8647320279299825, "learning_rate": 4.839462930531233e-06, "loss": 0.7384, "step": 3316 }, { "epoch": 0.09684388776970015, "grad_norm": 0.9067210697716649, "learning_rate": 4.840922358435494e-06, "loss": 0.8035, "step": 3317 }, { "epoch": 0.09687308399754752, "grad_norm": 0.7580929726068218, "learning_rate": 4.842381786339755e-06, "loss": 0.7091, "step": 3318 }, { "epoch": 0.09690228022539488, "grad_norm": 0.8179959553702743, "learning_rate": 4.8438412142440165e-06, "loss": 0.7599, "step": 3319 }, { "epoch": 0.09693147645324224, "grad_norm": 0.8171509003346347, "learning_rate": 4.845300642148279e-06, "loss": 0.7688, "step": 3320 }, { "epoch": 0.0969606726810896, "grad_norm": 0.8052255882421903, "learning_rate": 4.84676007005254e-06, "loss": 0.738, "step": 3321 }, { "epoch": 0.09698986890893696, "grad_norm": 0.759235040463848, "learning_rate": 4.848219497956801e-06, "loss": 0.7176, "step": 3322 }, { "epoch": 0.09701906513678432, "grad_norm": 0.8119236776469951, "learning_rate": 4.849678925861062e-06, "loss": 0.7368, "step": 3323 }, { "epoch": 0.09704826136463168, "grad_norm": 0.8251581541387887, "learning_rate": 4.8511383537653245e-06, "loss": 0.7643, "step": 3324 }, { "epoch": 0.09707745759247906, "grad_norm": 0.7619964166917573, "learning_rate": 4.852597781669586e-06, "loss": 0.753, "step": 3325 }, { "epoch": 0.09710665382032642, "grad_norm": 0.8630054337568301, "learning_rate": 4.854057209573848e-06, "loss": 0.7813, "step": 3326 }, { "epoch": 0.09713585004817378, "grad_norm": 0.9511762565439504, "learning_rate": 4.855516637478109e-06, "loss": 0.7494, "step": 3327 }, { "epoch": 0.09716504627602114, "grad_norm": 1.0533956880053237, "learning_rate": 4.85697606538237e-06, "loss": 0.7544, "step": 3328 }, { "epoch": 0.0971942425038685, "grad_norm": 1.3679496839058691, "learning_rate": 4.8584354932866315e-06, "loss": 0.7784, "step": 3329 }, { "epoch": 0.09722343873171586, "grad_norm": 0.8293150434061595, "learning_rate": 4.859894921190894e-06, "loss": 0.692, "step": 3330 }, { "epoch": 0.09725263495956323, "grad_norm": 0.8984604011068682, "learning_rate": 4.861354349095156e-06, "loss": 0.6763, "step": 3331 }, { "epoch": 0.09728183118741059, "grad_norm": 0.6902765651855516, "learning_rate": 4.862813776999417e-06, "loss": 0.6, "step": 3332 }, { "epoch": 0.09731102741525795, "grad_norm": 0.7507071356277035, "learning_rate": 4.864273204903678e-06, "loss": 0.7101, "step": 3333 }, { "epoch": 0.09734022364310531, "grad_norm": 0.7439160283412469, "learning_rate": 4.8657326328079395e-06, "loss": 0.7133, "step": 3334 }, { "epoch": 0.09736941987095267, "grad_norm": 0.8172037403373765, "learning_rate": 4.8671920607122016e-06, "loss": 0.7656, "step": 3335 }, { "epoch": 0.09739861609880003, "grad_norm": 0.7280426928932481, "learning_rate": 4.868651488616463e-06, "loss": 0.544, "step": 3336 }, { "epoch": 0.09742781232664739, "grad_norm": 0.9809258943286298, "learning_rate": 4.870110916520724e-06, "loss": 0.6937, "step": 3337 }, { "epoch": 0.09745700855449475, "grad_norm": 0.7744307819026017, "learning_rate": 4.871570344424985e-06, "loss": 0.6755, "step": 3338 }, { "epoch": 0.09748620478234211, "grad_norm": 0.7115911471591305, "learning_rate": 4.873029772329247e-06, "loss": 0.6681, "step": 3339 }, { "epoch": 0.09751540101018949, "grad_norm": 0.7855199910102042, "learning_rate": 4.874489200233509e-06, "loss": 0.7299, "step": 3340 }, { "epoch": 0.09754459723803685, "grad_norm": 0.808298579528917, "learning_rate": 4.875948628137771e-06, "loss": 0.7574, "step": 3341 }, { "epoch": 0.09757379346588421, "grad_norm": 0.7973425750593486, "learning_rate": 4.877408056042032e-06, "loss": 0.767, "step": 3342 }, { "epoch": 0.09760298969373157, "grad_norm": 0.84648193345243, "learning_rate": 4.878867483946293e-06, "loss": 0.8233, "step": 3343 }, { "epoch": 0.09763218592157893, "grad_norm": 0.7864466891218584, "learning_rate": 4.8803269118505545e-06, "loss": 0.7422, "step": 3344 }, { "epoch": 0.0976613821494263, "grad_norm": 0.743814499323568, "learning_rate": 4.8817863397548166e-06, "loss": 0.6696, "step": 3345 }, { "epoch": 0.09769057837727366, "grad_norm": 0.8219708493462686, "learning_rate": 4.883245767659079e-06, "loss": 0.7552, "step": 3346 }, { "epoch": 0.09771977460512102, "grad_norm": 0.7456270848698388, "learning_rate": 4.88470519556334e-06, "loss": 0.6404, "step": 3347 }, { "epoch": 0.09774897083296838, "grad_norm": 0.7248587644030227, "learning_rate": 4.886164623467601e-06, "loss": 0.6692, "step": 3348 }, { "epoch": 0.09777816706081574, "grad_norm": 0.9008973772695488, "learning_rate": 4.887624051371862e-06, "loss": 0.7218, "step": 3349 }, { "epoch": 0.0978073632886631, "grad_norm": 0.7866230664813298, "learning_rate": 4.8890834792761245e-06, "loss": 0.7399, "step": 3350 }, { "epoch": 0.09783655951651046, "grad_norm": 0.7855808240946729, "learning_rate": 4.890542907180386e-06, "loss": 0.7686, "step": 3351 }, { "epoch": 0.09786575574435782, "grad_norm": 0.7702024507318899, "learning_rate": 4.892002335084647e-06, "loss": 0.7265, "step": 3352 }, { "epoch": 0.09789495197220519, "grad_norm": 0.9710065999938078, "learning_rate": 4.893461762988908e-06, "loss": 0.7145, "step": 3353 }, { "epoch": 0.09792414820005255, "grad_norm": 0.7959829624711406, "learning_rate": 4.89492119089317e-06, "loss": 0.7597, "step": 3354 }, { "epoch": 0.09795334442789992, "grad_norm": 0.8024283257158733, "learning_rate": 4.896380618797432e-06, "loss": 0.8115, "step": 3355 }, { "epoch": 0.09798254065574728, "grad_norm": 0.7344013562700753, "learning_rate": 4.897840046701694e-06, "loss": 0.7178, "step": 3356 }, { "epoch": 0.09801173688359464, "grad_norm": 0.7545833578536469, "learning_rate": 4.899299474605955e-06, "loss": 0.7416, "step": 3357 }, { "epoch": 0.098040933111442, "grad_norm": 0.8989666337864776, "learning_rate": 4.900758902510216e-06, "loss": 0.8011, "step": 3358 }, { "epoch": 0.09807012933928937, "grad_norm": 0.99567425854035, "learning_rate": 4.902218330414477e-06, "loss": 0.7613, "step": 3359 }, { "epoch": 0.09809932556713673, "grad_norm": 0.726978103063889, "learning_rate": 4.9036777583187395e-06, "loss": 0.6893, "step": 3360 }, { "epoch": 0.09812852179498409, "grad_norm": 0.840168085338517, "learning_rate": 4.905137186223001e-06, "loss": 0.7949, "step": 3361 }, { "epoch": 0.09815771802283145, "grad_norm": 0.7111991993509382, "learning_rate": 4.906596614127263e-06, "loss": 0.6561, "step": 3362 }, { "epoch": 0.09818691425067881, "grad_norm": 0.7616646589450324, "learning_rate": 4.908056042031524e-06, "loss": 0.7123, "step": 3363 }, { "epoch": 0.09821611047852617, "grad_norm": 0.8341612060987939, "learning_rate": 4.909515469935785e-06, "loss": 0.7593, "step": 3364 }, { "epoch": 0.09824530670637353, "grad_norm": 0.7868751267412726, "learning_rate": 4.9109748978400474e-06, "loss": 0.7634, "step": 3365 }, { "epoch": 0.0982745029342209, "grad_norm": 0.7890769508619778, "learning_rate": 4.912434325744309e-06, "loss": 0.7063, "step": 3366 }, { "epoch": 0.09830369916206826, "grad_norm": 0.8232070352812436, "learning_rate": 4.913893753648571e-06, "loss": 0.8128, "step": 3367 }, { "epoch": 0.09833289538991562, "grad_norm": 0.7615359369524893, "learning_rate": 4.915353181552831e-06, "loss": 0.7271, "step": 3368 }, { "epoch": 0.09836209161776298, "grad_norm": 0.7456876591641111, "learning_rate": 4.916812609457093e-06, "loss": 0.7237, "step": 3369 }, { "epoch": 0.09839128784561035, "grad_norm": 1.0404284053013781, "learning_rate": 4.9182720373613545e-06, "loss": 0.7653, "step": 3370 }, { "epoch": 0.09842048407345771, "grad_norm": 0.7050005557081472, "learning_rate": 4.919731465265617e-06, "loss": 0.6537, "step": 3371 }, { "epoch": 0.09844968030130508, "grad_norm": 0.8033359488896215, "learning_rate": 4.921190893169878e-06, "loss": 0.7797, "step": 3372 }, { "epoch": 0.09847887652915244, "grad_norm": 0.7884046094327278, "learning_rate": 4.922650321074139e-06, "loss": 0.7156, "step": 3373 }, { "epoch": 0.0985080727569998, "grad_norm": 0.8345748241375419, "learning_rate": 4.9241097489784e-06, "loss": 0.8288, "step": 3374 }, { "epoch": 0.09853726898484716, "grad_norm": 0.7317198012531481, "learning_rate": 4.9255691768826624e-06, "loss": 0.6677, "step": 3375 }, { "epoch": 0.09856646521269452, "grad_norm": 0.7766527945535576, "learning_rate": 4.927028604786924e-06, "loss": 0.7038, "step": 3376 }, { "epoch": 0.09859566144054188, "grad_norm": 0.77146657297373, "learning_rate": 4.928488032691186e-06, "loss": 0.7363, "step": 3377 }, { "epoch": 0.09862485766838924, "grad_norm": 0.7838154282097856, "learning_rate": 4.929947460595447e-06, "loss": 0.7792, "step": 3378 }, { "epoch": 0.0986540538962366, "grad_norm": 0.7247034236311146, "learning_rate": 4.931406888499708e-06, "loss": 0.6923, "step": 3379 }, { "epoch": 0.09868325012408397, "grad_norm": 0.7648608981743185, "learning_rate": 4.93286631640397e-06, "loss": 0.6684, "step": 3380 }, { "epoch": 0.09871244635193133, "grad_norm": 0.7602289237561424, "learning_rate": 4.934325744308232e-06, "loss": 0.6938, "step": 3381 }, { "epoch": 0.09874164257977869, "grad_norm": 0.7693756381812068, "learning_rate": 4.935785172212494e-06, "loss": 0.7534, "step": 3382 }, { "epoch": 0.09877083880762605, "grad_norm": 0.8398584068061755, "learning_rate": 4.937244600116755e-06, "loss": 0.7633, "step": 3383 }, { "epoch": 0.09880003503547341, "grad_norm": 0.7506655331322487, "learning_rate": 4.938704028021016e-06, "loss": 0.7092, "step": 3384 }, { "epoch": 0.09882923126332079, "grad_norm": 0.8231335207715957, "learning_rate": 4.9401634559252774e-06, "loss": 0.7503, "step": 3385 }, { "epoch": 0.09885842749116815, "grad_norm": 0.7233110745048693, "learning_rate": 4.9416228838295395e-06, "loss": 0.7148, "step": 3386 }, { "epoch": 0.09888762371901551, "grad_norm": 0.784640073108935, "learning_rate": 4.943082311733801e-06, "loss": 0.671, "step": 3387 }, { "epoch": 0.09891681994686287, "grad_norm": 0.9437891900530305, "learning_rate": 4.944541739638062e-06, "loss": 0.7302, "step": 3388 }, { "epoch": 0.09894601617471023, "grad_norm": 0.8438507437763851, "learning_rate": 4.946001167542323e-06, "loss": 0.767, "step": 3389 }, { "epoch": 0.09897521240255759, "grad_norm": 0.7292927119488835, "learning_rate": 4.947460595446585e-06, "loss": 0.6698, "step": 3390 }, { "epoch": 0.09900440863040495, "grad_norm": 0.7939108406738066, "learning_rate": 4.948920023350847e-06, "loss": 0.8262, "step": 3391 }, { "epoch": 0.09903360485825231, "grad_norm": 1.2864492465766957, "learning_rate": 4.950379451255109e-06, "loss": 0.7517, "step": 3392 }, { "epoch": 0.09906280108609967, "grad_norm": 0.8442986432979044, "learning_rate": 4.95183887915937e-06, "loss": 0.7892, "step": 3393 }, { "epoch": 0.09909199731394704, "grad_norm": 0.7800792009512754, "learning_rate": 4.953298307063631e-06, "loss": 0.758, "step": 3394 }, { "epoch": 0.0991211935417944, "grad_norm": 0.77397814629736, "learning_rate": 4.954757734967893e-06, "loss": 0.6624, "step": 3395 }, { "epoch": 0.09915038976964176, "grad_norm": 0.8543849599822675, "learning_rate": 4.9562171628721545e-06, "loss": 0.7884, "step": 3396 }, { "epoch": 0.09917958599748912, "grad_norm": 0.7838251785154449, "learning_rate": 4.957676590776417e-06, "loss": 0.7786, "step": 3397 }, { "epoch": 0.09920878222533648, "grad_norm": 0.7394854441070743, "learning_rate": 4.959136018680678e-06, "loss": 0.7511, "step": 3398 }, { "epoch": 0.09923797845318384, "grad_norm": 0.7805038183110656, "learning_rate": 4.960595446584939e-06, "loss": 0.5775, "step": 3399 }, { "epoch": 0.09926717468103122, "grad_norm": 0.756726688637789, "learning_rate": 4.9620548744892e-06, "loss": 0.7119, "step": 3400 }, { "epoch": 0.09929637090887858, "grad_norm": 0.8869783288260699, "learning_rate": 4.9635143023934625e-06, "loss": 0.727, "step": 3401 }, { "epoch": 0.09932556713672594, "grad_norm": 0.7777911581281557, "learning_rate": 4.964973730297724e-06, "loss": 0.7217, "step": 3402 }, { "epoch": 0.0993547633645733, "grad_norm": 0.7538047905931605, "learning_rate": 4.966433158201985e-06, "loss": 0.7258, "step": 3403 }, { "epoch": 0.09938395959242066, "grad_norm": 0.820759546134742, "learning_rate": 4.967892586106246e-06, "loss": 0.7761, "step": 3404 }, { "epoch": 0.09941315582026802, "grad_norm": 0.7127110092438649, "learning_rate": 4.969352014010508e-06, "loss": 0.6402, "step": 3405 }, { "epoch": 0.09944235204811538, "grad_norm": 0.7768267006880814, "learning_rate": 4.9708114419147695e-06, "loss": 0.6922, "step": 3406 }, { "epoch": 0.09947154827596275, "grad_norm": 0.7855842859337853, "learning_rate": 4.972270869819032e-06, "loss": 0.7569, "step": 3407 }, { "epoch": 0.0995007445038101, "grad_norm": 0.8644277080828437, "learning_rate": 4.973730297723293e-06, "loss": 0.7386, "step": 3408 }, { "epoch": 0.09952994073165747, "grad_norm": 0.7401758197556362, "learning_rate": 4.975189725627554e-06, "loss": 0.7048, "step": 3409 }, { "epoch": 0.09955913695950483, "grad_norm": 0.8707600913336095, "learning_rate": 4.976649153531816e-06, "loss": 0.7622, "step": 3410 }, { "epoch": 0.09958833318735219, "grad_norm": 0.7707566356409998, "learning_rate": 4.9781085814360775e-06, "loss": 0.6844, "step": 3411 }, { "epoch": 0.09961752941519955, "grad_norm": 0.7793827509236866, "learning_rate": 4.9795680093403396e-06, "loss": 0.7278, "step": 3412 }, { "epoch": 0.09964672564304691, "grad_norm": 0.7570864460360072, "learning_rate": 4.981027437244601e-06, "loss": 0.7004, "step": 3413 }, { "epoch": 0.09967592187089427, "grad_norm": 0.8965531895836131, "learning_rate": 4.982486865148862e-06, "loss": 0.7757, "step": 3414 }, { "epoch": 0.09970511809874165, "grad_norm": 0.741234442670166, "learning_rate": 4.983946293053123e-06, "loss": 0.649, "step": 3415 }, { "epoch": 0.09973431432658901, "grad_norm": 0.7601720838138575, "learning_rate": 4.985405720957385e-06, "loss": 0.7195, "step": 3416 }, { "epoch": 0.09976351055443637, "grad_norm": 0.7780980060661933, "learning_rate": 4.986865148861647e-06, "loss": 0.7316, "step": 3417 }, { "epoch": 0.09979270678228373, "grad_norm": 0.7512894748894507, "learning_rate": 4.988324576765908e-06, "loss": 0.678, "step": 3418 }, { "epoch": 0.0998219030101311, "grad_norm": 0.8057293408376255, "learning_rate": 4.989784004670169e-06, "loss": 0.7577, "step": 3419 }, { "epoch": 0.09985109923797846, "grad_norm": 0.7727806390541195, "learning_rate": 4.991243432574431e-06, "loss": 0.7635, "step": 3420 }, { "epoch": 0.09988029546582582, "grad_norm": 0.7621843143774544, "learning_rate": 4.9927028604786925e-06, "loss": 0.6936, "step": 3421 }, { "epoch": 0.09990949169367318, "grad_norm": 0.7835443309034643, "learning_rate": 4.9941622883829546e-06, "loss": 0.6985, "step": 3422 }, { "epoch": 0.09993868792152054, "grad_norm": 0.8282763819895403, "learning_rate": 4.995621716287216e-06, "loss": 0.7994, "step": 3423 }, { "epoch": 0.0999678841493679, "grad_norm": 0.8208806568680447, "learning_rate": 4.997081144191477e-06, "loss": 0.762, "step": 3424 }, { "epoch": 0.09999708037721526, "grad_norm": 0.7407157764539807, "learning_rate": 4.998540572095738e-06, "loss": 0.6423, "step": 3425 }, { "epoch": 0.10002627660506262, "grad_norm": 0.7805962480251085, "learning_rate": 5e-06, "loss": 0.7422, "step": 3426 }, { "epoch": 0.10005547283290998, "grad_norm": 0.7219059461886197, "learning_rate": 4.999837793998378e-06, "loss": 0.6246, "step": 3427 }, { "epoch": 0.10008466906075734, "grad_norm": 0.7507052611006971, "learning_rate": 4.999675587996756e-06, "loss": 0.666, "step": 3428 }, { "epoch": 0.1001138652886047, "grad_norm": 1.2000740572662287, "learning_rate": 4.9995133819951344e-06, "loss": 0.7383, "step": 3429 }, { "epoch": 0.10014306151645208, "grad_norm": 0.9425335874822267, "learning_rate": 4.9993511759935124e-06, "loss": 0.7788, "step": 3430 }, { "epoch": 0.10017225774429944, "grad_norm": 0.7152316861370218, "learning_rate": 4.99918896999189e-06, "loss": 0.5834, "step": 3431 }, { "epoch": 0.1002014539721468, "grad_norm": 0.7903394680749244, "learning_rate": 4.999026763990268e-06, "loss": 0.7222, "step": 3432 }, { "epoch": 0.10023065019999416, "grad_norm": 0.933413982413828, "learning_rate": 4.998864557988646e-06, "loss": 0.8141, "step": 3433 }, { "epoch": 0.10025984642784153, "grad_norm": 0.8146090322287249, "learning_rate": 4.998702351987024e-06, "loss": 0.7674, "step": 3434 }, { "epoch": 0.10028904265568889, "grad_norm": 0.9159437185500481, "learning_rate": 4.998540145985402e-06, "loss": 0.681, "step": 3435 }, { "epoch": 0.10031823888353625, "grad_norm": 0.8526132800188562, "learning_rate": 4.9983779399837805e-06, "loss": 0.7679, "step": 3436 }, { "epoch": 0.10034743511138361, "grad_norm": 0.790074985141104, "learning_rate": 4.998215733982158e-06, "loss": 0.7292, "step": 3437 }, { "epoch": 0.10037663133923097, "grad_norm": 0.8321746758492556, "learning_rate": 4.998053527980536e-06, "loss": 0.7378, "step": 3438 }, { "epoch": 0.10040582756707833, "grad_norm": 0.8117505642864427, "learning_rate": 4.997891321978914e-06, "loss": 0.7243, "step": 3439 }, { "epoch": 0.10043502379492569, "grad_norm": 0.7732406603800849, "learning_rate": 4.997729115977292e-06, "loss": 0.68, "step": 3440 }, { "epoch": 0.10046422002277305, "grad_norm": 0.7302741158791412, "learning_rate": 4.99756690997567e-06, "loss": 0.6472, "step": 3441 }, { "epoch": 0.10049341625062042, "grad_norm": 0.7824650055889695, "learning_rate": 4.997404703974048e-06, "loss": 0.6988, "step": 3442 }, { "epoch": 0.10052261247846778, "grad_norm": 0.7991398275787467, "learning_rate": 4.997242497972426e-06, "loss": 0.7764, "step": 3443 }, { "epoch": 0.10055180870631514, "grad_norm": 0.8354177209937149, "learning_rate": 4.997080291970804e-06, "loss": 0.8266, "step": 3444 }, { "epoch": 0.10058100493416251, "grad_norm": 0.7431101582950054, "learning_rate": 4.996918085969181e-06, "loss": 0.6819, "step": 3445 }, { "epoch": 0.10061020116200987, "grad_norm": 0.8273781047261831, "learning_rate": 4.996755879967559e-06, "loss": 0.7595, "step": 3446 }, { "epoch": 0.10063939738985724, "grad_norm": 0.7408043307215255, "learning_rate": 4.996593673965937e-06, "loss": 0.6977, "step": 3447 }, { "epoch": 0.1006685936177046, "grad_norm": 0.8659542605765765, "learning_rate": 4.996431467964315e-06, "loss": 0.8106, "step": 3448 }, { "epoch": 0.10069778984555196, "grad_norm": 0.7538531697862983, "learning_rate": 4.996269261962693e-06, "loss": 0.7187, "step": 3449 }, { "epoch": 0.10072698607339932, "grad_norm": 0.7579109762798997, "learning_rate": 4.996107055961071e-06, "loss": 0.6303, "step": 3450 }, { "epoch": 0.10075618230124668, "grad_norm": 0.7253127970121437, "learning_rate": 4.995944849959449e-06, "loss": 0.6725, "step": 3451 }, { "epoch": 0.10078537852909404, "grad_norm": 0.7980160887561696, "learning_rate": 4.995782643957827e-06, "loss": 0.7388, "step": 3452 }, { "epoch": 0.1008145747569414, "grad_norm": 0.8034887407480128, "learning_rate": 4.995620437956205e-06, "loss": 0.7908, "step": 3453 }, { "epoch": 0.10084377098478876, "grad_norm": 0.7973775647397233, "learning_rate": 4.995458231954582e-06, "loss": 0.6634, "step": 3454 }, { "epoch": 0.10087296721263612, "grad_norm": 0.783980551761264, "learning_rate": 4.995296025952961e-06, "loss": 0.7372, "step": 3455 }, { "epoch": 0.10090216344048349, "grad_norm": 0.8012112559054664, "learning_rate": 4.995133819951339e-06, "loss": 0.7475, "step": 3456 }, { "epoch": 0.10093135966833085, "grad_norm": 0.8644425007060106, "learning_rate": 4.994971613949717e-06, "loss": 0.7898, "step": 3457 }, { "epoch": 0.10096055589617821, "grad_norm": 0.7997520840854558, "learning_rate": 4.994809407948095e-06, "loss": 0.733, "step": 3458 }, { "epoch": 0.10098975212402557, "grad_norm": 0.7611786382732731, "learning_rate": 4.994647201946473e-06, "loss": 0.6645, "step": 3459 }, { "epoch": 0.10101894835187294, "grad_norm": 0.8537568420439423, "learning_rate": 4.99448499594485e-06, "loss": 0.7011, "step": 3460 }, { "epoch": 0.1010481445797203, "grad_norm": 0.794092675130385, "learning_rate": 4.994322789943228e-06, "loss": 0.7305, "step": 3461 }, { "epoch": 0.10107734080756767, "grad_norm": 0.8768266225328075, "learning_rate": 4.994160583941606e-06, "loss": 0.7995, "step": 3462 }, { "epoch": 0.10110653703541503, "grad_norm": 0.7973512669789699, "learning_rate": 4.993998377939984e-06, "loss": 0.7408, "step": 3463 }, { "epoch": 0.10113573326326239, "grad_norm": 0.7649205352432759, "learning_rate": 4.993836171938362e-06, "loss": 0.695, "step": 3464 }, { "epoch": 0.10116492949110975, "grad_norm": 0.7883313067652539, "learning_rate": 4.99367396593674e-06, "loss": 0.7898, "step": 3465 }, { "epoch": 0.10119412571895711, "grad_norm": 0.7349711213509391, "learning_rate": 4.993511759935118e-06, "loss": 0.6819, "step": 3466 }, { "epoch": 0.10122332194680447, "grad_norm": 0.8042255510417843, "learning_rate": 4.993349553933496e-06, "loss": 0.7123, "step": 3467 }, { "epoch": 0.10125251817465183, "grad_norm": 1.1235272730066765, "learning_rate": 4.993187347931874e-06, "loss": 0.7778, "step": 3468 }, { "epoch": 0.1012817144024992, "grad_norm": 0.8376117069710157, "learning_rate": 4.993025141930251e-06, "loss": 0.7753, "step": 3469 }, { "epoch": 0.10131091063034656, "grad_norm": 0.7912001325284465, "learning_rate": 4.992862935928629e-06, "loss": 0.7422, "step": 3470 }, { "epoch": 0.10134010685819392, "grad_norm": 0.7982332054016268, "learning_rate": 4.992700729927007e-06, "loss": 0.6962, "step": 3471 }, { "epoch": 0.10136930308604128, "grad_norm": 0.7161783882154131, "learning_rate": 4.992538523925385e-06, "loss": 0.6014, "step": 3472 }, { "epoch": 0.10139849931388864, "grad_norm": 0.7988739590971059, "learning_rate": 4.992376317923763e-06, "loss": 0.8562, "step": 3473 }, { "epoch": 0.101427695541736, "grad_norm": 0.7949421349068962, "learning_rate": 4.992214111922142e-06, "loss": 0.7284, "step": 3474 }, { "epoch": 0.10145689176958338, "grad_norm": 0.7759442363263578, "learning_rate": 4.992051905920519e-06, "loss": 0.7379, "step": 3475 }, { "epoch": 0.10148608799743074, "grad_norm": 0.8752239236592756, "learning_rate": 4.991889699918897e-06, "loss": 0.6955, "step": 3476 }, { "epoch": 0.1015152842252781, "grad_norm": 0.7936600075757186, "learning_rate": 4.991727493917275e-06, "loss": 0.7185, "step": 3477 }, { "epoch": 0.10154448045312546, "grad_norm": 0.8066310409657217, "learning_rate": 4.991565287915653e-06, "loss": 0.7973, "step": 3478 }, { "epoch": 0.10157367668097282, "grad_norm": 0.817143041688902, "learning_rate": 4.991403081914031e-06, "loss": 0.7903, "step": 3479 }, { "epoch": 0.10160287290882018, "grad_norm": 0.7727188349180962, "learning_rate": 4.991240875912409e-06, "loss": 0.7204, "step": 3480 }, { "epoch": 0.10163206913666754, "grad_norm": 0.7595790710895957, "learning_rate": 4.991078669910787e-06, "loss": 0.6493, "step": 3481 }, { "epoch": 0.1016612653645149, "grad_norm": 0.7612677317879044, "learning_rate": 4.990916463909165e-06, "loss": 0.7363, "step": 3482 }, { "epoch": 0.10169046159236227, "grad_norm": 0.8028275495793701, "learning_rate": 4.9907542579075426e-06, "loss": 0.7234, "step": 3483 }, { "epoch": 0.10171965782020963, "grad_norm": 0.7995598441755805, "learning_rate": 4.9905920519059206e-06, "loss": 0.7328, "step": 3484 }, { "epoch": 0.10174885404805699, "grad_norm": 0.8035913814111615, "learning_rate": 4.9904298459042986e-06, "loss": 0.7518, "step": 3485 }, { "epoch": 0.10177805027590435, "grad_norm": 0.7968052365055519, "learning_rate": 4.990267639902677e-06, "loss": 0.7919, "step": 3486 }, { "epoch": 0.10180724650375171, "grad_norm": 0.7338969867136612, "learning_rate": 4.990105433901055e-06, "loss": 0.6919, "step": 3487 }, { "epoch": 0.10183644273159907, "grad_norm": 0.7685739083518347, "learning_rate": 4.989943227899433e-06, "loss": 0.6771, "step": 3488 }, { "epoch": 0.10186563895944643, "grad_norm": 0.7811631377969589, "learning_rate": 4.989781021897811e-06, "loss": 0.7377, "step": 3489 }, { "epoch": 0.10189483518729381, "grad_norm": 0.7538021048793117, "learning_rate": 4.989618815896189e-06, "loss": 0.7461, "step": 3490 }, { "epoch": 0.10192403141514117, "grad_norm": 0.7999609474893323, "learning_rate": 4.989456609894567e-06, "loss": 0.7163, "step": 3491 }, { "epoch": 0.10195322764298853, "grad_norm": 0.6949851244237529, "learning_rate": 4.989294403892944e-06, "loss": 0.6349, "step": 3492 }, { "epoch": 0.10198242387083589, "grad_norm": 0.7301135152514571, "learning_rate": 4.989132197891323e-06, "loss": 0.7012, "step": 3493 }, { "epoch": 0.10201162009868325, "grad_norm": 0.8010928135684389, "learning_rate": 4.988969991889701e-06, "loss": 0.7586, "step": 3494 }, { "epoch": 0.10204081632653061, "grad_norm": 0.8500622675273342, "learning_rate": 4.988807785888079e-06, "loss": 0.6918, "step": 3495 }, { "epoch": 0.10207001255437798, "grad_norm": 0.8012430406761639, "learning_rate": 4.988645579886457e-06, "loss": 0.6661, "step": 3496 }, { "epoch": 0.10209920878222534, "grad_norm": 0.9371875713438447, "learning_rate": 4.988483373884835e-06, "loss": 0.7375, "step": 3497 }, { "epoch": 0.1021284050100727, "grad_norm": 0.7733396565357031, "learning_rate": 4.988321167883212e-06, "loss": 0.7157, "step": 3498 }, { "epoch": 0.10215760123792006, "grad_norm": 0.854355746411261, "learning_rate": 4.98815896188159e-06, "loss": 0.6962, "step": 3499 }, { "epoch": 0.10218679746576742, "grad_norm": 0.8095783983412044, "learning_rate": 4.987996755879968e-06, "loss": 0.7143, "step": 3500 }, { "epoch": 0.10221599369361478, "grad_norm": 0.7923142451892665, "learning_rate": 4.987834549878346e-06, "loss": 0.7107, "step": 3501 }, { "epoch": 0.10224518992146214, "grad_norm": 0.7312987541895113, "learning_rate": 4.987672343876724e-06, "loss": 0.6431, "step": 3502 }, { "epoch": 0.1022743861493095, "grad_norm": 0.7846702687532813, "learning_rate": 4.987510137875102e-06, "loss": 0.7831, "step": 3503 }, { "epoch": 0.10230358237715687, "grad_norm": 0.8081451826219468, "learning_rate": 4.98734793187348e-06, "loss": 0.7391, "step": 3504 }, { "epoch": 0.10233277860500424, "grad_norm": 0.7649551005337328, "learning_rate": 4.987185725871858e-06, "loss": 0.6431, "step": 3505 }, { "epoch": 0.1023619748328516, "grad_norm": 0.8110133040940347, "learning_rate": 4.987023519870236e-06, "loss": 0.7287, "step": 3506 }, { "epoch": 0.10239117106069896, "grad_norm": 0.7641527647210579, "learning_rate": 4.986861313868613e-06, "loss": 0.6896, "step": 3507 }, { "epoch": 0.10242036728854632, "grad_norm": 0.7740810019331561, "learning_rate": 4.986699107866991e-06, "loss": 0.6848, "step": 3508 }, { "epoch": 0.10244956351639369, "grad_norm": 1.3044146836969843, "learning_rate": 4.986536901865369e-06, "loss": 0.755, "step": 3509 }, { "epoch": 0.10247875974424105, "grad_norm": 0.7633336675213002, "learning_rate": 4.986374695863747e-06, "loss": 0.6575, "step": 3510 }, { "epoch": 0.10250795597208841, "grad_norm": 0.7507031439004368, "learning_rate": 4.986212489862126e-06, "loss": 0.682, "step": 3511 }, { "epoch": 0.10253715219993577, "grad_norm": 0.7482804411092309, "learning_rate": 4.986050283860504e-06, "loss": 0.7043, "step": 3512 }, { "epoch": 0.10256634842778313, "grad_norm": 0.7620576276224145, "learning_rate": 4.985888077858881e-06, "loss": 0.7131, "step": 3513 }, { "epoch": 0.10259554465563049, "grad_norm": 0.7653940618144897, "learning_rate": 4.985725871857259e-06, "loss": 0.6941, "step": 3514 }, { "epoch": 0.10262474088347785, "grad_norm": 0.7423798738129161, "learning_rate": 4.985563665855637e-06, "loss": 0.7084, "step": 3515 }, { "epoch": 0.10265393711132521, "grad_norm": 0.7342529292493846, "learning_rate": 4.985401459854015e-06, "loss": 0.6706, "step": 3516 }, { "epoch": 0.10268313333917257, "grad_norm": 0.6835233795717631, "learning_rate": 4.985239253852393e-06, "loss": 0.5963, "step": 3517 }, { "epoch": 0.10271232956701994, "grad_norm": 0.8233604162583148, "learning_rate": 4.985077047850771e-06, "loss": 0.7633, "step": 3518 }, { "epoch": 0.1027415257948673, "grad_norm": 1.1007367683269382, "learning_rate": 4.984914841849149e-06, "loss": 0.7676, "step": 3519 }, { "epoch": 0.10277072202271466, "grad_norm": 0.7569671832360881, "learning_rate": 4.984752635847527e-06, "loss": 0.6994, "step": 3520 }, { "epoch": 0.10279991825056203, "grad_norm": 0.757611328805674, "learning_rate": 4.984590429845904e-06, "loss": 0.6253, "step": 3521 }, { "epoch": 0.1028291144784094, "grad_norm": 0.7831061720426489, "learning_rate": 4.984428223844282e-06, "loss": 0.6996, "step": 3522 }, { "epoch": 0.10285831070625676, "grad_norm": 0.7822603223383126, "learning_rate": 4.98426601784266e-06, "loss": 0.7153, "step": 3523 }, { "epoch": 0.10288750693410412, "grad_norm": 0.7240144191532476, "learning_rate": 4.984103811841038e-06, "loss": 0.6524, "step": 3524 }, { "epoch": 0.10291670316195148, "grad_norm": 0.7794470078731082, "learning_rate": 4.983941605839416e-06, "loss": 0.698, "step": 3525 }, { "epoch": 0.10294589938979884, "grad_norm": 0.8575785374840972, "learning_rate": 4.983779399837794e-06, "loss": 0.8035, "step": 3526 }, { "epoch": 0.1029750956176462, "grad_norm": 0.7227881493573673, "learning_rate": 4.983617193836172e-06, "loss": 0.6084, "step": 3527 }, { "epoch": 0.10300429184549356, "grad_norm": 0.8001852877489524, "learning_rate": 4.98345498783455e-06, "loss": 0.761, "step": 3528 }, { "epoch": 0.10303348807334092, "grad_norm": 0.7459523890225833, "learning_rate": 4.983292781832928e-06, "loss": 0.706, "step": 3529 }, { "epoch": 0.10306268430118828, "grad_norm": 0.8000047165746598, "learning_rate": 4.983130575831306e-06, "loss": 0.7759, "step": 3530 }, { "epoch": 0.10309188052903565, "grad_norm": 0.9059724211998599, "learning_rate": 4.982968369829684e-06, "loss": 0.677, "step": 3531 }, { "epoch": 0.103121076756883, "grad_norm": 0.7250972781639713, "learning_rate": 4.982806163828062e-06, "loss": 0.6911, "step": 3532 }, { "epoch": 0.10315027298473037, "grad_norm": 0.7921284863373457, "learning_rate": 4.98264395782644e-06, "loss": 0.807, "step": 3533 }, { "epoch": 0.10317946921257773, "grad_norm": 0.740457675724126, "learning_rate": 4.982481751824818e-06, "loss": 0.7258, "step": 3534 }, { "epoch": 0.10320866544042509, "grad_norm": 0.8666837014230862, "learning_rate": 4.982319545823196e-06, "loss": 0.7321, "step": 3535 }, { "epoch": 0.10323786166827247, "grad_norm": 0.7595501858960229, "learning_rate": 4.9821573398215735e-06, "loss": 0.7184, "step": 3536 }, { "epoch": 0.10326705789611983, "grad_norm": 0.7241935395216634, "learning_rate": 4.9819951338199515e-06, "loss": 0.6294, "step": 3537 }, { "epoch": 0.10329625412396719, "grad_norm": 0.7755726247301497, "learning_rate": 4.9818329278183295e-06, "loss": 0.6728, "step": 3538 }, { "epoch": 0.10332545035181455, "grad_norm": 0.818285134996468, "learning_rate": 4.9816707218167076e-06, "loss": 0.7345, "step": 3539 }, { "epoch": 0.10335464657966191, "grad_norm": 0.8477017709927659, "learning_rate": 4.9815085158150856e-06, "loss": 0.6804, "step": 3540 }, { "epoch": 0.10338384280750927, "grad_norm": 0.9759406494896863, "learning_rate": 4.9813463098134636e-06, "loss": 0.7936, "step": 3541 }, { "epoch": 0.10341303903535663, "grad_norm": 0.7985974211796102, "learning_rate": 4.9811841038118416e-06, "loss": 0.7111, "step": 3542 }, { "epoch": 0.103442235263204, "grad_norm": 0.7295676274668363, "learning_rate": 4.98102189781022e-06, "loss": 0.6726, "step": 3543 }, { "epoch": 0.10347143149105135, "grad_norm": 0.8210589802768561, "learning_rate": 4.980859691808598e-06, "loss": 0.7486, "step": 3544 }, { "epoch": 0.10350062771889872, "grad_norm": 0.762379063875662, "learning_rate": 4.980697485806975e-06, "loss": 0.7596, "step": 3545 }, { "epoch": 0.10352982394674608, "grad_norm": 0.8009348377388164, "learning_rate": 4.980535279805353e-06, "loss": 0.7492, "step": 3546 }, { "epoch": 0.10355902017459344, "grad_norm": 0.7892623808065459, "learning_rate": 4.980373073803731e-06, "loss": 0.7476, "step": 3547 }, { "epoch": 0.1035882164024408, "grad_norm": 0.8074796573601059, "learning_rate": 4.980210867802109e-06, "loss": 0.7916, "step": 3548 }, { "epoch": 0.10361741263028816, "grad_norm": 0.933898320347682, "learning_rate": 4.980048661800488e-06, "loss": 0.7799, "step": 3549 }, { "epoch": 0.10364660885813552, "grad_norm": 0.6509563225820592, "learning_rate": 4.979886455798865e-06, "loss": 0.5575, "step": 3550 }, { "epoch": 0.1036758050859829, "grad_norm": 0.8022352809692576, "learning_rate": 4.979724249797243e-06, "loss": 0.7109, "step": 3551 }, { "epoch": 0.10370500131383026, "grad_norm": 0.9716494355467145, "learning_rate": 4.979562043795621e-06, "loss": 0.8155, "step": 3552 }, { "epoch": 0.10373419754167762, "grad_norm": 0.8162408490218261, "learning_rate": 4.979399837793999e-06, "loss": 0.8033, "step": 3553 }, { "epoch": 0.10376339376952498, "grad_norm": 0.7518749038054194, "learning_rate": 4.979237631792377e-06, "loss": 0.657, "step": 3554 }, { "epoch": 0.10379258999737234, "grad_norm": 1.0184143549070444, "learning_rate": 4.979075425790755e-06, "loss": 0.7648, "step": 3555 }, { "epoch": 0.1038217862252197, "grad_norm": 0.7623706978709827, "learning_rate": 4.978913219789133e-06, "loss": 0.6918, "step": 3556 }, { "epoch": 0.10385098245306706, "grad_norm": 0.7402208141625165, "learning_rate": 4.978751013787511e-06, "loss": 0.6821, "step": 3557 }, { "epoch": 0.10388017868091443, "grad_norm": 0.8060316881442567, "learning_rate": 4.978588807785889e-06, "loss": 0.748, "step": 3558 }, { "epoch": 0.10390937490876179, "grad_norm": 0.7639654863964441, "learning_rate": 4.978426601784266e-06, "loss": 0.7345, "step": 3559 }, { "epoch": 0.10393857113660915, "grad_norm": 0.7159523864030524, "learning_rate": 4.978264395782644e-06, "loss": 0.7217, "step": 3560 }, { "epoch": 0.10396776736445651, "grad_norm": 0.8279532329116446, "learning_rate": 4.978102189781022e-06, "loss": 0.7387, "step": 3561 }, { "epoch": 0.10399696359230387, "grad_norm": 1.0287682678512, "learning_rate": 4.9779399837794e-06, "loss": 0.7153, "step": 3562 }, { "epoch": 0.10402615982015123, "grad_norm": 0.7803078528253499, "learning_rate": 4.977777777777778e-06, "loss": 0.7459, "step": 3563 }, { "epoch": 0.10405535604799859, "grad_norm": 1.0105133883607065, "learning_rate": 4.977615571776156e-06, "loss": 0.7361, "step": 3564 }, { "epoch": 0.10408455227584595, "grad_norm": 0.7522168031995745, "learning_rate": 4.977453365774534e-06, "loss": 0.742, "step": 3565 }, { "epoch": 0.10411374850369333, "grad_norm": 0.8381999882069782, "learning_rate": 4.977291159772912e-06, "loss": 0.7486, "step": 3566 }, { "epoch": 0.10414294473154069, "grad_norm": 0.7232876171086068, "learning_rate": 4.97712895377129e-06, "loss": 0.633, "step": 3567 }, { "epoch": 0.10417214095938805, "grad_norm": 0.7424963403270372, "learning_rate": 4.976966747769668e-06, "loss": 0.6349, "step": 3568 }, { "epoch": 0.10420133718723541, "grad_norm": 0.9533483339809773, "learning_rate": 4.976804541768046e-06, "loss": 0.6821, "step": 3569 }, { "epoch": 0.10423053341508277, "grad_norm": 0.7679826007284007, "learning_rate": 4.976642335766424e-06, "loss": 0.6865, "step": 3570 }, { "epoch": 0.10425972964293014, "grad_norm": 0.777263743806736, "learning_rate": 4.976480129764802e-06, "loss": 0.6368, "step": 3571 }, { "epoch": 0.1042889258707775, "grad_norm": 0.7543476675469681, "learning_rate": 4.97631792376318e-06, "loss": 0.6454, "step": 3572 }, { "epoch": 0.10431812209862486, "grad_norm": 0.7277655867195097, "learning_rate": 4.976155717761558e-06, "loss": 0.6721, "step": 3573 }, { "epoch": 0.10434731832647222, "grad_norm": 0.7853906176837775, "learning_rate": 4.975993511759935e-06, "loss": 0.7285, "step": 3574 }, { "epoch": 0.10437651455431958, "grad_norm": 0.7690208855137346, "learning_rate": 4.975831305758313e-06, "loss": 0.6451, "step": 3575 }, { "epoch": 0.10440571078216694, "grad_norm": 0.7955775272425885, "learning_rate": 4.975669099756691e-06, "loss": 0.6946, "step": 3576 }, { "epoch": 0.1044349070100143, "grad_norm": 0.7563983028280353, "learning_rate": 4.975506893755069e-06, "loss": 0.7116, "step": 3577 }, { "epoch": 0.10446410323786166, "grad_norm": 0.774692113923207, "learning_rate": 4.975344687753447e-06, "loss": 0.6881, "step": 3578 }, { "epoch": 0.10449329946570902, "grad_norm": 0.727334308531265, "learning_rate": 4.975182481751825e-06, "loss": 0.6361, "step": 3579 }, { "epoch": 0.10452249569355639, "grad_norm": 0.7581108690921609, "learning_rate": 4.975020275750203e-06, "loss": 0.6556, "step": 3580 }, { "epoch": 0.10455169192140376, "grad_norm": 0.7071730485848625, "learning_rate": 4.974858069748581e-06, "loss": 0.6785, "step": 3581 }, { "epoch": 0.10458088814925112, "grad_norm": 0.7343012969859576, "learning_rate": 4.974695863746959e-06, "loss": 0.6513, "step": 3582 }, { "epoch": 0.10461008437709848, "grad_norm": 0.7809114874293916, "learning_rate": 4.9745336577453365e-06, "loss": 0.7496, "step": 3583 }, { "epoch": 0.10463928060494584, "grad_norm": 0.9025721019401757, "learning_rate": 4.9743714517437145e-06, "loss": 0.7859, "step": 3584 }, { "epoch": 0.1046684768327932, "grad_norm": 0.71652582552126, "learning_rate": 4.9742092457420925e-06, "loss": 0.673, "step": 3585 }, { "epoch": 0.10469767306064057, "grad_norm": 0.7436050858835411, "learning_rate": 4.9740470397404705e-06, "loss": 0.7017, "step": 3586 }, { "epoch": 0.10472686928848793, "grad_norm": 0.7389236654920049, "learning_rate": 4.973884833738849e-06, "loss": 0.6819, "step": 3587 }, { "epoch": 0.10475606551633529, "grad_norm": 0.853091861275772, "learning_rate": 4.9737226277372265e-06, "loss": 0.7369, "step": 3588 }, { "epoch": 0.10478526174418265, "grad_norm": 1.0172925429836897, "learning_rate": 4.9735604217356045e-06, "loss": 0.7604, "step": 3589 }, { "epoch": 0.10481445797203001, "grad_norm": 0.7715976646948122, "learning_rate": 4.9733982157339825e-06, "loss": 0.7335, "step": 3590 }, { "epoch": 0.10484365419987737, "grad_norm": 0.7170618744920806, "learning_rate": 4.9732360097323605e-06, "loss": 0.6151, "step": 3591 }, { "epoch": 0.10487285042772473, "grad_norm": 0.7371684955567439, "learning_rate": 4.9730738037307385e-06, "loss": 0.6399, "step": 3592 }, { "epoch": 0.1049020466555721, "grad_norm": 0.8498891560790627, "learning_rate": 4.9729115977291165e-06, "loss": 0.7579, "step": 3593 }, { "epoch": 0.10493124288341946, "grad_norm": 0.8145648554833446, "learning_rate": 4.9727493917274945e-06, "loss": 0.7744, "step": 3594 }, { "epoch": 0.10496043911126682, "grad_norm": 0.9573271133340614, "learning_rate": 4.9725871857258725e-06, "loss": 0.7518, "step": 3595 }, { "epoch": 0.10498963533911419, "grad_norm": 0.7275570041189503, "learning_rate": 4.9724249797242506e-06, "loss": 0.6504, "step": 3596 }, { "epoch": 0.10501883156696155, "grad_norm": 0.8589313200796559, "learning_rate": 4.972262773722628e-06, "loss": 0.7824, "step": 3597 }, { "epoch": 0.10504802779480892, "grad_norm": 0.8208901995344631, "learning_rate": 4.972100567721006e-06, "loss": 0.8513, "step": 3598 }, { "epoch": 0.10507722402265628, "grad_norm": 0.7793682788687739, "learning_rate": 4.971938361719384e-06, "loss": 0.672, "step": 3599 }, { "epoch": 0.10510642025050364, "grad_norm": 0.7433898980702254, "learning_rate": 4.971776155717762e-06, "loss": 0.6862, "step": 3600 }, { "epoch": 0.105135616478351, "grad_norm": 0.7191417097394673, "learning_rate": 4.97161394971614e-06, "loss": 0.6351, "step": 3601 }, { "epoch": 0.10516481270619836, "grad_norm": 0.8553783926296015, "learning_rate": 4.971451743714518e-06, "loss": 0.7286, "step": 3602 }, { "epoch": 0.10519400893404572, "grad_norm": 0.7312648939886733, "learning_rate": 4.971289537712896e-06, "loss": 0.6233, "step": 3603 }, { "epoch": 0.10522320516189308, "grad_norm": 0.7939202437930831, "learning_rate": 4.971127331711274e-06, "loss": 0.7295, "step": 3604 }, { "epoch": 0.10525240138974044, "grad_norm": 0.7277215461852857, "learning_rate": 4.970965125709652e-06, "loss": 0.6311, "step": 3605 }, { "epoch": 0.1052815976175878, "grad_norm": 0.7994470429626216, "learning_rate": 4.97080291970803e-06, "loss": 0.7289, "step": 3606 }, { "epoch": 0.10531079384543517, "grad_norm": 0.7320623372375308, "learning_rate": 4.970640713706408e-06, "loss": 0.6951, "step": 3607 }, { "epoch": 0.10533999007328253, "grad_norm": 0.7953257515220153, "learning_rate": 4.970478507704786e-06, "loss": 0.746, "step": 3608 }, { "epoch": 0.10536918630112989, "grad_norm": 0.8389650871313128, "learning_rate": 4.970316301703164e-06, "loss": 0.8444, "step": 3609 }, { "epoch": 0.10539838252897725, "grad_norm": 0.7886454450308785, "learning_rate": 4.970154095701542e-06, "loss": 0.7559, "step": 3610 }, { "epoch": 0.10542757875682462, "grad_norm": 0.7950771590573417, "learning_rate": 4.96999188969992e-06, "loss": 0.6658, "step": 3611 }, { "epoch": 0.10545677498467199, "grad_norm": 0.7481970975577216, "learning_rate": 4.969829683698297e-06, "loss": 0.7007, "step": 3612 }, { "epoch": 0.10548597121251935, "grad_norm": 0.8382164468027594, "learning_rate": 4.969667477696675e-06, "loss": 0.7856, "step": 3613 }, { "epoch": 0.10551516744036671, "grad_norm": 0.7681542602242308, "learning_rate": 4.969505271695053e-06, "loss": 0.7332, "step": 3614 }, { "epoch": 0.10554436366821407, "grad_norm": 0.7948978959550441, "learning_rate": 4.969343065693431e-06, "loss": 0.741, "step": 3615 }, { "epoch": 0.10557355989606143, "grad_norm": 0.760561896564949, "learning_rate": 4.969180859691809e-06, "loss": 0.6638, "step": 3616 }, { "epoch": 0.10560275612390879, "grad_norm": 0.8118155095279594, "learning_rate": 4.969018653690187e-06, "loss": 0.7648, "step": 3617 }, { "epoch": 0.10563195235175615, "grad_norm": 0.7329229508051117, "learning_rate": 4.968856447688565e-06, "loss": 0.54, "step": 3618 }, { "epoch": 0.10566114857960351, "grad_norm": 0.8404512087863617, "learning_rate": 4.968694241686943e-06, "loss": 0.7775, "step": 3619 }, { "epoch": 0.10569034480745088, "grad_norm": 0.9957122231768936, "learning_rate": 4.968532035685321e-06, "loss": 0.6692, "step": 3620 }, { "epoch": 0.10571954103529824, "grad_norm": 0.8022694756224987, "learning_rate": 4.968369829683698e-06, "loss": 0.7698, "step": 3621 }, { "epoch": 0.1057487372631456, "grad_norm": 0.8798934152182181, "learning_rate": 4.968207623682076e-06, "loss": 0.7347, "step": 3622 }, { "epoch": 0.10577793349099296, "grad_norm": 0.7472732975123119, "learning_rate": 4.968045417680454e-06, "loss": 0.7406, "step": 3623 }, { "epoch": 0.10580712971884032, "grad_norm": 0.8038726355657955, "learning_rate": 4.967883211678832e-06, "loss": 0.7856, "step": 3624 }, { "epoch": 0.10583632594668768, "grad_norm": 0.7451859175373335, "learning_rate": 4.967721005677211e-06, "loss": 0.6412, "step": 3625 }, { "epoch": 0.10586552217453506, "grad_norm": 0.7345462731392123, "learning_rate": 4.967558799675588e-06, "loss": 0.6251, "step": 3626 }, { "epoch": 0.10589471840238242, "grad_norm": 0.7481001027644675, "learning_rate": 4.967396593673966e-06, "loss": 0.7009, "step": 3627 }, { "epoch": 0.10592391463022978, "grad_norm": 0.8388511563811796, "learning_rate": 4.967234387672344e-06, "loss": 0.8259, "step": 3628 }, { "epoch": 0.10595311085807714, "grad_norm": 0.7781642414459804, "learning_rate": 4.967072181670722e-06, "loss": 0.7616, "step": 3629 }, { "epoch": 0.1059823070859245, "grad_norm": 0.7754354695796829, "learning_rate": 4.9669099756691e-06, "loss": 0.6077, "step": 3630 }, { "epoch": 0.10601150331377186, "grad_norm": 0.740349571450334, "learning_rate": 4.966747769667478e-06, "loss": 0.7436, "step": 3631 }, { "epoch": 0.10604069954161922, "grad_norm": 0.832109899627387, "learning_rate": 4.966585563665856e-06, "loss": 0.7986, "step": 3632 }, { "epoch": 0.10606989576946659, "grad_norm": 0.6739703039926721, "learning_rate": 4.966423357664234e-06, "loss": 0.5907, "step": 3633 }, { "epoch": 0.10609909199731395, "grad_norm": 1.0931857580356321, "learning_rate": 4.966261151662612e-06, "loss": 0.7298, "step": 3634 }, { "epoch": 0.10612828822516131, "grad_norm": 0.7507501155364152, "learning_rate": 4.9660989456609894e-06, "loss": 0.6859, "step": 3635 }, { "epoch": 0.10615748445300867, "grad_norm": 0.7866546541568905, "learning_rate": 4.9659367396593674e-06, "loss": 0.7273, "step": 3636 }, { "epoch": 0.10618668068085603, "grad_norm": 0.883299852135099, "learning_rate": 4.9657745336577454e-06, "loss": 0.7695, "step": 3637 }, { "epoch": 0.10621587690870339, "grad_norm": 0.7113596682933941, "learning_rate": 4.9656123276561235e-06, "loss": 0.633, "step": 3638 }, { "epoch": 0.10624507313655075, "grad_norm": 0.7387592829164449, "learning_rate": 4.9654501216545015e-06, "loss": 0.6696, "step": 3639 }, { "epoch": 0.10627426936439811, "grad_norm": 0.7255141207563613, "learning_rate": 4.9652879156528795e-06, "loss": 0.6896, "step": 3640 }, { "epoch": 0.10630346559224549, "grad_norm": 0.7681612153441834, "learning_rate": 4.9651257096512575e-06, "loss": 0.7042, "step": 3641 }, { "epoch": 0.10633266182009285, "grad_norm": 0.7528441487119109, "learning_rate": 4.9649635036496355e-06, "loss": 0.7324, "step": 3642 }, { "epoch": 0.10636185804794021, "grad_norm": 0.8066446412034288, "learning_rate": 4.9648012976480135e-06, "loss": 0.7106, "step": 3643 }, { "epoch": 0.10639105427578757, "grad_norm": 0.769048943685536, "learning_rate": 4.9646390916463915e-06, "loss": 0.6617, "step": 3644 }, { "epoch": 0.10642025050363493, "grad_norm": 0.8290871861800766, "learning_rate": 4.9644768856447695e-06, "loss": 0.7788, "step": 3645 }, { "epoch": 0.1064494467314823, "grad_norm": 0.7701742733024338, "learning_rate": 4.9643146796431475e-06, "loss": 0.7425, "step": 3646 }, { "epoch": 0.10647864295932966, "grad_norm": 0.7345413295394551, "learning_rate": 4.9641524736415255e-06, "loss": 0.6242, "step": 3647 }, { "epoch": 0.10650783918717702, "grad_norm": 0.7335227832559621, "learning_rate": 4.9639902676399035e-06, "loss": 0.6482, "step": 3648 }, { "epoch": 0.10653703541502438, "grad_norm": 0.8225087103912995, "learning_rate": 4.9638280616382815e-06, "loss": 0.7722, "step": 3649 }, { "epoch": 0.10656623164287174, "grad_norm": 0.8256498304726656, "learning_rate": 4.963665855636659e-06, "loss": 0.7914, "step": 3650 }, { "epoch": 0.1065954278707191, "grad_norm": 0.8022174594823415, "learning_rate": 4.963503649635037e-06, "loss": 0.7619, "step": 3651 }, { "epoch": 0.10662462409856646, "grad_norm": 0.9390609859814999, "learning_rate": 4.963341443633415e-06, "loss": 0.7834, "step": 3652 }, { "epoch": 0.10665382032641382, "grad_norm": 0.7794586028518636, "learning_rate": 4.963179237631793e-06, "loss": 0.7656, "step": 3653 }, { "epoch": 0.10668301655426118, "grad_norm": 0.7543864823977731, "learning_rate": 4.963017031630171e-06, "loss": 0.623, "step": 3654 }, { "epoch": 0.10671221278210855, "grad_norm": 0.8569608897460389, "learning_rate": 4.962854825628549e-06, "loss": 0.7937, "step": 3655 }, { "epoch": 0.10674140900995592, "grad_norm": 0.7622410743306309, "learning_rate": 4.962692619626927e-06, "loss": 0.6771, "step": 3656 }, { "epoch": 0.10677060523780328, "grad_norm": 0.8164371393973261, "learning_rate": 4.962530413625305e-06, "loss": 0.6604, "step": 3657 }, { "epoch": 0.10679980146565064, "grad_norm": 0.7801002053611776, "learning_rate": 4.962368207623683e-06, "loss": 0.771, "step": 3658 }, { "epoch": 0.106828997693498, "grad_norm": 0.7596344634550399, "learning_rate": 4.96220600162206e-06, "loss": 0.7136, "step": 3659 }, { "epoch": 0.10685819392134537, "grad_norm": 0.701383560626598, "learning_rate": 4.962043795620438e-06, "loss": 0.6592, "step": 3660 }, { "epoch": 0.10688739014919273, "grad_norm": 0.8659918737338294, "learning_rate": 4.961881589618816e-06, "loss": 0.7296, "step": 3661 }, { "epoch": 0.10691658637704009, "grad_norm": 0.7508376044454161, "learning_rate": 4.961719383617195e-06, "loss": 0.7078, "step": 3662 }, { "epoch": 0.10694578260488745, "grad_norm": 0.8001431195007632, "learning_rate": 4.961557177615573e-06, "loss": 0.681, "step": 3663 }, { "epoch": 0.10697497883273481, "grad_norm": 0.7250106087640454, "learning_rate": 4.96139497161395e-06, "loss": 0.6615, "step": 3664 }, { "epoch": 0.10700417506058217, "grad_norm": 0.7746491022396383, "learning_rate": 4.961232765612328e-06, "loss": 0.7028, "step": 3665 }, { "epoch": 0.10703337128842953, "grad_norm": 0.749523274887215, "learning_rate": 4.961070559610706e-06, "loss": 0.6721, "step": 3666 }, { "epoch": 0.1070625675162769, "grad_norm": 0.723121732856872, "learning_rate": 4.960908353609084e-06, "loss": 0.6018, "step": 3667 }, { "epoch": 0.10709176374412425, "grad_norm": 0.7559335554750987, "learning_rate": 4.960746147607462e-06, "loss": 0.671, "step": 3668 }, { "epoch": 0.10712095997197162, "grad_norm": 0.8501360223579423, "learning_rate": 4.96058394160584e-06, "loss": 0.8362, "step": 3669 }, { "epoch": 0.10715015619981898, "grad_norm": 0.981375822764253, "learning_rate": 4.960421735604218e-06, "loss": 0.7529, "step": 3670 }, { "epoch": 0.10717935242766635, "grad_norm": 0.9109207943055562, "learning_rate": 4.960259529602596e-06, "loss": 0.7248, "step": 3671 }, { "epoch": 0.10720854865551371, "grad_norm": 0.7337080585704417, "learning_rate": 4.960097323600974e-06, "loss": 0.6808, "step": 3672 }, { "epoch": 0.10723774488336107, "grad_norm": 0.7154375563772388, "learning_rate": 4.959935117599351e-06, "loss": 0.5937, "step": 3673 }, { "epoch": 0.10726694111120844, "grad_norm": 0.7467189107895811, "learning_rate": 4.959772911597729e-06, "loss": 0.7098, "step": 3674 }, { "epoch": 0.1072961373390558, "grad_norm": 0.7087782962303942, "learning_rate": 4.959610705596107e-06, "loss": 0.6333, "step": 3675 }, { "epoch": 0.10732533356690316, "grad_norm": 0.7632979940228732, "learning_rate": 4.959448499594485e-06, "loss": 0.714, "step": 3676 }, { "epoch": 0.10735452979475052, "grad_norm": 0.8278956159692188, "learning_rate": 4.959286293592863e-06, "loss": 0.7132, "step": 3677 }, { "epoch": 0.10738372602259788, "grad_norm": 0.7630305478490115, "learning_rate": 4.959124087591241e-06, "loss": 0.7402, "step": 3678 }, { "epoch": 0.10741292225044524, "grad_norm": 0.6997707956942375, "learning_rate": 4.958961881589619e-06, "loss": 0.6616, "step": 3679 }, { "epoch": 0.1074421184782926, "grad_norm": 0.8135215193000035, "learning_rate": 4.958799675587997e-06, "loss": 0.7826, "step": 3680 }, { "epoch": 0.10747131470613996, "grad_norm": 0.7317792925544234, "learning_rate": 4.958637469586375e-06, "loss": 0.6567, "step": 3681 }, { "epoch": 0.10750051093398733, "grad_norm": 0.812753583406758, "learning_rate": 4.958475263584753e-06, "loss": 0.8068, "step": 3682 }, { "epoch": 0.10752970716183469, "grad_norm": 0.7345433208404155, "learning_rate": 4.958313057583131e-06, "loss": 0.6116, "step": 3683 }, { "epoch": 0.10755890338968205, "grad_norm": 0.7797193854545487, "learning_rate": 4.958150851581509e-06, "loss": 0.7446, "step": 3684 }, { "epoch": 0.10758809961752941, "grad_norm": 0.6850205896552264, "learning_rate": 4.957988645579887e-06, "loss": 0.5863, "step": 3685 }, { "epoch": 0.10761729584537678, "grad_norm": 0.7802397931057349, "learning_rate": 4.957826439578265e-06, "loss": 0.6598, "step": 3686 }, { "epoch": 0.10764649207322415, "grad_norm": 0.734462739184445, "learning_rate": 4.957664233576643e-06, "loss": 0.5837, "step": 3687 }, { "epoch": 0.1076756883010715, "grad_norm": 0.7406607424790314, "learning_rate": 4.95750202757502e-06, "loss": 0.6414, "step": 3688 }, { "epoch": 0.10770488452891887, "grad_norm": 0.8644998902426858, "learning_rate": 4.957339821573398e-06, "loss": 0.7685, "step": 3689 }, { "epoch": 0.10773408075676623, "grad_norm": 0.9593238978323364, "learning_rate": 4.957177615571776e-06, "loss": 0.8671, "step": 3690 }, { "epoch": 0.10776327698461359, "grad_norm": 1.0530237269302658, "learning_rate": 4.957015409570154e-06, "loss": 0.7941, "step": 3691 }, { "epoch": 0.10779247321246095, "grad_norm": 0.7171952344170683, "learning_rate": 4.9568532035685324e-06, "loss": 0.5923, "step": 3692 }, { "epoch": 0.10782166944030831, "grad_norm": 0.7922370058281761, "learning_rate": 4.9566909975669104e-06, "loss": 0.7498, "step": 3693 }, { "epoch": 0.10785086566815567, "grad_norm": 0.7760128843697406, "learning_rate": 4.9565287915652884e-06, "loss": 0.6725, "step": 3694 }, { "epoch": 0.10788006189600303, "grad_norm": 0.7167214977092635, "learning_rate": 4.9563665855636665e-06, "loss": 0.6202, "step": 3695 }, { "epoch": 0.1079092581238504, "grad_norm": 0.8766568543921872, "learning_rate": 4.956204379562044e-06, "loss": 0.8463, "step": 3696 }, { "epoch": 0.10793845435169776, "grad_norm": 0.7551439792792846, "learning_rate": 4.956042173560422e-06, "loss": 0.6494, "step": 3697 }, { "epoch": 0.10796765057954512, "grad_norm": 0.7371508981032445, "learning_rate": 4.9558799675588e-06, "loss": 0.6943, "step": 3698 }, { "epoch": 0.10799684680739248, "grad_norm": 0.7427873907541833, "learning_rate": 4.955717761557178e-06, "loss": 0.6569, "step": 3699 }, { "epoch": 0.10802604303523984, "grad_norm": 0.7026723661498566, "learning_rate": 4.9555555555555565e-06, "loss": 0.6029, "step": 3700 }, { "epoch": 0.10805523926308722, "grad_norm": 0.7692130037998667, "learning_rate": 4.9553933495539345e-06, "loss": 0.7471, "step": 3701 }, { "epoch": 0.10808443549093458, "grad_norm": 0.8169780848431702, "learning_rate": 4.955231143552312e-06, "loss": 0.677, "step": 3702 }, { "epoch": 0.10811363171878194, "grad_norm": 0.7805999780042385, "learning_rate": 4.95506893755069e-06, "loss": 0.7541, "step": 3703 }, { "epoch": 0.1081428279466293, "grad_norm": 0.751444340623598, "learning_rate": 4.954906731549068e-06, "loss": 0.6779, "step": 3704 }, { "epoch": 0.10817202417447666, "grad_norm": 0.7433779806627749, "learning_rate": 4.954744525547446e-06, "loss": 0.6732, "step": 3705 }, { "epoch": 0.10820122040232402, "grad_norm": 0.7528400360113569, "learning_rate": 4.954582319545824e-06, "loss": 0.6906, "step": 3706 }, { "epoch": 0.10823041663017138, "grad_norm": 0.7598362994044252, "learning_rate": 4.954420113544202e-06, "loss": 0.7199, "step": 3707 }, { "epoch": 0.10825961285801874, "grad_norm": 0.6854547160525857, "learning_rate": 4.95425790754258e-06, "loss": 0.5865, "step": 3708 }, { "epoch": 0.1082888090858661, "grad_norm": 0.7990121086237828, "learning_rate": 4.954095701540958e-06, "loss": 0.754, "step": 3709 }, { "epoch": 0.10831800531371347, "grad_norm": 0.7232969344892665, "learning_rate": 4.953933495539336e-06, "loss": 0.6254, "step": 3710 }, { "epoch": 0.10834720154156083, "grad_norm": 0.7424448790871763, "learning_rate": 4.953771289537713e-06, "loss": 0.6436, "step": 3711 }, { "epoch": 0.10837639776940819, "grad_norm": 0.8089767442721608, "learning_rate": 4.953609083536091e-06, "loss": 0.7469, "step": 3712 }, { "epoch": 0.10840559399725555, "grad_norm": 0.7962047789078789, "learning_rate": 4.953446877534469e-06, "loss": 0.766, "step": 3713 }, { "epoch": 0.10843479022510291, "grad_norm": 0.9700289527546229, "learning_rate": 4.953284671532847e-06, "loss": 0.7012, "step": 3714 }, { "epoch": 0.10846398645295027, "grad_norm": 0.8288128544651823, "learning_rate": 4.953122465531225e-06, "loss": 0.759, "step": 3715 }, { "epoch": 0.10849318268079763, "grad_norm": 0.7542389580256008, "learning_rate": 4.952960259529603e-06, "loss": 0.6669, "step": 3716 }, { "epoch": 0.10852237890864501, "grad_norm": 1.250731246045663, "learning_rate": 4.952798053527981e-06, "loss": 0.6606, "step": 3717 }, { "epoch": 0.10855157513649237, "grad_norm": 0.7513203789003379, "learning_rate": 4.952635847526359e-06, "loss": 0.6286, "step": 3718 }, { "epoch": 0.10858077136433973, "grad_norm": 0.7767397755097858, "learning_rate": 4.952473641524737e-06, "loss": 0.7605, "step": 3719 }, { "epoch": 0.10860996759218709, "grad_norm": 0.8111777696656056, "learning_rate": 4.952311435523115e-06, "loss": 0.7854, "step": 3720 }, { "epoch": 0.10863916382003445, "grad_norm": 0.7773218499239064, "learning_rate": 4.952149229521493e-06, "loss": 0.7428, "step": 3721 }, { "epoch": 0.10866836004788182, "grad_norm": 0.8548381786613338, "learning_rate": 4.951987023519871e-06, "loss": 0.7595, "step": 3722 }, { "epoch": 0.10869755627572918, "grad_norm": 0.7200458243066753, "learning_rate": 4.951824817518249e-06, "loss": 0.6315, "step": 3723 }, { "epoch": 0.10872675250357654, "grad_norm": 0.7478236464200528, "learning_rate": 4.951662611516627e-06, "loss": 0.7076, "step": 3724 }, { "epoch": 0.1087559487314239, "grad_norm": 0.7722265060897437, "learning_rate": 4.951500405515005e-06, "loss": 0.7339, "step": 3725 }, { "epoch": 0.10878514495927126, "grad_norm": 0.7423117908854449, "learning_rate": 4.951338199513382e-06, "loss": 0.7205, "step": 3726 }, { "epoch": 0.10881434118711862, "grad_norm": 0.9351131558713089, "learning_rate": 4.95117599351176e-06, "loss": 0.7219, "step": 3727 }, { "epoch": 0.10884353741496598, "grad_norm": 0.806781037864041, "learning_rate": 4.951013787510138e-06, "loss": 0.7878, "step": 3728 }, { "epoch": 0.10887273364281334, "grad_norm": 0.7999278724195903, "learning_rate": 4.950851581508516e-06, "loss": 0.7099, "step": 3729 }, { "epoch": 0.1089019298706607, "grad_norm": 0.8509800711173434, "learning_rate": 4.950689375506894e-06, "loss": 0.7506, "step": 3730 }, { "epoch": 0.10893112609850807, "grad_norm": 0.832902947165063, "learning_rate": 4.950527169505272e-06, "loss": 0.7937, "step": 3731 }, { "epoch": 0.10896032232635544, "grad_norm": 0.7299962925297909, "learning_rate": 4.95036496350365e-06, "loss": 0.663, "step": 3732 }, { "epoch": 0.1089895185542028, "grad_norm": 0.7150911372121378, "learning_rate": 4.950202757502028e-06, "loss": 0.6583, "step": 3733 }, { "epoch": 0.10901871478205016, "grad_norm": 1.3090027561469313, "learning_rate": 4.950040551500405e-06, "loss": 0.7489, "step": 3734 }, { "epoch": 0.10904791100989752, "grad_norm": 0.8243522904009098, "learning_rate": 4.949878345498783e-06, "loss": 0.8264, "step": 3735 }, { "epoch": 0.10907710723774489, "grad_norm": 0.7641647755125885, "learning_rate": 4.949716139497161e-06, "loss": 0.7225, "step": 3736 }, { "epoch": 0.10910630346559225, "grad_norm": 0.8020709222575235, "learning_rate": 4.949553933495539e-06, "loss": 0.7804, "step": 3737 }, { "epoch": 0.10913549969343961, "grad_norm": 0.8687038909836136, "learning_rate": 4.949391727493918e-06, "loss": 0.7733, "step": 3738 }, { "epoch": 0.10916469592128697, "grad_norm": 0.7687398617294632, "learning_rate": 4.949229521492296e-06, "loss": 0.6984, "step": 3739 }, { "epoch": 0.10919389214913433, "grad_norm": 0.6937250605642864, "learning_rate": 4.949067315490673e-06, "loss": 0.6077, "step": 3740 }, { "epoch": 0.10922308837698169, "grad_norm": 0.78845078786611, "learning_rate": 4.948905109489051e-06, "loss": 0.7705, "step": 3741 }, { "epoch": 0.10925228460482905, "grad_norm": 0.697536043374346, "learning_rate": 4.948742903487429e-06, "loss": 0.5863, "step": 3742 }, { "epoch": 0.10928148083267641, "grad_norm": 0.8986124202722902, "learning_rate": 4.948580697485807e-06, "loss": 0.664, "step": 3743 }, { "epoch": 0.10931067706052378, "grad_norm": 0.7984618876412902, "learning_rate": 4.948418491484185e-06, "loss": 0.7014, "step": 3744 }, { "epoch": 0.10933987328837114, "grad_norm": 0.8860402897998123, "learning_rate": 4.948256285482563e-06, "loss": 0.8751, "step": 3745 }, { "epoch": 0.1093690695162185, "grad_norm": 1.0510529267381397, "learning_rate": 4.948094079480941e-06, "loss": 0.7911, "step": 3746 }, { "epoch": 0.10939826574406587, "grad_norm": 0.7755288702380684, "learning_rate": 4.947931873479319e-06, "loss": 0.7251, "step": 3747 }, { "epoch": 0.10942746197191323, "grad_norm": 0.8662531787396693, "learning_rate": 4.947769667477697e-06, "loss": 0.8387, "step": 3748 }, { "epoch": 0.1094566581997606, "grad_norm": 0.8575030768245563, "learning_rate": 4.947607461476075e-06, "loss": 0.7874, "step": 3749 }, { "epoch": 0.10948585442760796, "grad_norm": 0.7318258739661916, "learning_rate": 4.947445255474453e-06, "loss": 0.7431, "step": 3750 }, { "epoch": 0.10951505065545532, "grad_norm": 0.7977402856983159, "learning_rate": 4.947283049472831e-06, "loss": 0.7763, "step": 3751 }, { "epoch": 0.10954424688330268, "grad_norm": 0.8691178071522654, "learning_rate": 4.947120843471209e-06, "loss": 0.7696, "step": 3752 }, { "epoch": 0.10957344311115004, "grad_norm": 0.857845034812009, "learning_rate": 4.946958637469587e-06, "loss": 0.7871, "step": 3753 }, { "epoch": 0.1096026393389974, "grad_norm": 0.783732363338564, "learning_rate": 4.946796431467965e-06, "loss": 0.6871, "step": 3754 }, { "epoch": 0.10963183556684476, "grad_norm": 0.8160490284999165, "learning_rate": 4.946634225466343e-06, "loss": 0.7126, "step": 3755 }, { "epoch": 0.10966103179469212, "grad_norm": 0.8402932700670541, "learning_rate": 4.946472019464721e-06, "loss": 0.7177, "step": 3756 }, { "epoch": 0.10969022802253948, "grad_norm": 0.707987634765605, "learning_rate": 4.946309813463099e-06, "loss": 0.6331, "step": 3757 }, { "epoch": 0.10971942425038685, "grad_norm": 0.7749028489188933, "learning_rate": 4.946147607461477e-06, "loss": 0.7757, "step": 3758 }, { "epoch": 0.10974862047823421, "grad_norm": 0.7341304591511042, "learning_rate": 4.945985401459855e-06, "loss": 0.6621, "step": 3759 }, { "epoch": 0.10977781670608157, "grad_norm": 0.7721380510398351, "learning_rate": 4.945823195458233e-06, "loss": 0.6535, "step": 3760 }, { "epoch": 0.10980701293392893, "grad_norm": 0.8733542169929583, "learning_rate": 4.945660989456611e-06, "loss": 0.8511, "step": 3761 }, { "epoch": 0.1098362091617763, "grad_norm": 0.805962753885333, "learning_rate": 4.945498783454989e-06, "loss": 0.7257, "step": 3762 }, { "epoch": 0.10986540538962367, "grad_norm": 0.7505127667311016, "learning_rate": 4.945336577453367e-06, "loss": 0.74, "step": 3763 }, { "epoch": 0.10989460161747103, "grad_norm": 0.7801162859544899, "learning_rate": 4.945174371451744e-06, "loss": 0.6635, "step": 3764 }, { "epoch": 0.10992379784531839, "grad_norm": 0.7608435822922315, "learning_rate": 4.945012165450122e-06, "loss": 0.7096, "step": 3765 }, { "epoch": 0.10995299407316575, "grad_norm": 0.7375817261221496, "learning_rate": 4.9448499594485e-06, "loss": 0.6981, "step": 3766 }, { "epoch": 0.10998219030101311, "grad_norm": 0.7225510648685673, "learning_rate": 4.944687753446878e-06, "loss": 0.6486, "step": 3767 }, { "epoch": 0.11001138652886047, "grad_norm": 0.718781928298344, "learning_rate": 4.944525547445256e-06, "loss": 0.6226, "step": 3768 }, { "epoch": 0.11004058275670783, "grad_norm": 0.7921862863859351, "learning_rate": 4.944363341443634e-06, "loss": 0.7361, "step": 3769 }, { "epoch": 0.1100697789845552, "grad_norm": 0.8611328446008873, "learning_rate": 4.944201135442012e-06, "loss": 0.8062, "step": 3770 }, { "epoch": 0.11009897521240256, "grad_norm": 0.7621407004193602, "learning_rate": 4.94403892944039e-06, "loss": 0.6665, "step": 3771 }, { "epoch": 0.11012817144024992, "grad_norm": 0.7616812937859482, "learning_rate": 4.943876723438767e-06, "loss": 0.6821, "step": 3772 }, { "epoch": 0.11015736766809728, "grad_norm": 0.8321935125178455, "learning_rate": 4.943714517437145e-06, "loss": 0.8011, "step": 3773 }, { "epoch": 0.11018656389594464, "grad_norm": 0.7265524888372477, "learning_rate": 4.943552311435523e-06, "loss": 0.6109, "step": 3774 }, { "epoch": 0.110215760123792, "grad_norm": 0.7205527819296451, "learning_rate": 4.943390105433901e-06, "loss": 0.6458, "step": 3775 }, { "epoch": 0.11024495635163936, "grad_norm": 0.7775499059690899, "learning_rate": 4.94322789943228e-06, "loss": 0.752, "step": 3776 }, { "epoch": 0.11027415257948674, "grad_norm": 0.8529839799056178, "learning_rate": 4.943065693430658e-06, "loss": 0.7715, "step": 3777 }, { "epoch": 0.1103033488073341, "grad_norm": 0.8443853401408228, "learning_rate": 4.942903487429035e-06, "loss": 0.6606, "step": 3778 }, { "epoch": 0.11033254503518146, "grad_norm": 0.7200022658572134, "learning_rate": 4.942741281427413e-06, "loss": 0.6906, "step": 3779 }, { "epoch": 0.11036174126302882, "grad_norm": 0.7475040428291815, "learning_rate": 4.942579075425791e-06, "loss": 0.6201, "step": 3780 }, { "epoch": 0.11039093749087618, "grad_norm": 0.893818579995598, "learning_rate": 4.942416869424169e-06, "loss": 0.7838, "step": 3781 }, { "epoch": 0.11042013371872354, "grad_norm": 0.81191064004018, "learning_rate": 4.942254663422547e-06, "loss": 0.774, "step": 3782 }, { "epoch": 0.1104493299465709, "grad_norm": 0.8038356030584307, "learning_rate": 4.942092457420925e-06, "loss": 0.7941, "step": 3783 }, { "epoch": 0.11047852617441827, "grad_norm": 0.723839818537432, "learning_rate": 4.941930251419303e-06, "loss": 0.6248, "step": 3784 }, { "epoch": 0.11050772240226563, "grad_norm": 0.9621130275825386, "learning_rate": 4.941768045417681e-06, "loss": 0.7987, "step": 3785 }, { "epoch": 0.11053691863011299, "grad_norm": 0.7061165880109513, "learning_rate": 4.941605839416059e-06, "loss": 0.6521, "step": 3786 }, { "epoch": 0.11056611485796035, "grad_norm": 0.7066837025805309, "learning_rate": 4.941443633414436e-06, "loss": 0.6276, "step": 3787 }, { "epoch": 0.11059531108580771, "grad_norm": 0.7843985692289102, "learning_rate": 4.941281427412814e-06, "loss": 0.7182, "step": 3788 }, { "epoch": 0.11062450731365507, "grad_norm": 0.7969350931508237, "learning_rate": 4.941119221411192e-06, "loss": 0.6732, "step": 3789 }, { "epoch": 0.11065370354150243, "grad_norm": 0.9856976299073983, "learning_rate": 4.94095701540957e-06, "loss": 0.857, "step": 3790 }, { "epoch": 0.1106828997693498, "grad_norm": 0.753520933273425, "learning_rate": 4.940794809407948e-06, "loss": 0.6786, "step": 3791 }, { "epoch": 0.11071209599719717, "grad_norm": 0.7822340242030072, "learning_rate": 4.940632603406326e-06, "loss": 0.7262, "step": 3792 }, { "epoch": 0.11074129222504453, "grad_norm": 0.8068495228086172, "learning_rate": 4.940470397404704e-06, "loss": 0.7903, "step": 3793 }, { "epoch": 0.11077048845289189, "grad_norm": 0.7910967822445654, "learning_rate": 4.940308191403082e-06, "loss": 0.7358, "step": 3794 }, { "epoch": 0.11079968468073925, "grad_norm": 1.0722569640654407, "learning_rate": 4.94014598540146e-06, "loss": 0.8029, "step": 3795 }, { "epoch": 0.11082888090858661, "grad_norm": 1.154774247479475, "learning_rate": 4.939983779399838e-06, "loss": 0.722, "step": 3796 }, { "epoch": 0.11085807713643397, "grad_norm": 0.7794787547478905, "learning_rate": 4.939821573398216e-06, "loss": 0.7437, "step": 3797 }, { "epoch": 0.11088727336428134, "grad_norm": 0.8189331288565366, "learning_rate": 4.939659367396594e-06, "loss": 0.6809, "step": 3798 }, { "epoch": 0.1109164695921287, "grad_norm": 0.8540377025875105, "learning_rate": 4.939497161394972e-06, "loss": 0.7895, "step": 3799 }, { "epoch": 0.11094566581997606, "grad_norm": 0.7611791549379461, "learning_rate": 4.93933495539335e-06, "loss": 0.6579, "step": 3800 }, { "epoch": 0.11097486204782342, "grad_norm": 0.7394402719821774, "learning_rate": 4.939172749391728e-06, "loss": 0.6608, "step": 3801 }, { "epoch": 0.11100405827567078, "grad_norm": 0.7427503803045704, "learning_rate": 4.9390105433901056e-06, "loss": 0.659, "step": 3802 }, { "epoch": 0.11103325450351814, "grad_norm": 0.7358154468731163, "learning_rate": 4.9388483373884836e-06, "loss": 0.5873, "step": 3803 }, { "epoch": 0.1110624507313655, "grad_norm": 0.7904144107321139, "learning_rate": 4.9386861313868616e-06, "loss": 0.7837, "step": 3804 }, { "epoch": 0.11109164695921286, "grad_norm": 0.8603518361924999, "learning_rate": 4.9385239253852396e-06, "loss": 0.7309, "step": 3805 }, { "epoch": 0.11112084318706023, "grad_norm": 0.7590614066185853, "learning_rate": 4.938361719383618e-06, "loss": 0.6801, "step": 3806 }, { "epoch": 0.1111500394149076, "grad_norm": 0.9377772853005687, "learning_rate": 4.938199513381996e-06, "loss": 0.7019, "step": 3807 }, { "epoch": 0.11117923564275496, "grad_norm": 0.7364822600438515, "learning_rate": 4.938037307380374e-06, "loss": 0.6646, "step": 3808 }, { "epoch": 0.11120843187060232, "grad_norm": 0.8432106174655454, "learning_rate": 4.937875101378752e-06, "loss": 0.8068, "step": 3809 }, { "epoch": 0.11123762809844968, "grad_norm": 0.9216880449917065, "learning_rate": 4.937712895377129e-06, "loss": 0.7813, "step": 3810 }, { "epoch": 0.11126682432629705, "grad_norm": 0.7830406611021478, "learning_rate": 4.937550689375507e-06, "loss": 0.7157, "step": 3811 }, { "epoch": 0.1112960205541444, "grad_norm": 0.8748230527591389, "learning_rate": 4.937388483373885e-06, "loss": 0.7742, "step": 3812 }, { "epoch": 0.11132521678199177, "grad_norm": 0.7530077992221258, "learning_rate": 4.937226277372264e-06, "loss": 0.687, "step": 3813 }, { "epoch": 0.11135441300983913, "grad_norm": 0.79586017208559, "learning_rate": 4.937064071370642e-06, "loss": 0.6446, "step": 3814 }, { "epoch": 0.11138360923768649, "grad_norm": 0.7936289638606155, "learning_rate": 4.93690186536902e-06, "loss": 0.7021, "step": 3815 }, { "epoch": 0.11141280546553385, "grad_norm": 0.7490587665588948, "learning_rate": 4.936739659367397e-06, "loss": 0.6824, "step": 3816 }, { "epoch": 0.11144200169338121, "grad_norm": 0.7520210202779343, "learning_rate": 4.936577453365775e-06, "loss": 0.697, "step": 3817 }, { "epoch": 0.11147119792122857, "grad_norm": 0.8257751889162074, "learning_rate": 4.936415247364153e-06, "loss": 0.7263, "step": 3818 }, { "epoch": 0.11150039414907593, "grad_norm": 0.740512824517238, "learning_rate": 4.936253041362531e-06, "loss": 0.6639, "step": 3819 }, { "epoch": 0.1115295903769233, "grad_norm": 0.8622255810635661, "learning_rate": 4.936090835360909e-06, "loss": 0.7847, "step": 3820 }, { "epoch": 0.11155878660477066, "grad_norm": 0.757359097989274, "learning_rate": 4.935928629359287e-06, "loss": 0.7023, "step": 3821 }, { "epoch": 0.11158798283261803, "grad_norm": 0.7273126570988272, "learning_rate": 4.935766423357665e-06, "loss": 0.6496, "step": 3822 }, { "epoch": 0.1116171790604654, "grad_norm": 0.7677788463158982, "learning_rate": 4.935604217356043e-06, "loss": 0.7217, "step": 3823 }, { "epoch": 0.11164637528831275, "grad_norm": 0.7308119370578364, "learning_rate": 4.935442011354421e-06, "loss": 0.6125, "step": 3824 }, { "epoch": 0.11167557151616012, "grad_norm": 0.8303190441863542, "learning_rate": 4.935279805352798e-06, "loss": 0.7461, "step": 3825 }, { "epoch": 0.11170476774400748, "grad_norm": 0.7960840486832667, "learning_rate": 4.935117599351176e-06, "loss": 0.7361, "step": 3826 }, { "epoch": 0.11173396397185484, "grad_norm": 0.7170483290475929, "learning_rate": 4.934955393349554e-06, "loss": 0.6387, "step": 3827 }, { "epoch": 0.1117631601997022, "grad_norm": 0.8142608349951613, "learning_rate": 4.934793187347932e-06, "loss": 0.7216, "step": 3828 }, { "epoch": 0.11179235642754956, "grad_norm": 0.6973956951588492, "learning_rate": 4.93463098134631e-06, "loss": 0.5932, "step": 3829 }, { "epoch": 0.11182155265539692, "grad_norm": 0.7675539994538944, "learning_rate": 4.934468775344688e-06, "loss": 0.6996, "step": 3830 }, { "epoch": 0.11185074888324428, "grad_norm": 0.8258329903324302, "learning_rate": 4.934306569343066e-06, "loss": 0.7855, "step": 3831 }, { "epoch": 0.11187994511109164, "grad_norm": 0.7437155499248889, "learning_rate": 4.934144363341444e-06, "loss": 0.6635, "step": 3832 }, { "epoch": 0.111909141338939, "grad_norm": 0.7710910702269457, "learning_rate": 4.933982157339822e-06, "loss": 0.6406, "step": 3833 }, { "epoch": 0.11193833756678637, "grad_norm": 0.7787032175184339, "learning_rate": 4.9338199513382e-06, "loss": 0.7044, "step": 3834 }, { "epoch": 0.11196753379463373, "grad_norm": 0.7115166879298116, "learning_rate": 4.933657745336578e-06, "loss": 0.6211, "step": 3835 }, { "epoch": 0.11199673002248109, "grad_norm": 0.7404909299682172, "learning_rate": 4.933495539334956e-06, "loss": 0.6631, "step": 3836 }, { "epoch": 0.11202592625032846, "grad_norm": 0.7397348829071508, "learning_rate": 4.933333333333334e-06, "loss": 0.6112, "step": 3837 }, { "epoch": 0.11205512247817583, "grad_norm": 0.7851678964449931, "learning_rate": 4.933171127331712e-06, "loss": 0.7249, "step": 3838 }, { "epoch": 0.11208431870602319, "grad_norm": 0.7818269825710898, "learning_rate": 4.933008921330089e-06, "loss": 0.7006, "step": 3839 }, { "epoch": 0.11211351493387055, "grad_norm": 0.8760443729290966, "learning_rate": 4.932846715328467e-06, "loss": 0.7203, "step": 3840 }, { "epoch": 0.11214271116171791, "grad_norm": 0.8792206193380994, "learning_rate": 4.932684509326845e-06, "loss": 0.7701, "step": 3841 }, { "epoch": 0.11217190738956527, "grad_norm": 0.8906652483542485, "learning_rate": 4.932522303325223e-06, "loss": 0.7659, "step": 3842 }, { "epoch": 0.11220110361741263, "grad_norm": 0.8773988180293044, "learning_rate": 4.932360097323601e-06, "loss": 0.7473, "step": 3843 }, { "epoch": 0.11223029984525999, "grad_norm": 1.6368959762092004, "learning_rate": 4.932197891321979e-06, "loss": 0.6121, "step": 3844 }, { "epoch": 0.11225949607310735, "grad_norm": 0.8065121607929892, "learning_rate": 4.932035685320357e-06, "loss": 0.7118, "step": 3845 }, { "epoch": 0.11228869230095471, "grad_norm": 0.7534972527749838, "learning_rate": 4.931873479318735e-06, "loss": 0.6486, "step": 3846 }, { "epoch": 0.11231788852880208, "grad_norm": 0.8044344249394676, "learning_rate": 4.931711273317113e-06, "loss": 0.6148, "step": 3847 }, { "epoch": 0.11234708475664944, "grad_norm": 0.7207077269828451, "learning_rate": 4.9315490673154905e-06, "loss": 0.694, "step": 3848 }, { "epoch": 0.1123762809844968, "grad_norm": 0.8170452591965794, "learning_rate": 4.9313868613138685e-06, "loss": 0.7055, "step": 3849 }, { "epoch": 0.11240547721234416, "grad_norm": 0.7784935073084196, "learning_rate": 4.9312246553122465e-06, "loss": 0.7332, "step": 3850 }, { "epoch": 0.11243467344019152, "grad_norm": 0.8154660831113824, "learning_rate": 4.931062449310625e-06, "loss": 0.7232, "step": 3851 }, { "epoch": 0.1124638696680389, "grad_norm": 0.8225611284761766, "learning_rate": 4.930900243309003e-06, "loss": 0.7278, "step": 3852 }, { "epoch": 0.11249306589588626, "grad_norm": 1.1084411276915016, "learning_rate": 4.930738037307381e-06, "loss": 0.6918, "step": 3853 }, { "epoch": 0.11252226212373362, "grad_norm": 0.7318340826911265, "learning_rate": 4.9305758313057585e-06, "loss": 0.678, "step": 3854 }, { "epoch": 0.11255145835158098, "grad_norm": 0.7696787782768112, "learning_rate": 4.9304136253041365e-06, "loss": 0.6981, "step": 3855 }, { "epoch": 0.11258065457942834, "grad_norm": 0.8252699999465011, "learning_rate": 4.9302514193025145e-06, "loss": 0.7307, "step": 3856 }, { "epoch": 0.1126098508072757, "grad_norm": 0.8939531078070556, "learning_rate": 4.9300892133008925e-06, "loss": 0.712, "step": 3857 }, { "epoch": 0.11263904703512306, "grad_norm": 0.7836424252404172, "learning_rate": 4.9299270072992705e-06, "loss": 0.7143, "step": 3858 }, { "epoch": 0.11266824326297042, "grad_norm": 0.7395177002496729, "learning_rate": 4.9297648012976486e-06, "loss": 0.6597, "step": 3859 }, { "epoch": 0.11269743949081779, "grad_norm": 0.9982197412908096, "learning_rate": 4.9296025952960266e-06, "loss": 0.8442, "step": 3860 }, { "epoch": 0.11272663571866515, "grad_norm": 0.778871908016368, "learning_rate": 4.9294403892944046e-06, "loss": 0.7334, "step": 3861 }, { "epoch": 0.11275583194651251, "grad_norm": 0.9533108204755455, "learning_rate": 4.9292781832927826e-06, "loss": 0.771, "step": 3862 }, { "epoch": 0.11278502817435987, "grad_norm": 0.7136321346259362, "learning_rate": 4.92911597729116e-06, "loss": 0.6518, "step": 3863 }, { "epoch": 0.11281422440220723, "grad_norm": 0.6735951471352513, "learning_rate": 4.928953771289538e-06, "loss": 0.5662, "step": 3864 }, { "epoch": 0.11284342063005459, "grad_norm": 0.7378347290112348, "learning_rate": 4.928791565287916e-06, "loss": 0.6889, "step": 3865 }, { "epoch": 0.11287261685790195, "grad_norm": 0.7722357201150813, "learning_rate": 4.928629359286294e-06, "loss": 0.7476, "step": 3866 }, { "epoch": 0.11290181308574933, "grad_norm": 0.7573440074208507, "learning_rate": 4.928467153284672e-06, "loss": 0.6713, "step": 3867 }, { "epoch": 0.11293100931359669, "grad_norm": 0.8495800989981509, "learning_rate": 4.92830494728305e-06, "loss": 0.8873, "step": 3868 }, { "epoch": 0.11296020554144405, "grad_norm": 0.7744081313515319, "learning_rate": 4.928142741281428e-06, "loss": 0.702, "step": 3869 }, { "epoch": 0.11298940176929141, "grad_norm": 0.7958738788950078, "learning_rate": 4.927980535279806e-06, "loss": 0.7259, "step": 3870 }, { "epoch": 0.11301859799713877, "grad_norm": 0.6530089244599984, "learning_rate": 4.927818329278184e-06, "loss": 0.5551, "step": 3871 }, { "epoch": 0.11304779422498613, "grad_norm": 0.8100790298244824, "learning_rate": 4.927656123276562e-06, "loss": 0.7342, "step": 3872 }, { "epoch": 0.1130769904528335, "grad_norm": 0.7732578985277111, "learning_rate": 4.92749391727494e-06, "loss": 0.7004, "step": 3873 }, { "epoch": 0.11310618668068086, "grad_norm": 0.8271364073234476, "learning_rate": 4.927331711273318e-06, "loss": 0.7381, "step": 3874 }, { "epoch": 0.11313538290852822, "grad_norm": 0.7540641180932702, "learning_rate": 4.927169505271696e-06, "loss": 0.7013, "step": 3875 }, { "epoch": 0.11316457913637558, "grad_norm": 0.8984916504318341, "learning_rate": 4.927007299270074e-06, "loss": 0.7454, "step": 3876 }, { "epoch": 0.11319377536422294, "grad_norm": 0.8105489750208869, "learning_rate": 4.926845093268451e-06, "loss": 0.7315, "step": 3877 }, { "epoch": 0.1132229715920703, "grad_norm": 0.7420045060429009, "learning_rate": 4.926682887266829e-06, "loss": 0.6004, "step": 3878 }, { "epoch": 0.11325216781991766, "grad_norm": 0.7576663567040389, "learning_rate": 4.926520681265207e-06, "loss": 0.7023, "step": 3879 }, { "epoch": 0.11328136404776502, "grad_norm": 0.7716573506481804, "learning_rate": 4.926358475263585e-06, "loss": 0.7197, "step": 3880 }, { "epoch": 0.11331056027561238, "grad_norm": 0.7229370352073551, "learning_rate": 4.926196269261963e-06, "loss": 0.6981, "step": 3881 }, { "epoch": 0.11333975650345976, "grad_norm": 0.8245411166288374, "learning_rate": 4.926034063260341e-06, "loss": 0.7434, "step": 3882 }, { "epoch": 0.11336895273130712, "grad_norm": 0.7015324360858871, "learning_rate": 4.925871857258719e-06, "loss": 0.6223, "step": 3883 }, { "epoch": 0.11339814895915448, "grad_norm": 0.7324017913273165, "learning_rate": 4.925709651257097e-06, "loss": 0.6918, "step": 3884 }, { "epoch": 0.11342734518700184, "grad_norm": 0.7664635740038005, "learning_rate": 4.925547445255475e-06, "loss": 0.6294, "step": 3885 }, { "epoch": 0.1134565414148492, "grad_norm": 0.8061389237055251, "learning_rate": 4.925385239253852e-06, "loss": 0.768, "step": 3886 }, { "epoch": 0.11348573764269657, "grad_norm": 0.9125441585530307, "learning_rate": 4.92522303325223e-06, "loss": 0.7458, "step": 3887 }, { "epoch": 0.11351493387054393, "grad_norm": 0.7912395805666507, "learning_rate": 4.925060827250608e-06, "loss": 0.6683, "step": 3888 }, { "epoch": 0.11354413009839129, "grad_norm": 0.8108907347402656, "learning_rate": 4.924898621248987e-06, "loss": 0.7453, "step": 3889 }, { "epoch": 0.11357332632623865, "grad_norm": 0.806092917168607, "learning_rate": 4.924736415247365e-06, "loss": 0.7865, "step": 3890 }, { "epoch": 0.11360252255408601, "grad_norm": 0.7900599328482677, "learning_rate": 4.924574209245743e-06, "loss": 0.7827, "step": 3891 }, { "epoch": 0.11363171878193337, "grad_norm": 0.7596885337185122, "learning_rate": 4.92441200324412e-06, "loss": 0.6921, "step": 3892 }, { "epoch": 0.11366091500978073, "grad_norm": 0.7294525507899775, "learning_rate": 4.924249797242498e-06, "loss": 0.69, "step": 3893 }, { "epoch": 0.1136901112376281, "grad_norm": 0.7455303051280333, "learning_rate": 4.924087591240876e-06, "loss": 0.6406, "step": 3894 }, { "epoch": 0.11371930746547546, "grad_norm": 0.7337877163568123, "learning_rate": 4.923925385239254e-06, "loss": 0.7361, "step": 3895 }, { "epoch": 0.11374850369332282, "grad_norm": 0.7305522747492808, "learning_rate": 4.923763179237632e-06, "loss": 0.6389, "step": 3896 }, { "epoch": 0.11377769992117019, "grad_norm": 0.850303871965617, "learning_rate": 4.92360097323601e-06, "loss": 0.7759, "step": 3897 }, { "epoch": 0.11380689614901755, "grad_norm": 0.8330602823544118, "learning_rate": 4.923438767234388e-06, "loss": 0.7147, "step": 3898 }, { "epoch": 0.11383609237686491, "grad_norm": 0.8294949981250367, "learning_rate": 4.923276561232766e-06, "loss": 0.7801, "step": 3899 }, { "epoch": 0.11386528860471228, "grad_norm": 0.7837188704260869, "learning_rate": 4.923114355231144e-06, "loss": 0.6955, "step": 3900 }, { "epoch": 0.11389448483255964, "grad_norm": 0.867509440990648, "learning_rate": 4.9229521492295215e-06, "loss": 0.8449, "step": 3901 }, { "epoch": 0.113923681060407, "grad_norm": 0.7131359721995608, "learning_rate": 4.9227899432278995e-06, "loss": 0.6639, "step": 3902 }, { "epoch": 0.11395287728825436, "grad_norm": 0.7635692231119848, "learning_rate": 4.9226277372262775e-06, "loss": 0.6977, "step": 3903 }, { "epoch": 0.11398207351610172, "grad_norm": 0.7257938720789328, "learning_rate": 4.9224655312246555e-06, "loss": 0.6693, "step": 3904 }, { "epoch": 0.11401126974394908, "grad_norm": 0.767173874395628, "learning_rate": 4.9223033252230335e-06, "loss": 0.6758, "step": 3905 }, { "epoch": 0.11404046597179644, "grad_norm": 0.9667158153059726, "learning_rate": 4.9221411192214115e-06, "loss": 0.7006, "step": 3906 }, { "epoch": 0.1140696621996438, "grad_norm": 0.7688475766972185, "learning_rate": 4.9219789132197895e-06, "loss": 0.7535, "step": 3907 }, { "epoch": 0.11409885842749116, "grad_norm": 0.853492907172691, "learning_rate": 4.9218167072181675e-06, "loss": 0.8362, "step": 3908 }, { "epoch": 0.11412805465533853, "grad_norm": 0.9957109064389417, "learning_rate": 4.9216545012165455e-06, "loss": 0.7055, "step": 3909 }, { "epoch": 0.11415725088318589, "grad_norm": 0.7570046846111969, "learning_rate": 4.9214922952149235e-06, "loss": 0.7524, "step": 3910 }, { "epoch": 0.11418644711103325, "grad_norm": 0.8456920781062055, "learning_rate": 4.9213300892133015e-06, "loss": 0.7682, "step": 3911 }, { "epoch": 0.11421564333888061, "grad_norm": 0.8594877363115527, "learning_rate": 4.9211678832116795e-06, "loss": 0.7789, "step": 3912 }, { "epoch": 0.11424483956672798, "grad_norm": 0.8431609364700955, "learning_rate": 4.9210056772100575e-06, "loss": 0.7243, "step": 3913 }, { "epoch": 0.11427403579457535, "grad_norm": 0.7697897060812245, "learning_rate": 4.9208434712084355e-06, "loss": 0.6287, "step": 3914 }, { "epoch": 0.11430323202242271, "grad_norm": 0.782548074319389, "learning_rate": 4.920681265206813e-06, "loss": 0.7084, "step": 3915 }, { "epoch": 0.11433242825027007, "grad_norm": 1.3769929658507742, "learning_rate": 4.920519059205191e-06, "loss": 0.7731, "step": 3916 }, { "epoch": 0.11436162447811743, "grad_norm": 0.8028385193804904, "learning_rate": 4.920356853203569e-06, "loss": 0.7711, "step": 3917 }, { "epoch": 0.11439082070596479, "grad_norm": 0.7885545757372876, "learning_rate": 4.920194647201947e-06, "loss": 0.7539, "step": 3918 }, { "epoch": 0.11442001693381215, "grad_norm": 0.7451004163557987, "learning_rate": 4.920032441200325e-06, "loss": 0.7103, "step": 3919 }, { "epoch": 0.11444921316165951, "grad_norm": 0.8063496509539025, "learning_rate": 4.919870235198703e-06, "loss": 0.7877, "step": 3920 }, { "epoch": 0.11447840938950687, "grad_norm": 0.9206042868494542, "learning_rate": 4.919708029197081e-06, "loss": 0.8417, "step": 3921 }, { "epoch": 0.11450760561735424, "grad_norm": 0.7412788172091205, "learning_rate": 4.919545823195459e-06, "loss": 0.6908, "step": 3922 }, { "epoch": 0.1145368018452016, "grad_norm": 0.7725899627511624, "learning_rate": 4.919383617193837e-06, "loss": 0.7833, "step": 3923 }, { "epoch": 0.11456599807304896, "grad_norm": 0.8195292960609353, "learning_rate": 4.919221411192214e-06, "loss": 0.7252, "step": 3924 }, { "epoch": 0.11459519430089632, "grad_norm": 0.6491208672752918, "learning_rate": 4.919059205190592e-06, "loss": 0.6044, "step": 3925 }, { "epoch": 0.11462439052874368, "grad_norm": 0.8010233459211437, "learning_rate": 4.91889699918897e-06, "loss": 0.7418, "step": 3926 }, { "epoch": 0.11465358675659104, "grad_norm": 1.0954391891176734, "learning_rate": 4.918734793187349e-06, "loss": 0.7704, "step": 3927 }, { "epoch": 0.11468278298443842, "grad_norm": 0.7289360235618821, "learning_rate": 4.918572587185727e-06, "loss": 0.6878, "step": 3928 }, { "epoch": 0.11471197921228578, "grad_norm": 0.7956461450816115, "learning_rate": 4.918410381184105e-06, "loss": 0.7737, "step": 3929 }, { "epoch": 0.11474117544013314, "grad_norm": 0.789475046316696, "learning_rate": 4.918248175182482e-06, "loss": 0.7648, "step": 3930 }, { "epoch": 0.1147703716679805, "grad_norm": 0.8310798813936658, "learning_rate": 4.91808596918086e-06, "loss": 0.7346, "step": 3931 }, { "epoch": 0.11479956789582786, "grad_norm": 0.8576410339459627, "learning_rate": 4.917923763179238e-06, "loss": 0.6568, "step": 3932 }, { "epoch": 0.11482876412367522, "grad_norm": 0.7749055954159569, "learning_rate": 4.917761557177616e-06, "loss": 0.7462, "step": 3933 }, { "epoch": 0.11485796035152258, "grad_norm": 0.7950613520699071, "learning_rate": 4.917599351175994e-06, "loss": 0.7576, "step": 3934 }, { "epoch": 0.11488715657936995, "grad_norm": 0.8984500546200895, "learning_rate": 4.917437145174372e-06, "loss": 0.8521, "step": 3935 }, { "epoch": 0.1149163528072173, "grad_norm": 0.755204039968741, "learning_rate": 4.91727493917275e-06, "loss": 0.6996, "step": 3936 }, { "epoch": 0.11494554903506467, "grad_norm": 0.7664485581810242, "learning_rate": 4.917112733171128e-06, "loss": 0.7384, "step": 3937 }, { "epoch": 0.11497474526291203, "grad_norm": 0.9224966207421496, "learning_rate": 4.916950527169506e-06, "loss": 0.7029, "step": 3938 }, { "epoch": 0.11500394149075939, "grad_norm": 0.7721556901570025, "learning_rate": 4.916788321167883e-06, "loss": 0.6602, "step": 3939 }, { "epoch": 0.11503313771860675, "grad_norm": 0.7533998833624349, "learning_rate": 4.916626115166261e-06, "loss": 0.7149, "step": 3940 }, { "epoch": 0.11506233394645411, "grad_norm": 0.7248831933277134, "learning_rate": 4.916463909164639e-06, "loss": 0.6956, "step": 3941 }, { "epoch": 0.11509153017430147, "grad_norm": 0.8379029049056723, "learning_rate": 4.916301703163017e-06, "loss": 0.713, "step": 3942 }, { "epoch": 0.11512072640214885, "grad_norm": 0.6999511782984452, "learning_rate": 4.916139497161395e-06, "loss": 0.6358, "step": 3943 }, { "epoch": 0.11514992262999621, "grad_norm": 0.8636747309488637, "learning_rate": 4.915977291159773e-06, "loss": 0.7336, "step": 3944 }, { "epoch": 0.11517911885784357, "grad_norm": 0.9204331088051546, "learning_rate": 4.915815085158151e-06, "loss": 0.7419, "step": 3945 }, { "epoch": 0.11520831508569093, "grad_norm": 0.7240525398857053, "learning_rate": 4.915652879156529e-06, "loss": 0.6116, "step": 3946 }, { "epoch": 0.1152375113135383, "grad_norm": 0.8388329443136794, "learning_rate": 4.915490673154907e-06, "loss": 0.6977, "step": 3947 }, { "epoch": 0.11526670754138565, "grad_norm": 0.8614338532729652, "learning_rate": 4.915328467153285e-06, "loss": 0.7744, "step": 3948 }, { "epoch": 0.11529590376923302, "grad_norm": 0.8608716888494732, "learning_rate": 4.915166261151663e-06, "loss": 0.8051, "step": 3949 }, { "epoch": 0.11532509999708038, "grad_norm": 0.8032683980384733, "learning_rate": 4.915004055150041e-06, "loss": 0.7065, "step": 3950 }, { "epoch": 0.11535429622492774, "grad_norm": 0.85335893193196, "learning_rate": 4.914841849148419e-06, "loss": 0.862, "step": 3951 }, { "epoch": 0.1153834924527751, "grad_norm": 0.735573968599495, "learning_rate": 4.914679643146797e-06, "loss": 0.5941, "step": 3952 }, { "epoch": 0.11541268868062246, "grad_norm": 0.7738068343237849, "learning_rate": 4.914517437145174e-06, "loss": 0.7293, "step": 3953 }, { "epoch": 0.11544188490846982, "grad_norm": 0.7384444423380429, "learning_rate": 4.914355231143552e-06, "loss": 0.6477, "step": 3954 }, { "epoch": 0.11547108113631718, "grad_norm": 0.7759539994966583, "learning_rate": 4.9141930251419304e-06, "loss": 0.7241, "step": 3955 }, { "epoch": 0.11550027736416454, "grad_norm": 0.7141650131165511, "learning_rate": 4.9140308191403084e-06, "loss": 0.6049, "step": 3956 }, { "epoch": 0.1155294735920119, "grad_norm": 0.7860227196809163, "learning_rate": 4.9138686131386864e-06, "loss": 0.6849, "step": 3957 }, { "epoch": 0.11555866981985928, "grad_norm": 0.7665813992776903, "learning_rate": 4.9137064071370645e-06, "loss": 0.6356, "step": 3958 }, { "epoch": 0.11558786604770664, "grad_norm": 0.6919977377919581, "learning_rate": 4.9135442011354425e-06, "loss": 0.6421, "step": 3959 }, { "epoch": 0.115617062275554, "grad_norm": 0.7937675188376103, "learning_rate": 4.9133819951338205e-06, "loss": 0.6897, "step": 3960 }, { "epoch": 0.11564625850340136, "grad_norm": 0.8554932505819056, "learning_rate": 4.9132197891321985e-06, "loss": 0.7994, "step": 3961 }, { "epoch": 0.11567545473124873, "grad_norm": 0.8399719832841904, "learning_rate": 4.913057583130576e-06, "loss": 0.8129, "step": 3962 }, { "epoch": 0.11570465095909609, "grad_norm": 0.806124252380254, "learning_rate": 4.912895377128954e-06, "loss": 0.8348, "step": 3963 }, { "epoch": 0.11573384718694345, "grad_norm": 0.8189642627499079, "learning_rate": 4.9127331711273325e-06, "loss": 0.7402, "step": 3964 }, { "epoch": 0.11576304341479081, "grad_norm": 0.7682606408154377, "learning_rate": 4.9125709651257105e-06, "loss": 0.7024, "step": 3965 }, { "epoch": 0.11579223964263817, "grad_norm": 0.8177582261042193, "learning_rate": 4.9124087591240885e-06, "loss": 0.8141, "step": 3966 }, { "epoch": 0.11582143587048553, "grad_norm": 0.7775312545859663, "learning_rate": 4.9122465531224665e-06, "loss": 0.7615, "step": 3967 }, { "epoch": 0.11585063209833289, "grad_norm": 0.7510923958077476, "learning_rate": 4.912084347120844e-06, "loss": 0.6883, "step": 3968 }, { "epoch": 0.11587982832618025, "grad_norm": 0.7457105657707145, "learning_rate": 4.911922141119222e-06, "loss": 0.7076, "step": 3969 }, { "epoch": 0.11590902455402761, "grad_norm": 0.8277215062913045, "learning_rate": 4.9117599351176e-06, "loss": 0.834, "step": 3970 }, { "epoch": 0.11593822078187498, "grad_norm": 0.7833443163056073, "learning_rate": 4.911597729115978e-06, "loss": 0.7914, "step": 3971 }, { "epoch": 0.11596741700972234, "grad_norm": 0.7633137657997568, "learning_rate": 4.911435523114356e-06, "loss": 0.7246, "step": 3972 }, { "epoch": 0.11599661323756971, "grad_norm": 0.7686083792526957, "learning_rate": 4.911273317112734e-06, "loss": 0.6917, "step": 3973 }, { "epoch": 0.11602580946541707, "grad_norm": 0.8542076774159658, "learning_rate": 4.911111111111112e-06, "loss": 0.7716, "step": 3974 }, { "epoch": 0.11605500569326443, "grad_norm": 0.7243985378332536, "learning_rate": 4.91094890510949e-06, "loss": 0.6559, "step": 3975 }, { "epoch": 0.1160842019211118, "grad_norm": 0.8442915321484938, "learning_rate": 4.910786699107868e-06, "loss": 0.8065, "step": 3976 }, { "epoch": 0.11611339814895916, "grad_norm": 0.7900464196004431, "learning_rate": 4.910624493106245e-06, "loss": 0.7786, "step": 3977 }, { "epoch": 0.11614259437680652, "grad_norm": 0.707725531578969, "learning_rate": 4.910462287104623e-06, "loss": 0.6577, "step": 3978 }, { "epoch": 0.11617179060465388, "grad_norm": 0.7895098860678004, "learning_rate": 4.910300081103001e-06, "loss": 0.8075, "step": 3979 }, { "epoch": 0.11620098683250124, "grad_norm": 0.8843039181023019, "learning_rate": 4.910137875101379e-06, "loss": 0.8064, "step": 3980 }, { "epoch": 0.1162301830603486, "grad_norm": 1.8435768987456158, "learning_rate": 4.909975669099757e-06, "loss": 0.6994, "step": 3981 }, { "epoch": 0.11625937928819596, "grad_norm": 0.8045333391079622, "learning_rate": 4.909813463098135e-06, "loss": 0.7995, "step": 3982 }, { "epoch": 0.11628857551604332, "grad_norm": 0.7947936284739548, "learning_rate": 4.909651257096513e-06, "loss": 0.7487, "step": 3983 }, { "epoch": 0.11631777174389069, "grad_norm": 0.8302869142464017, "learning_rate": 4.909489051094891e-06, "loss": 0.7038, "step": 3984 }, { "epoch": 0.11634696797173805, "grad_norm": 0.7736478326521249, "learning_rate": 4.909326845093269e-06, "loss": 0.6934, "step": 3985 }, { "epoch": 0.11637616419958541, "grad_norm": 0.7525492257655854, "learning_rate": 4.909164639091647e-06, "loss": 0.7071, "step": 3986 }, { "epoch": 0.11640536042743277, "grad_norm": 0.680005129870568, "learning_rate": 4.909002433090025e-06, "loss": 0.6086, "step": 3987 }, { "epoch": 0.11643455665528014, "grad_norm": 0.7427631861273293, "learning_rate": 4.908840227088403e-06, "loss": 0.6802, "step": 3988 }, { "epoch": 0.1164637528831275, "grad_norm": 0.7784740444102503, "learning_rate": 4.908678021086781e-06, "loss": 0.724, "step": 3989 }, { "epoch": 0.11649294911097487, "grad_norm": 0.8054922693358764, "learning_rate": 4.908515815085159e-06, "loss": 0.6663, "step": 3990 }, { "epoch": 0.11652214533882223, "grad_norm": 1.6177171215524844, "learning_rate": 4.908353609083536e-06, "loss": 0.753, "step": 3991 }, { "epoch": 0.11655134156666959, "grad_norm": 0.7257449222560542, "learning_rate": 4.908191403081914e-06, "loss": 0.5953, "step": 3992 }, { "epoch": 0.11658053779451695, "grad_norm": 0.776092590035302, "learning_rate": 4.908029197080292e-06, "loss": 0.7068, "step": 3993 }, { "epoch": 0.11660973402236431, "grad_norm": 0.7423111727326359, "learning_rate": 4.90786699107867e-06, "loss": 0.6137, "step": 3994 }, { "epoch": 0.11663893025021167, "grad_norm": 0.8677285025105163, "learning_rate": 4.907704785077048e-06, "loss": 0.6826, "step": 3995 }, { "epoch": 0.11666812647805903, "grad_norm": 0.6933870964130797, "learning_rate": 4.907542579075426e-06, "loss": 0.6216, "step": 3996 }, { "epoch": 0.1166973227059064, "grad_norm": 1.0498190683378226, "learning_rate": 4.907380373073804e-06, "loss": 0.8136, "step": 3997 }, { "epoch": 0.11672651893375376, "grad_norm": 0.7026269663857249, "learning_rate": 4.907218167072182e-06, "loss": 0.6402, "step": 3998 }, { "epoch": 0.11675571516160112, "grad_norm": 0.7393700692771034, "learning_rate": 4.90705596107056e-06, "loss": 0.6695, "step": 3999 }, { "epoch": 0.11678491138944848, "grad_norm": 0.7629270109268579, "learning_rate": 4.906893755068937e-06, "loss": 0.7144, "step": 4000 }, { "epoch": 0.11681410761729584, "grad_norm": 0.8009526509350989, "learning_rate": 4.906731549067315e-06, "loss": 0.7466, "step": 4001 }, { "epoch": 0.1168433038451432, "grad_norm": 0.7780739884369845, "learning_rate": 4.906569343065694e-06, "loss": 0.7627, "step": 4002 }, { "epoch": 0.11687250007299058, "grad_norm": 0.6857652340761823, "learning_rate": 4.906407137064072e-06, "loss": 0.579, "step": 4003 }, { "epoch": 0.11690169630083794, "grad_norm": 0.77114534589214, "learning_rate": 4.90624493106245e-06, "loss": 0.6386, "step": 4004 }, { "epoch": 0.1169308925286853, "grad_norm": 0.7746631836757292, "learning_rate": 4.906082725060828e-06, "loss": 0.6754, "step": 4005 }, { "epoch": 0.11696008875653266, "grad_norm": 1.0712616473380037, "learning_rate": 4.905920519059205e-06, "loss": 0.8131, "step": 4006 }, { "epoch": 0.11698928498438002, "grad_norm": 0.7495414873672334, "learning_rate": 4.905758313057583e-06, "loss": 0.7198, "step": 4007 }, { "epoch": 0.11701848121222738, "grad_norm": 0.7568134997035924, "learning_rate": 4.905596107055961e-06, "loss": 0.7494, "step": 4008 }, { "epoch": 0.11704767744007474, "grad_norm": 0.7938348547608137, "learning_rate": 4.905433901054339e-06, "loss": 0.6963, "step": 4009 }, { "epoch": 0.1170768736679221, "grad_norm": 0.7644450454483624, "learning_rate": 4.905271695052717e-06, "loss": 0.6548, "step": 4010 }, { "epoch": 0.11710606989576947, "grad_norm": 0.7288464882456539, "learning_rate": 4.905109489051095e-06, "loss": 0.6689, "step": 4011 }, { "epoch": 0.11713526612361683, "grad_norm": 1.007092318123142, "learning_rate": 4.9049472830494734e-06, "loss": 0.6763, "step": 4012 }, { "epoch": 0.11716446235146419, "grad_norm": 0.7985269673844624, "learning_rate": 4.9047850770478514e-06, "loss": 0.7561, "step": 4013 }, { "epoch": 0.11719365857931155, "grad_norm": 0.8403273059177435, "learning_rate": 4.9046228710462294e-06, "loss": 0.7699, "step": 4014 }, { "epoch": 0.11722285480715891, "grad_norm": 0.7506812252734494, "learning_rate": 4.904460665044607e-06, "loss": 0.7017, "step": 4015 }, { "epoch": 0.11725205103500627, "grad_norm": 0.8019795316504426, "learning_rate": 4.904298459042985e-06, "loss": 0.7599, "step": 4016 }, { "epoch": 0.11728124726285363, "grad_norm": 0.7658254808899586, "learning_rate": 4.904136253041363e-06, "loss": 0.7088, "step": 4017 }, { "epoch": 0.11731044349070101, "grad_norm": 0.8124430886485887, "learning_rate": 4.903974047039741e-06, "loss": 0.6457, "step": 4018 }, { "epoch": 0.11733963971854837, "grad_norm": 0.7491809430566201, "learning_rate": 4.903811841038119e-06, "loss": 0.6944, "step": 4019 }, { "epoch": 0.11736883594639573, "grad_norm": 0.7564813940475366, "learning_rate": 4.903649635036497e-06, "loss": 0.7277, "step": 4020 }, { "epoch": 0.11739803217424309, "grad_norm": 0.7830470680922028, "learning_rate": 4.903487429034875e-06, "loss": 0.7798, "step": 4021 }, { "epoch": 0.11742722840209045, "grad_norm": 0.8024800863792069, "learning_rate": 4.903325223033253e-06, "loss": 0.6893, "step": 4022 }, { "epoch": 0.11745642462993781, "grad_norm": 0.8853003988045154, "learning_rate": 4.903163017031631e-06, "loss": 0.7673, "step": 4023 }, { "epoch": 0.11748562085778518, "grad_norm": 0.7947446351968708, "learning_rate": 4.903000811030009e-06, "loss": 0.7034, "step": 4024 }, { "epoch": 0.11751481708563254, "grad_norm": 0.7669152216297158, "learning_rate": 4.902838605028387e-06, "loss": 0.7259, "step": 4025 }, { "epoch": 0.1175440133134799, "grad_norm": 0.8031197357105261, "learning_rate": 4.902676399026765e-06, "loss": 0.6802, "step": 4026 }, { "epoch": 0.11757320954132726, "grad_norm": 1.883096993785619, "learning_rate": 4.902514193025143e-06, "loss": 0.7756, "step": 4027 }, { "epoch": 0.11760240576917462, "grad_norm": 0.7949913296874386, "learning_rate": 4.902351987023521e-06, "loss": 0.7238, "step": 4028 }, { "epoch": 0.11763160199702198, "grad_norm": 0.7731123590374287, "learning_rate": 4.902189781021898e-06, "loss": 0.7014, "step": 4029 }, { "epoch": 0.11766079822486934, "grad_norm": 0.7553895409404943, "learning_rate": 4.902027575020276e-06, "loss": 0.6482, "step": 4030 }, { "epoch": 0.1176899944527167, "grad_norm": 0.7774232976160401, "learning_rate": 4.901865369018654e-06, "loss": 0.7023, "step": 4031 }, { "epoch": 0.11771919068056406, "grad_norm": 0.7769778770755525, "learning_rate": 4.901703163017032e-06, "loss": 0.7224, "step": 4032 }, { "epoch": 0.11774838690841144, "grad_norm": 0.737330638036911, "learning_rate": 4.90154095701541e-06, "loss": 0.6549, "step": 4033 }, { "epoch": 0.1177775831362588, "grad_norm": 0.7707027969706587, "learning_rate": 4.901378751013788e-06, "loss": 0.7581, "step": 4034 }, { "epoch": 0.11780677936410616, "grad_norm": 1.0517831718131934, "learning_rate": 4.901216545012166e-06, "loss": 0.7554, "step": 4035 }, { "epoch": 0.11783597559195352, "grad_norm": 0.9106895460730157, "learning_rate": 4.901054339010544e-06, "loss": 0.723, "step": 4036 }, { "epoch": 0.11786517181980088, "grad_norm": 0.7877911001138678, "learning_rate": 4.900892133008922e-06, "loss": 0.6691, "step": 4037 }, { "epoch": 0.11789436804764825, "grad_norm": 0.8769061522853016, "learning_rate": 4.900729927007299e-06, "loss": 0.8571, "step": 4038 }, { "epoch": 0.11792356427549561, "grad_norm": 0.7576088593492993, "learning_rate": 4.900567721005677e-06, "loss": 0.6958, "step": 4039 }, { "epoch": 0.11795276050334297, "grad_norm": 0.8457590986717194, "learning_rate": 4.900405515004056e-06, "loss": 0.8233, "step": 4040 }, { "epoch": 0.11798195673119033, "grad_norm": 0.7298348506565583, "learning_rate": 4.900243309002434e-06, "loss": 0.6917, "step": 4041 }, { "epoch": 0.11801115295903769, "grad_norm": 0.811241607459665, "learning_rate": 4.900081103000812e-06, "loss": 0.7156, "step": 4042 }, { "epoch": 0.11804034918688505, "grad_norm": 0.8621603360025345, "learning_rate": 4.89991889699919e-06, "loss": 0.7544, "step": 4043 }, { "epoch": 0.11806954541473241, "grad_norm": 0.7889993525732836, "learning_rate": 4.899756690997567e-06, "loss": 0.6633, "step": 4044 }, { "epoch": 0.11809874164257977, "grad_norm": 0.7934568192523498, "learning_rate": 4.899594484995945e-06, "loss": 0.7276, "step": 4045 }, { "epoch": 0.11812793787042714, "grad_norm": 0.7314370150170783, "learning_rate": 4.899432278994323e-06, "loss": 0.663, "step": 4046 }, { "epoch": 0.1181571340982745, "grad_norm": 0.7328366099709612, "learning_rate": 4.899270072992701e-06, "loss": 0.6733, "step": 4047 }, { "epoch": 0.11818633032612187, "grad_norm": 0.8095411790072947, "learning_rate": 4.899107866991079e-06, "loss": 0.7264, "step": 4048 }, { "epoch": 0.11821552655396923, "grad_norm": 0.8503559798366587, "learning_rate": 4.898945660989457e-06, "loss": 0.7733, "step": 4049 }, { "epoch": 0.1182447227818166, "grad_norm": 0.8096194336425683, "learning_rate": 4.898783454987835e-06, "loss": 0.716, "step": 4050 }, { "epoch": 0.11827391900966396, "grad_norm": 0.7810990524486707, "learning_rate": 4.898621248986213e-06, "loss": 0.6605, "step": 4051 }, { "epoch": 0.11830311523751132, "grad_norm": 0.7967959958221106, "learning_rate": 4.898459042984591e-06, "loss": 0.7331, "step": 4052 }, { "epoch": 0.11833231146535868, "grad_norm": 0.7275927836185766, "learning_rate": 4.898296836982968e-06, "loss": 0.6635, "step": 4053 }, { "epoch": 0.11836150769320604, "grad_norm": 0.7734283632791866, "learning_rate": 4.898134630981346e-06, "loss": 0.7399, "step": 4054 }, { "epoch": 0.1183907039210534, "grad_norm": 0.7971621458385535, "learning_rate": 4.897972424979724e-06, "loss": 0.6891, "step": 4055 }, { "epoch": 0.11841990014890076, "grad_norm": 0.7716894488552402, "learning_rate": 4.897810218978102e-06, "loss": 0.7257, "step": 4056 }, { "epoch": 0.11844909637674812, "grad_norm": 0.7965588795107127, "learning_rate": 4.89764801297648e-06, "loss": 0.7178, "step": 4057 }, { "epoch": 0.11847829260459548, "grad_norm": 0.8548792846159503, "learning_rate": 4.897485806974858e-06, "loss": 0.7978, "step": 4058 }, { "epoch": 0.11850748883244284, "grad_norm": 0.796477075015393, "learning_rate": 4.897323600973236e-06, "loss": 0.7666, "step": 4059 }, { "epoch": 0.1185366850602902, "grad_norm": 0.7435097751640983, "learning_rate": 4.897161394971614e-06, "loss": 0.6993, "step": 4060 }, { "epoch": 0.11856588128813757, "grad_norm": 0.8124019827721382, "learning_rate": 4.896999188969992e-06, "loss": 0.7635, "step": 4061 }, { "epoch": 0.11859507751598493, "grad_norm": 0.781321825277677, "learning_rate": 4.89683698296837e-06, "loss": 0.749, "step": 4062 }, { "epoch": 0.1186242737438323, "grad_norm": 0.7233995307815629, "learning_rate": 4.896674776966748e-06, "loss": 0.6039, "step": 4063 }, { "epoch": 0.11865346997167966, "grad_norm": 0.7858749226925018, "learning_rate": 4.896512570965126e-06, "loss": 0.6859, "step": 4064 }, { "epoch": 0.11868266619952703, "grad_norm": 0.8782085800558347, "learning_rate": 4.896350364963504e-06, "loss": 0.7452, "step": 4065 }, { "epoch": 0.11871186242737439, "grad_norm": 0.825215725994614, "learning_rate": 4.896188158961882e-06, "loss": 0.779, "step": 4066 }, { "epoch": 0.11874105865522175, "grad_norm": 0.7153025851551338, "learning_rate": 4.8960259529602596e-06, "loss": 0.6745, "step": 4067 }, { "epoch": 0.11877025488306911, "grad_norm": 0.7847812910960292, "learning_rate": 4.8958637469586376e-06, "loss": 0.7421, "step": 4068 }, { "epoch": 0.11879945111091647, "grad_norm": 0.7733675798609432, "learning_rate": 4.895701540957016e-06, "loss": 0.6846, "step": 4069 }, { "epoch": 0.11882864733876383, "grad_norm": 0.7796656203328437, "learning_rate": 4.895539334955394e-06, "loss": 0.6777, "step": 4070 }, { "epoch": 0.1188578435666112, "grad_norm": 0.7701954803422347, "learning_rate": 4.895377128953772e-06, "loss": 0.7014, "step": 4071 }, { "epoch": 0.11888703979445855, "grad_norm": 0.7887784134266586, "learning_rate": 4.89521492295215e-06, "loss": 0.7091, "step": 4072 }, { "epoch": 0.11891623602230592, "grad_norm": 0.709209381254268, "learning_rate": 4.895052716950528e-06, "loss": 0.6398, "step": 4073 }, { "epoch": 0.11894543225015328, "grad_norm": 0.9251920018730025, "learning_rate": 4.894890510948906e-06, "loss": 0.8322, "step": 4074 }, { "epoch": 0.11897462847800064, "grad_norm": 0.8231433463834336, "learning_rate": 4.894728304947284e-06, "loss": 0.7787, "step": 4075 }, { "epoch": 0.119003824705848, "grad_norm": 0.8080990916638783, "learning_rate": 4.894566098945661e-06, "loss": 0.7597, "step": 4076 }, { "epoch": 0.11903302093369536, "grad_norm": 0.7705998114496703, "learning_rate": 4.894403892944039e-06, "loss": 0.6778, "step": 4077 }, { "epoch": 0.11906221716154274, "grad_norm": 0.7549038033702438, "learning_rate": 4.894241686942418e-06, "loss": 0.68, "step": 4078 }, { "epoch": 0.1190914133893901, "grad_norm": 0.7350310899150696, "learning_rate": 4.894079480940796e-06, "loss": 0.6647, "step": 4079 }, { "epoch": 0.11912060961723746, "grad_norm": 0.810741193483065, "learning_rate": 4.893917274939174e-06, "loss": 0.7665, "step": 4080 }, { "epoch": 0.11914980584508482, "grad_norm": 0.7702355985661066, "learning_rate": 4.893755068937552e-06, "loss": 0.7051, "step": 4081 }, { "epoch": 0.11917900207293218, "grad_norm": 1.2824253003617072, "learning_rate": 4.893592862935929e-06, "loss": 0.7309, "step": 4082 }, { "epoch": 0.11920819830077954, "grad_norm": 0.830958161994564, "learning_rate": 4.893430656934307e-06, "loss": 0.7371, "step": 4083 }, { "epoch": 0.1192373945286269, "grad_norm": 0.7857109183274248, "learning_rate": 4.893268450932685e-06, "loss": 0.7558, "step": 4084 }, { "epoch": 0.11926659075647426, "grad_norm": 0.8022057148037398, "learning_rate": 4.893106244931063e-06, "loss": 0.7547, "step": 4085 }, { "epoch": 0.11929578698432163, "grad_norm": 0.784920794114983, "learning_rate": 4.892944038929441e-06, "loss": 0.6969, "step": 4086 }, { "epoch": 0.11932498321216899, "grad_norm": 0.7706605331264368, "learning_rate": 4.892781832927819e-06, "loss": 0.619, "step": 4087 }, { "epoch": 0.11935417944001635, "grad_norm": 0.8261987606150147, "learning_rate": 4.892619626926197e-06, "loss": 0.7179, "step": 4088 }, { "epoch": 0.11938337566786371, "grad_norm": 0.9721759011595394, "learning_rate": 4.892457420924575e-06, "loss": 0.6517, "step": 4089 }, { "epoch": 0.11941257189571107, "grad_norm": 0.7606401004253915, "learning_rate": 4.892295214922952e-06, "loss": 0.7417, "step": 4090 }, { "epoch": 0.11944176812355843, "grad_norm": 0.7485243265145877, "learning_rate": 4.89213300892133e-06, "loss": 0.7148, "step": 4091 }, { "epoch": 0.11947096435140579, "grad_norm": 0.745522760327425, "learning_rate": 4.891970802919708e-06, "loss": 0.7166, "step": 4092 }, { "epoch": 0.11950016057925317, "grad_norm": 0.7654227370508861, "learning_rate": 4.891808596918086e-06, "loss": 0.7764, "step": 4093 }, { "epoch": 0.11952935680710053, "grad_norm": 0.7550760144791434, "learning_rate": 4.891646390916464e-06, "loss": 0.6984, "step": 4094 }, { "epoch": 0.11955855303494789, "grad_norm": 0.7331386160719647, "learning_rate": 4.891484184914842e-06, "loss": 0.6429, "step": 4095 }, { "epoch": 0.11958774926279525, "grad_norm": 0.7207240439041913, "learning_rate": 4.89132197891322e-06, "loss": 0.6423, "step": 4096 }, { "epoch": 0.11961694549064261, "grad_norm": 0.8032182888161423, "learning_rate": 4.891159772911598e-06, "loss": 0.7497, "step": 4097 }, { "epoch": 0.11964614171848997, "grad_norm": 0.7803199705940753, "learning_rate": 4.890997566909976e-06, "loss": 0.7089, "step": 4098 }, { "epoch": 0.11967533794633733, "grad_norm": 0.9033038642539196, "learning_rate": 4.890835360908354e-06, "loss": 0.741, "step": 4099 }, { "epoch": 0.1197045341741847, "grad_norm": 0.7490340168280198, "learning_rate": 4.890673154906732e-06, "loss": 0.6467, "step": 4100 }, { "epoch": 0.11973373040203206, "grad_norm": 0.6994791135106311, "learning_rate": 4.89051094890511e-06, "loss": 0.6364, "step": 4101 }, { "epoch": 0.11976292662987942, "grad_norm": 0.7760423834618689, "learning_rate": 4.890348742903488e-06, "loss": 0.7558, "step": 4102 }, { "epoch": 0.11979212285772678, "grad_norm": 0.7962221069486618, "learning_rate": 4.890186536901866e-06, "loss": 0.8449, "step": 4103 }, { "epoch": 0.11982131908557414, "grad_norm": 0.8143789501533909, "learning_rate": 4.890024330900244e-06, "loss": 0.7861, "step": 4104 }, { "epoch": 0.1198505153134215, "grad_norm": 0.7974038000380382, "learning_rate": 4.889862124898621e-06, "loss": 0.7974, "step": 4105 }, { "epoch": 0.11987971154126886, "grad_norm": 0.7316154938942867, "learning_rate": 4.889699918896999e-06, "loss": 0.6102, "step": 4106 }, { "epoch": 0.11990890776911622, "grad_norm": 0.838047744927454, "learning_rate": 4.889537712895377e-06, "loss": 0.7429, "step": 4107 }, { "epoch": 0.11993810399696359, "grad_norm": 0.728395808593125, "learning_rate": 4.889375506893755e-06, "loss": 0.6931, "step": 4108 }, { "epoch": 0.11996730022481096, "grad_norm": 0.7924421984186689, "learning_rate": 4.889213300892133e-06, "loss": 0.7347, "step": 4109 }, { "epoch": 0.11999649645265832, "grad_norm": 0.8687443779988316, "learning_rate": 4.889051094890511e-06, "loss": 0.8067, "step": 4110 }, { "epoch": 0.12002569268050568, "grad_norm": 1.3365205989810738, "learning_rate": 4.888888888888889e-06, "loss": 0.7375, "step": 4111 }, { "epoch": 0.12005488890835304, "grad_norm": 0.8014460727812593, "learning_rate": 4.888726682887267e-06, "loss": 0.7763, "step": 4112 }, { "epoch": 0.1200840851362004, "grad_norm": 0.7395267393487445, "learning_rate": 4.888564476885645e-06, "loss": 0.6848, "step": 4113 }, { "epoch": 0.12011328136404777, "grad_norm": 0.7675238844365386, "learning_rate": 4.8884022708840225e-06, "loss": 0.718, "step": 4114 }, { "epoch": 0.12014247759189513, "grad_norm": 0.9512897835045546, "learning_rate": 4.888240064882401e-06, "loss": 0.7519, "step": 4115 }, { "epoch": 0.12017167381974249, "grad_norm": 0.7389348761234072, "learning_rate": 4.888077858880779e-06, "loss": 0.5951, "step": 4116 }, { "epoch": 0.12020087004758985, "grad_norm": 0.8606614228089567, "learning_rate": 4.887915652879157e-06, "loss": 0.7418, "step": 4117 }, { "epoch": 0.12023006627543721, "grad_norm": 0.7992751497232314, "learning_rate": 4.887753446877535e-06, "loss": 0.7559, "step": 4118 }, { "epoch": 0.12025926250328457, "grad_norm": 0.8285843114701538, "learning_rate": 4.887591240875913e-06, "loss": 0.7838, "step": 4119 }, { "epoch": 0.12028845873113193, "grad_norm": 0.8107103875859578, "learning_rate": 4.8874290348742905e-06, "loss": 0.7357, "step": 4120 }, { "epoch": 0.1203176549589793, "grad_norm": 0.7884291448849946, "learning_rate": 4.8872668288726685e-06, "loss": 0.6919, "step": 4121 }, { "epoch": 0.12034685118682666, "grad_norm": 0.7028919454350504, "learning_rate": 4.8871046228710466e-06, "loss": 0.6683, "step": 4122 }, { "epoch": 0.12037604741467402, "grad_norm": 0.7635252638649204, "learning_rate": 4.8869424168694246e-06, "loss": 0.6716, "step": 4123 }, { "epoch": 0.12040524364252139, "grad_norm": 0.7743032101113342, "learning_rate": 4.8867802108678026e-06, "loss": 0.7643, "step": 4124 }, { "epoch": 0.12043443987036875, "grad_norm": 0.7926092925785506, "learning_rate": 4.8866180048661806e-06, "loss": 0.7694, "step": 4125 }, { "epoch": 0.12046363609821611, "grad_norm": 0.7543144406227134, "learning_rate": 4.886455798864559e-06, "loss": 0.6909, "step": 4126 }, { "epoch": 0.12049283232606348, "grad_norm": 0.7957281263173127, "learning_rate": 4.886293592862937e-06, "loss": 0.7309, "step": 4127 }, { "epoch": 0.12052202855391084, "grad_norm": 0.7678121752151221, "learning_rate": 4.886131386861314e-06, "loss": 0.6883, "step": 4128 }, { "epoch": 0.1205512247817582, "grad_norm": 0.7254522431618126, "learning_rate": 4.885969180859692e-06, "loss": 0.6811, "step": 4129 }, { "epoch": 0.12058042100960556, "grad_norm": 0.8016549713499846, "learning_rate": 4.88580697485807e-06, "loss": 0.6868, "step": 4130 }, { "epoch": 0.12060961723745292, "grad_norm": 0.7819942044647898, "learning_rate": 4.885644768856448e-06, "loss": 0.7405, "step": 4131 }, { "epoch": 0.12063881346530028, "grad_norm": 0.8739481245826873, "learning_rate": 4.885482562854826e-06, "loss": 0.7282, "step": 4132 }, { "epoch": 0.12066800969314764, "grad_norm": 0.8171516313881267, "learning_rate": 4.885320356853204e-06, "loss": 0.7026, "step": 4133 }, { "epoch": 0.120697205920995, "grad_norm": 0.7307539285036639, "learning_rate": 4.885158150851582e-06, "loss": 0.6303, "step": 4134 }, { "epoch": 0.12072640214884237, "grad_norm": 0.8139752531625467, "learning_rate": 4.88499594484996e-06, "loss": 0.7385, "step": 4135 }, { "epoch": 0.12075559837668973, "grad_norm": 0.7420050888274101, "learning_rate": 4.884833738848338e-06, "loss": 0.6787, "step": 4136 }, { "epoch": 0.12078479460453709, "grad_norm": 0.7887075973822322, "learning_rate": 4.884671532846716e-06, "loss": 0.7154, "step": 4137 }, { "epoch": 0.12081399083238445, "grad_norm": 0.7214519580641688, "learning_rate": 4.884509326845094e-06, "loss": 0.6273, "step": 4138 }, { "epoch": 0.12084318706023182, "grad_norm": 0.776615463509072, "learning_rate": 4.884347120843472e-06, "loss": 0.7132, "step": 4139 }, { "epoch": 0.12087238328807919, "grad_norm": 0.8389438380440419, "learning_rate": 4.88418491484185e-06, "loss": 0.8215, "step": 4140 }, { "epoch": 0.12090157951592655, "grad_norm": 0.8885741605719808, "learning_rate": 4.884022708840228e-06, "loss": 0.7515, "step": 4141 }, { "epoch": 0.12093077574377391, "grad_norm": 0.8212398706390077, "learning_rate": 4.883860502838606e-06, "loss": 0.6637, "step": 4142 }, { "epoch": 0.12095997197162127, "grad_norm": 0.7956099099095049, "learning_rate": 4.883698296836983e-06, "loss": 0.7191, "step": 4143 }, { "epoch": 0.12098916819946863, "grad_norm": 0.7005376321531744, "learning_rate": 4.883536090835361e-06, "loss": 0.6219, "step": 4144 }, { "epoch": 0.12101836442731599, "grad_norm": 0.7969687452042004, "learning_rate": 4.883373884833739e-06, "loss": 0.6769, "step": 4145 }, { "epoch": 0.12104756065516335, "grad_norm": 0.8604736974548576, "learning_rate": 4.883211678832117e-06, "loss": 0.74, "step": 4146 }, { "epoch": 0.12107675688301071, "grad_norm": 0.7691880368356544, "learning_rate": 4.883049472830495e-06, "loss": 0.7291, "step": 4147 }, { "epoch": 0.12110595311085808, "grad_norm": 0.749113881145696, "learning_rate": 4.882887266828873e-06, "loss": 0.6697, "step": 4148 }, { "epoch": 0.12113514933870544, "grad_norm": 0.7722144008997519, "learning_rate": 4.882725060827251e-06, "loss": 0.7559, "step": 4149 }, { "epoch": 0.1211643455665528, "grad_norm": 0.8194488434443211, "learning_rate": 4.882562854825629e-06, "loss": 0.6623, "step": 4150 }, { "epoch": 0.12119354179440016, "grad_norm": 0.6690864265321191, "learning_rate": 4.882400648824007e-06, "loss": 0.5898, "step": 4151 }, { "epoch": 0.12122273802224752, "grad_norm": 0.8092603404338365, "learning_rate": 4.882238442822384e-06, "loss": 0.7373, "step": 4152 }, { "epoch": 0.12125193425009488, "grad_norm": 0.7772195681799748, "learning_rate": 4.882076236820763e-06, "loss": 0.7072, "step": 4153 }, { "epoch": 0.12128113047794226, "grad_norm": 0.7245342124715503, "learning_rate": 4.881914030819141e-06, "loss": 0.6746, "step": 4154 }, { "epoch": 0.12131032670578962, "grad_norm": 0.7324610373315907, "learning_rate": 4.881751824817519e-06, "loss": 0.6535, "step": 4155 }, { "epoch": 0.12133952293363698, "grad_norm": 0.9151504836324686, "learning_rate": 4.881589618815897e-06, "loss": 0.8119, "step": 4156 }, { "epoch": 0.12136871916148434, "grad_norm": 0.8373754638582465, "learning_rate": 4.881427412814275e-06, "loss": 0.8045, "step": 4157 }, { "epoch": 0.1213979153893317, "grad_norm": 1.2028970139010242, "learning_rate": 4.881265206812652e-06, "loss": 0.8528, "step": 4158 }, { "epoch": 0.12142711161717906, "grad_norm": 0.7874546055854191, "learning_rate": 4.88110300081103e-06, "loss": 0.7785, "step": 4159 }, { "epoch": 0.12145630784502642, "grad_norm": 0.826975071779429, "learning_rate": 4.880940794809408e-06, "loss": 0.7737, "step": 4160 }, { "epoch": 0.12148550407287378, "grad_norm": 0.7906579599878633, "learning_rate": 4.880778588807786e-06, "loss": 0.8037, "step": 4161 }, { "epoch": 0.12151470030072115, "grad_norm": 0.7915648451867066, "learning_rate": 4.880616382806164e-06, "loss": 0.6816, "step": 4162 }, { "epoch": 0.1215438965285685, "grad_norm": 0.8038899038673539, "learning_rate": 4.880454176804542e-06, "loss": 0.7041, "step": 4163 }, { "epoch": 0.12157309275641587, "grad_norm": 1.1071634918655298, "learning_rate": 4.88029197080292e-06, "loss": 0.8579, "step": 4164 }, { "epoch": 0.12160228898426323, "grad_norm": 0.7221148962398254, "learning_rate": 4.880129764801298e-06, "loss": 0.6798, "step": 4165 }, { "epoch": 0.12163148521211059, "grad_norm": 0.7888521900999593, "learning_rate": 4.8799675587996755e-06, "loss": 0.7131, "step": 4166 }, { "epoch": 0.12166068143995795, "grad_norm": 0.9147345919287728, "learning_rate": 4.8798053527980535e-06, "loss": 0.7842, "step": 4167 }, { "epoch": 0.12168987766780531, "grad_norm": 0.8083087697138746, "learning_rate": 4.8796431467964315e-06, "loss": 0.6691, "step": 4168 }, { "epoch": 0.12171907389565269, "grad_norm": 0.7201579938203259, "learning_rate": 4.8794809407948095e-06, "loss": 0.6582, "step": 4169 }, { "epoch": 0.12174827012350005, "grad_norm": 0.8099806629917645, "learning_rate": 4.8793187347931875e-06, "loss": 0.8022, "step": 4170 }, { "epoch": 0.12177746635134741, "grad_norm": 0.7180260491299982, "learning_rate": 4.8791565287915655e-06, "loss": 0.689, "step": 4171 }, { "epoch": 0.12180666257919477, "grad_norm": 0.9557329589535869, "learning_rate": 4.8789943227899435e-06, "loss": 0.7485, "step": 4172 }, { "epoch": 0.12183585880704213, "grad_norm": 0.7682074793934784, "learning_rate": 4.8788321167883215e-06, "loss": 0.7209, "step": 4173 }, { "epoch": 0.1218650550348895, "grad_norm": 0.8047831138817693, "learning_rate": 4.8786699107866995e-06, "loss": 0.7256, "step": 4174 }, { "epoch": 0.12189425126273686, "grad_norm": 0.8622683917795001, "learning_rate": 4.8785077047850775e-06, "loss": 0.7589, "step": 4175 }, { "epoch": 0.12192344749058422, "grad_norm": 1.140581434353281, "learning_rate": 4.8783454987834555e-06, "loss": 0.7988, "step": 4176 }, { "epoch": 0.12195264371843158, "grad_norm": 0.7449489228284689, "learning_rate": 4.8781832927818335e-06, "loss": 0.6514, "step": 4177 }, { "epoch": 0.12198183994627894, "grad_norm": 0.7699514137664674, "learning_rate": 4.8780210867802115e-06, "loss": 0.7004, "step": 4178 }, { "epoch": 0.1220110361741263, "grad_norm": 0.8200812840293846, "learning_rate": 4.8778588807785896e-06, "loss": 0.6985, "step": 4179 }, { "epoch": 0.12204023240197366, "grad_norm": 0.7499463359779016, "learning_rate": 4.8776966747769676e-06, "loss": 0.6411, "step": 4180 }, { "epoch": 0.12206942862982102, "grad_norm": 0.7684562220473894, "learning_rate": 4.877534468775345e-06, "loss": 0.6715, "step": 4181 }, { "epoch": 0.12209862485766838, "grad_norm": 0.9256181633964863, "learning_rate": 4.877372262773723e-06, "loss": 0.7137, "step": 4182 }, { "epoch": 0.12212782108551574, "grad_norm": 0.7637783415421523, "learning_rate": 4.877210056772101e-06, "loss": 0.6658, "step": 4183 }, { "epoch": 0.12215701731336312, "grad_norm": 0.7497878423616361, "learning_rate": 4.877047850770479e-06, "loss": 0.6759, "step": 4184 }, { "epoch": 0.12218621354121048, "grad_norm": 0.7268439127023464, "learning_rate": 4.876885644768857e-06, "loss": 0.7078, "step": 4185 }, { "epoch": 0.12221540976905784, "grad_norm": 0.7755487964486951, "learning_rate": 4.876723438767235e-06, "loss": 0.6532, "step": 4186 }, { "epoch": 0.1222446059969052, "grad_norm": 0.7851262024966952, "learning_rate": 4.876561232765613e-06, "loss": 0.7722, "step": 4187 }, { "epoch": 0.12227380222475256, "grad_norm": 0.8000759887737029, "learning_rate": 4.876399026763991e-06, "loss": 0.8252, "step": 4188 }, { "epoch": 0.12230299845259993, "grad_norm": 0.7093118970722638, "learning_rate": 4.876236820762369e-06, "loss": 0.5975, "step": 4189 }, { "epoch": 0.12233219468044729, "grad_norm": 0.7119626936510076, "learning_rate": 4.876074614760746e-06, "loss": 0.6524, "step": 4190 }, { "epoch": 0.12236139090829465, "grad_norm": 0.7943662111251248, "learning_rate": 4.875912408759125e-06, "loss": 0.7892, "step": 4191 }, { "epoch": 0.12239058713614201, "grad_norm": 0.7551921194375836, "learning_rate": 4.875750202757503e-06, "loss": 0.708, "step": 4192 }, { "epoch": 0.12241978336398937, "grad_norm": 0.8075939304125578, "learning_rate": 4.875587996755881e-06, "loss": 0.799, "step": 4193 }, { "epoch": 0.12244897959183673, "grad_norm": 0.7773792148907729, "learning_rate": 4.875425790754259e-06, "loss": 0.701, "step": 4194 }, { "epoch": 0.1224781758196841, "grad_norm": 0.7930070603059253, "learning_rate": 4.875263584752636e-06, "loss": 0.7253, "step": 4195 }, { "epoch": 0.12250737204753145, "grad_norm": 0.8778018998643505, "learning_rate": 4.875101378751014e-06, "loss": 0.7706, "step": 4196 }, { "epoch": 0.12253656827537882, "grad_norm": 0.8095935939701827, "learning_rate": 4.874939172749392e-06, "loss": 0.6826, "step": 4197 }, { "epoch": 0.12256576450322618, "grad_norm": 0.8042456780096543, "learning_rate": 4.87477696674777e-06, "loss": 0.7491, "step": 4198 }, { "epoch": 0.12259496073107355, "grad_norm": 0.8525309651696104, "learning_rate": 4.874614760746148e-06, "loss": 0.7462, "step": 4199 }, { "epoch": 0.12262415695892091, "grad_norm": 0.6722524220232797, "learning_rate": 4.874452554744526e-06, "loss": 0.573, "step": 4200 }, { "epoch": 0.12265335318676827, "grad_norm": 0.7498239765906018, "learning_rate": 4.874290348742904e-06, "loss": 0.7077, "step": 4201 }, { "epoch": 0.12268254941461564, "grad_norm": 0.762355491760465, "learning_rate": 4.874128142741282e-06, "loss": 0.6853, "step": 4202 }, { "epoch": 0.122711745642463, "grad_norm": 0.7362574507510045, "learning_rate": 4.87396593673966e-06, "loss": 0.6365, "step": 4203 }, { "epoch": 0.12274094187031036, "grad_norm": 0.7719389904457785, "learning_rate": 4.873803730738037e-06, "loss": 0.6705, "step": 4204 }, { "epoch": 0.12277013809815772, "grad_norm": 0.8997410428035201, "learning_rate": 4.873641524736415e-06, "loss": 0.7838, "step": 4205 }, { "epoch": 0.12279933432600508, "grad_norm": 0.8287434463299649, "learning_rate": 4.873479318734793e-06, "loss": 0.7329, "step": 4206 }, { "epoch": 0.12282853055385244, "grad_norm": 10.952312103713878, "learning_rate": 4.873317112733171e-06, "loss": 0.8253, "step": 4207 }, { "epoch": 0.1228577267816998, "grad_norm": 0.7507452359742939, "learning_rate": 4.873154906731549e-06, "loss": 0.6897, "step": 4208 }, { "epoch": 0.12288692300954716, "grad_norm": 0.7536365204483317, "learning_rate": 4.872992700729927e-06, "loss": 0.7372, "step": 4209 }, { "epoch": 0.12291611923739452, "grad_norm": 0.7720123685685315, "learning_rate": 4.872830494728305e-06, "loss": 0.6815, "step": 4210 }, { "epoch": 0.12294531546524189, "grad_norm": 0.7334681581101796, "learning_rate": 4.872668288726683e-06, "loss": 0.6574, "step": 4211 }, { "epoch": 0.12297451169308925, "grad_norm": 0.7611199711437903, "learning_rate": 4.872506082725061e-06, "loss": 0.7056, "step": 4212 }, { "epoch": 0.12300370792093661, "grad_norm": 0.8538709397111371, "learning_rate": 4.872343876723439e-06, "loss": 0.6908, "step": 4213 }, { "epoch": 0.12303290414878398, "grad_norm": 0.7007501144249825, "learning_rate": 4.872181670721817e-06, "loss": 0.6724, "step": 4214 }, { "epoch": 0.12306210037663134, "grad_norm": 0.8244773211073774, "learning_rate": 4.872019464720195e-06, "loss": 0.7899, "step": 4215 }, { "epoch": 0.1230912966044787, "grad_norm": 0.6993818601622387, "learning_rate": 4.871857258718573e-06, "loss": 0.7038, "step": 4216 }, { "epoch": 0.12312049283232607, "grad_norm": 0.780597767189265, "learning_rate": 4.871695052716951e-06, "loss": 0.7301, "step": 4217 }, { "epoch": 0.12314968906017343, "grad_norm": 0.8347744490880253, "learning_rate": 4.871532846715329e-06, "loss": 0.8068, "step": 4218 }, { "epoch": 0.12317888528802079, "grad_norm": 0.827283649950857, "learning_rate": 4.8713706407137064e-06, "loss": 0.7451, "step": 4219 }, { "epoch": 0.12320808151586815, "grad_norm": 0.800214456651011, "learning_rate": 4.8712084347120844e-06, "loss": 0.6874, "step": 4220 }, { "epoch": 0.12323727774371551, "grad_norm": 0.7491728241270424, "learning_rate": 4.8710462287104625e-06, "loss": 0.6722, "step": 4221 }, { "epoch": 0.12326647397156287, "grad_norm": 0.8640272921374631, "learning_rate": 4.8708840227088405e-06, "loss": 0.7396, "step": 4222 }, { "epoch": 0.12329567019941023, "grad_norm": 0.7191363789725802, "learning_rate": 4.8707218167072185e-06, "loss": 0.6099, "step": 4223 }, { "epoch": 0.1233248664272576, "grad_norm": 0.8052394185570467, "learning_rate": 4.8705596107055965e-06, "loss": 0.7071, "step": 4224 }, { "epoch": 0.12335406265510496, "grad_norm": 0.8122312299609964, "learning_rate": 4.8703974047039745e-06, "loss": 0.7488, "step": 4225 }, { "epoch": 0.12338325888295232, "grad_norm": 0.799635522340617, "learning_rate": 4.8702351987023525e-06, "loss": 0.6866, "step": 4226 }, { "epoch": 0.12341245511079968, "grad_norm": 0.7491512944337898, "learning_rate": 4.8700729927007305e-06, "loss": 0.7157, "step": 4227 }, { "epoch": 0.12344165133864704, "grad_norm": 0.7313873181729659, "learning_rate": 4.869910786699108e-06, "loss": 0.6681, "step": 4228 }, { "epoch": 0.12347084756649442, "grad_norm": 0.7380008319472715, "learning_rate": 4.8697485806974865e-06, "loss": 0.6566, "step": 4229 }, { "epoch": 0.12350004379434178, "grad_norm": 0.7818477394207616, "learning_rate": 4.8695863746958645e-06, "loss": 0.7216, "step": 4230 }, { "epoch": 0.12352924002218914, "grad_norm": 0.7128220519696903, "learning_rate": 4.8694241686942425e-06, "loss": 0.6125, "step": 4231 }, { "epoch": 0.1235584362500365, "grad_norm": 0.7230577960347576, "learning_rate": 4.8692619626926205e-06, "loss": 0.6223, "step": 4232 }, { "epoch": 0.12358763247788386, "grad_norm": 0.8977482408247142, "learning_rate": 4.869099756690998e-06, "loss": 0.7837, "step": 4233 }, { "epoch": 0.12361682870573122, "grad_norm": 0.766993638608617, "learning_rate": 4.868937550689376e-06, "loss": 0.7387, "step": 4234 }, { "epoch": 0.12364602493357858, "grad_norm": 0.7580949244359783, "learning_rate": 4.868775344687754e-06, "loss": 0.6806, "step": 4235 }, { "epoch": 0.12367522116142594, "grad_norm": 0.7794355029040376, "learning_rate": 4.868613138686132e-06, "loss": 0.7044, "step": 4236 }, { "epoch": 0.1237044173892733, "grad_norm": 0.7973692494014791, "learning_rate": 4.86845093268451e-06, "loss": 0.6309, "step": 4237 }, { "epoch": 0.12373361361712067, "grad_norm": 0.796412352677773, "learning_rate": 4.868288726682888e-06, "loss": 0.7898, "step": 4238 }, { "epoch": 0.12376280984496803, "grad_norm": 0.7614913128816736, "learning_rate": 4.868126520681266e-06, "loss": 0.7144, "step": 4239 }, { "epoch": 0.12379200607281539, "grad_norm": 0.7758458801189759, "learning_rate": 4.867964314679644e-06, "loss": 0.7599, "step": 4240 }, { "epoch": 0.12382120230066275, "grad_norm": 0.7161881097625337, "learning_rate": 4.867802108678022e-06, "loss": 0.6369, "step": 4241 }, { "epoch": 0.12385039852851011, "grad_norm": 0.7969043657561707, "learning_rate": 4.867639902676399e-06, "loss": 0.8026, "step": 4242 }, { "epoch": 0.12387959475635747, "grad_norm": 0.7667072106231485, "learning_rate": 4.867477696674777e-06, "loss": 0.7168, "step": 4243 }, { "epoch": 0.12390879098420485, "grad_norm": 0.8370117227208527, "learning_rate": 4.867315490673155e-06, "loss": 0.7706, "step": 4244 }, { "epoch": 0.12393798721205221, "grad_norm": 0.7709206807037011, "learning_rate": 4.867153284671533e-06, "loss": 0.7013, "step": 4245 }, { "epoch": 0.12396718343989957, "grad_norm": 0.7875429611805488, "learning_rate": 4.866991078669911e-06, "loss": 0.7067, "step": 4246 }, { "epoch": 0.12399637966774693, "grad_norm": 0.70783874546361, "learning_rate": 4.866828872668289e-06, "loss": 0.6949, "step": 4247 }, { "epoch": 0.12402557589559429, "grad_norm": 0.7092359536566071, "learning_rate": 4.866666666666667e-06, "loss": 0.6291, "step": 4248 }, { "epoch": 0.12405477212344165, "grad_norm": 0.7839156150268153, "learning_rate": 4.866504460665045e-06, "loss": 0.685, "step": 4249 }, { "epoch": 0.12408396835128901, "grad_norm": 0.8398327000474978, "learning_rate": 4.866342254663423e-06, "loss": 0.6218, "step": 4250 }, { "epoch": 0.12411316457913638, "grad_norm": 0.7252599499180045, "learning_rate": 4.866180048661801e-06, "loss": 0.673, "step": 4251 }, { "epoch": 0.12414236080698374, "grad_norm": 0.7631414532536341, "learning_rate": 4.866017842660179e-06, "loss": 0.714, "step": 4252 }, { "epoch": 0.1241715570348311, "grad_norm": 0.8276664253266802, "learning_rate": 4.865855636658557e-06, "loss": 0.7588, "step": 4253 }, { "epoch": 0.12420075326267846, "grad_norm": 0.9696655336066906, "learning_rate": 4.865693430656935e-06, "loss": 0.7335, "step": 4254 }, { "epoch": 0.12422994949052582, "grad_norm": 0.7566735937258134, "learning_rate": 4.865531224655313e-06, "loss": 0.7303, "step": 4255 }, { "epoch": 0.12425914571837318, "grad_norm": 0.8152211557235022, "learning_rate": 4.865369018653691e-06, "loss": 0.7355, "step": 4256 }, { "epoch": 0.12428834194622054, "grad_norm": 0.8655355838073236, "learning_rate": 4.865206812652068e-06, "loss": 0.686, "step": 4257 }, { "epoch": 0.1243175381740679, "grad_norm": 0.7369613236367469, "learning_rate": 4.865044606650446e-06, "loss": 0.6344, "step": 4258 }, { "epoch": 0.12434673440191528, "grad_norm": 0.7910294985094509, "learning_rate": 4.864882400648824e-06, "loss": 0.7934, "step": 4259 }, { "epoch": 0.12437593062976264, "grad_norm": 0.8598633874706947, "learning_rate": 4.864720194647202e-06, "loss": 0.8404, "step": 4260 }, { "epoch": 0.12440512685761, "grad_norm": 0.7380647078786933, "learning_rate": 4.86455798864558e-06, "loss": 0.6685, "step": 4261 }, { "epoch": 0.12443432308545736, "grad_norm": 0.8733619288913622, "learning_rate": 4.864395782643958e-06, "loss": 0.6858, "step": 4262 }, { "epoch": 0.12446351931330472, "grad_norm": 0.7514180775484848, "learning_rate": 4.864233576642336e-06, "loss": 0.7021, "step": 4263 }, { "epoch": 0.12449271554115209, "grad_norm": 0.8365359338536007, "learning_rate": 4.864071370640714e-06, "loss": 0.7091, "step": 4264 }, { "epoch": 0.12452191176899945, "grad_norm": 0.8082993360802188, "learning_rate": 4.863909164639092e-06, "loss": 0.7799, "step": 4265 }, { "epoch": 0.12455110799684681, "grad_norm": 0.7417502636009006, "learning_rate": 4.86374695863747e-06, "loss": 0.6092, "step": 4266 }, { "epoch": 0.12458030422469417, "grad_norm": 0.7065691634540713, "learning_rate": 4.863584752635848e-06, "loss": 0.5837, "step": 4267 }, { "epoch": 0.12460950045254153, "grad_norm": 0.6917817683402167, "learning_rate": 4.863422546634226e-06, "loss": 0.5925, "step": 4268 }, { "epoch": 0.12463869668038889, "grad_norm": 0.7839404290707628, "learning_rate": 4.863260340632604e-06, "loss": 0.7139, "step": 4269 }, { "epoch": 0.12466789290823625, "grad_norm": 0.7326022320606093, "learning_rate": 4.863098134630982e-06, "loss": 0.642, "step": 4270 }, { "epoch": 0.12469708913608361, "grad_norm": 0.9274490013225213, "learning_rate": 4.862935928629359e-06, "loss": 0.772, "step": 4271 }, { "epoch": 0.12472628536393097, "grad_norm": 1.1434111763445456, "learning_rate": 4.862773722627737e-06, "loss": 0.6775, "step": 4272 }, { "epoch": 0.12475548159177834, "grad_norm": 0.7295584638377453, "learning_rate": 4.862611516626115e-06, "loss": 0.6304, "step": 4273 }, { "epoch": 0.12478467781962571, "grad_norm": 0.7522726297530046, "learning_rate": 4.862449310624493e-06, "loss": 0.6938, "step": 4274 }, { "epoch": 0.12481387404747307, "grad_norm": 0.7903259766210773, "learning_rate": 4.8622871046228714e-06, "loss": 0.7054, "step": 4275 }, { "epoch": 0.12484307027532043, "grad_norm": 0.8218392060850029, "learning_rate": 4.8621248986212494e-06, "loss": 0.879, "step": 4276 }, { "epoch": 0.1248722665031678, "grad_norm": 0.7732490253770603, "learning_rate": 4.8619626926196274e-06, "loss": 0.7761, "step": 4277 }, { "epoch": 0.12490146273101516, "grad_norm": 1.0077986009707705, "learning_rate": 4.8618004866180055e-06, "loss": 0.7149, "step": 4278 }, { "epoch": 0.12493065895886252, "grad_norm": 0.7257356146619417, "learning_rate": 4.8616382806163835e-06, "loss": 0.6758, "step": 4279 }, { "epoch": 0.12495985518670988, "grad_norm": 0.8198220619932234, "learning_rate": 4.861476074614761e-06, "loss": 0.7506, "step": 4280 }, { "epoch": 0.12498905141455724, "grad_norm": 0.7794904074381093, "learning_rate": 4.861313868613139e-06, "loss": 0.6522, "step": 4281 }, { "epoch": 0.1250182476424046, "grad_norm": 0.9411551911707038, "learning_rate": 4.861151662611517e-06, "loss": 0.7705, "step": 4282 }, { "epoch": 0.12504744387025196, "grad_norm": 0.7741980366096245, "learning_rate": 4.860989456609895e-06, "loss": 0.7164, "step": 4283 }, { "epoch": 0.12507664009809932, "grad_norm": 0.8050009894359631, "learning_rate": 4.860827250608273e-06, "loss": 0.7234, "step": 4284 }, { "epoch": 0.12510583632594668, "grad_norm": 0.7934043782037008, "learning_rate": 4.8606650446066515e-06, "loss": 0.7345, "step": 4285 }, { "epoch": 0.12513503255379405, "grad_norm": 0.7665276402005761, "learning_rate": 4.860502838605029e-06, "loss": 0.6438, "step": 4286 }, { "epoch": 0.1251642287816414, "grad_norm": 0.792975515184483, "learning_rate": 4.860340632603407e-06, "loss": 0.6709, "step": 4287 }, { "epoch": 0.12519342500948877, "grad_norm": 0.7336871482703972, "learning_rate": 4.860178426601785e-06, "loss": 0.6627, "step": 4288 }, { "epoch": 0.12522262123733613, "grad_norm": 0.7711257386337699, "learning_rate": 4.860016220600163e-06, "loss": 0.7557, "step": 4289 }, { "epoch": 0.1252518174651835, "grad_norm": 0.8380241868809954, "learning_rate": 4.859854014598541e-06, "loss": 0.7794, "step": 4290 }, { "epoch": 0.12528101369303085, "grad_norm": 0.765403255583107, "learning_rate": 4.859691808596919e-06, "loss": 0.6654, "step": 4291 }, { "epoch": 0.1253102099208782, "grad_norm": 0.841849800714977, "learning_rate": 4.859529602595297e-06, "loss": 0.7205, "step": 4292 }, { "epoch": 0.12533940614872557, "grad_norm": 0.8158960684795429, "learning_rate": 4.859367396593675e-06, "loss": 0.8304, "step": 4293 }, { "epoch": 0.12536860237657294, "grad_norm": 0.757001808006053, "learning_rate": 4.859205190592053e-06, "loss": 0.6811, "step": 4294 }, { "epoch": 0.1253977986044203, "grad_norm": 0.7184557658530069, "learning_rate": 4.85904298459043e-06, "loss": 0.6042, "step": 4295 }, { "epoch": 0.12542699483226766, "grad_norm": 0.8281940507670765, "learning_rate": 4.858880778588808e-06, "loss": 0.8068, "step": 4296 }, { "epoch": 0.12545619106011505, "grad_norm": 0.7212548459913408, "learning_rate": 4.858718572587186e-06, "loss": 0.6711, "step": 4297 }, { "epoch": 0.1254853872879624, "grad_norm": 0.7973853544876386, "learning_rate": 4.858556366585564e-06, "loss": 0.7296, "step": 4298 }, { "epoch": 0.12551458351580977, "grad_norm": 0.728338243506623, "learning_rate": 4.858394160583942e-06, "loss": 0.6618, "step": 4299 }, { "epoch": 0.12554377974365713, "grad_norm": 1.0448791823080628, "learning_rate": 4.85823195458232e-06, "loss": 0.7917, "step": 4300 }, { "epoch": 0.1255729759715045, "grad_norm": 0.6962243022861507, "learning_rate": 4.858069748580698e-06, "loss": 0.6243, "step": 4301 }, { "epoch": 0.12560217219935185, "grad_norm": 0.7931372824981507, "learning_rate": 4.857907542579076e-06, "loss": 0.7355, "step": 4302 }, { "epoch": 0.1256313684271992, "grad_norm": 0.7106666649032422, "learning_rate": 4.857745336577454e-06, "loss": 0.6978, "step": 4303 }, { "epoch": 0.12566056465504657, "grad_norm": 0.7949567021480699, "learning_rate": 4.857583130575832e-06, "loss": 0.7008, "step": 4304 }, { "epoch": 0.12568976088289394, "grad_norm": 0.8333111632837883, "learning_rate": 4.85742092457421e-06, "loss": 0.7909, "step": 4305 }, { "epoch": 0.1257189571107413, "grad_norm": 0.8786384774926074, "learning_rate": 4.857258718572588e-06, "loss": 0.8654, "step": 4306 }, { "epoch": 0.12574815333858866, "grad_norm": 0.7683543981974391, "learning_rate": 4.857096512570966e-06, "loss": 0.709, "step": 4307 }, { "epoch": 0.12577734956643602, "grad_norm": 0.9082692364515814, "learning_rate": 4.856934306569344e-06, "loss": 0.732, "step": 4308 }, { "epoch": 0.12580654579428338, "grad_norm": 0.7490433547094263, "learning_rate": 4.856772100567721e-06, "loss": 0.6764, "step": 4309 }, { "epoch": 0.12583574202213074, "grad_norm": 0.7678984518254062, "learning_rate": 4.856609894566099e-06, "loss": 0.7304, "step": 4310 }, { "epoch": 0.1258649382499781, "grad_norm": 1.0321825278989827, "learning_rate": 4.856447688564477e-06, "loss": 0.6385, "step": 4311 }, { "epoch": 0.12589413447782546, "grad_norm": 0.7307946634215393, "learning_rate": 4.856285482562855e-06, "loss": 0.6325, "step": 4312 }, { "epoch": 0.12592333070567283, "grad_norm": 0.829931082710092, "learning_rate": 4.856123276561233e-06, "loss": 0.7216, "step": 4313 }, { "epoch": 0.1259525269335202, "grad_norm": 0.7242719410864212, "learning_rate": 4.855961070559611e-06, "loss": 0.7085, "step": 4314 }, { "epoch": 0.12598172316136755, "grad_norm": 0.7798025655002768, "learning_rate": 4.855798864557989e-06, "loss": 0.7584, "step": 4315 }, { "epoch": 0.1260109193892149, "grad_norm": 0.8143787142190387, "learning_rate": 4.855636658556367e-06, "loss": 0.6749, "step": 4316 }, { "epoch": 0.12604011561706227, "grad_norm": 0.6999773226800241, "learning_rate": 4.855474452554745e-06, "loss": 0.6234, "step": 4317 }, { "epoch": 0.12606931184490963, "grad_norm": 0.7365761471397766, "learning_rate": 4.855312246553122e-06, "loss": 0.7194, "step": 4318 }, { "epoch": 0.126098508072757, "grad_norm": 0.7756102849059854, "learning_rate": 4.8551500405515e-06, "loss": 0.7016, "step": 4319 }, { "epoch": 0.12612770430060435, "grad_norm": 0.8867834257942354, "learning_rate": 4.854987834549878e-06, "loss": 0.8058, "step": 4320 }, { "epoch": 0.12615690052845172, "grad_norm": 0.6960048404989052, "learning_rate": 4.854825628548256e-06, "loss": 0.6266, "step": 4321 }, { "epoch": 0.12618609675629908, "grad_norm": 0.9336989136774295, "learning_rate": 4.854663422546634e-06, "loss": 0.7079, "step": 4322 }, { "epoch": 0.12621529298414644, "grad_norm": 0.8101331054989549, "learning_rate": 4.854501216545013e-06, "loss": 0.6996, "step": 4323 }, { "epoch": 0.1262444892119938, "grad_norm": 0.7865081966838671, "learning_rate": 4.85433901054339e-06, "loss": 0.6195, "step": 4324 }, { "epoch": 0.12627368543984116, "grad_norm": 0.719043616525066, "learning_rate": 4.854176804541768e-06, "loss": 0.6419, "step": 4325 }, { "epoch": 0.12630288166768852, "grad_norm": 0.7842928420176779, "learning_rate": 4.854014598540146e-06, "loss": 0.7451, "step": 4326 }, { "epoch": 0.1263320778955359, "grad_norm": 0.7062218091036426, "learning_rate": 4.853852392538524e-06, "loss": 0.6286, "step": 4327 }, { "epoch": 0.12636127412338327, "grad_norm": 0.83964318791527, "learning_rate": 4.853690186536902e-06, "loss": 0.7836, "step": 4328 }, { "epoch": 0.12639047035123063, "grad_norm": 0.8772190385843341, "learning_rate": 4.85352798053528e-06, "loss": 0.7373, "step": 4329 }, { "epoch": 0.126419666579078, "grad_norm": 0.8122608849448754, "learning_rate": 4.853365774533658e-06, "loss": 0.7463, "step": 4330 }, { "epoch": 0.12644886280692536, "grad_norm": 0.7308309859227383, "learning_rate": 4.853203568532036e-06, "loss": 0.6441, "step": 4331 }, { "epoch": 0.12647805903477272, "grad_norm": 0.7642707685235011, "learning_rate": 4.8530413625304144e-06, "loss": 0.7249, "step": 4332 }, { "epoch": 0.12650725526262008, "grad_norm": 0.8049439570074836, "learning_rate": 4.852879156528792e-06, "loss": 0.6974, "step": 4333 }, { "epoch": 0.12653645149046744, "grad_norm": 0.7026470751993412, "learning_rate": 4.85271695052717e-06, "loss": 0.6443, "step": 4334 }, { "epoch": 0.1265656477183148, "grad_norm": 0.7860178372625604, "learning_rate": 4.852554744525548e-06, "loss": 0.7135, "step": 4335 }, { "epoch": 0.12659484394616216, "grad_norm": 0.7493448304239116, "learning_rate": 4.852392538523926e-06, "loss": 0.6148, "step": 4336 }, { "epoch": 0.12662404017400952, "grad_norm": 0.841846631701699, "learning_rate": 4.852230332522304e-06, "loss": 0.7315, "step": 4337 }, { "epoch": 0.12665323640185688, "grad_norm": 0.734424885225807, "learning_rate": 4.852068126520682e-06, "loss": 0.7167, "step": 4338 }, { "epoch": 0.12668243262970424, "grad_norm": 0.7614242250821072, "learning_rate": 4.85190592051906e-06, "loss": 0.6067, "step": 4339 }, { "epoch": 0.1267116288575516, "grad_norm": 0.7115198063403373, "learning_rate": 4.851743714517438e-06, "loss": 0.6986, "step": 4340 }, { "epoch": 0.12674082508539897, "grad_norm": 0.8655546474822441, "learning_rate": 4.851581508515815e-06, "loss": 0.7407, "step": 4341 }, { "epoch": 0.12677002131324633, "grad_norm": 0.8585967377355134, "learning_rate": 4.851419302514194e-06, "loss": 0.7793, "step": 4342 }, { "epoch": 0.1267992175410937, "grad_norm": 0.8552928189241487, "learning_rate": 4.851257096512572e-06, "loss": 0.7102, "step": 4343 }, { "epoch": 0.12682841376894105, "grad_norm": 0.7293921331112614, "learning_rate": 4.85109489051095e-06, "loss": 0.6848, "step": 4344 }, { "epoch": 0.1268576099967884, "grad_norm": 0.6782272068932104, "learning_rate": 4.850932684509328e-06, "loss": 0.6017, "step": 4345 }, { "epoch": 0.12688680622463577, "grad_norm": 0.7979852540565302, "learning_rate": 4.850770478507706e-06, "loss": 0.7139, "step": 4346 }, { "epoch": 0.12691600245248313, "grad_norm": 0.8585631977238138, "learning_rate": 4.850608272506083e-06, "loss": 0.7268, "step": 4347 }, { "epoch": 0.1269451986803305, "grad_norm": 0.9062565861007061, "learning_rate": 4.850446066504461e-06, "loss": 0.7401, "step": 4348 }, { "epoch": 0.12697439490817786, "grad_norm": 0.7362593065687739, "learning_rate": 4.850283860502839e-06, "loss": 0.6197, "step": 4349 }, { "epoch": 0.12700359113602522, "grad_norm": 0.732333379919213, "learning_rate": 4.850121654501217e-06, "loss": 0.6753, "step": 4350 }, { "epoch": 0.12703278736387258, "grad_norm": 0.777007795687868, "learning_rate": 4.849959448499595e-06, "loss": 0.7571, "step": 4351 }, { "epoch": 0.12706198359171994, "grad_norm": 0.899783083212732, "learning_rate": 4.849797242497973e-06, "loss": 0.6683, "step": 4352 }, { "epoch": 0.1270911798195673, "grad_norm": 0.8491881349577858, "learning_rate": 4.849635036496351e-06, "loss": 0.8811, "step": 4353 }, { "epoch": 0.12712037604741466, "grad_norm": 0.814706194664199, "learning_rate": 4.849472830494729e-06, "loss": 0.6933, "step": 4354 }, { "epoch": 0.12714957227526202, "grad_norm": 0.7771934685406444, "learning_rate": 4.849310624493107e-06, "loss": 0.6689, "step": 4355 }, { "epoch": 0.12717876850310939, "grad_norm": 0.9266474052499794, "learning_rate": 4.849148418491484e-06, "loss": 0.7393, "step": 4356 }, { "epoch": 0.12720796473095677, "grad_norm": 0.7924984802619087, "learning_rate": 4.848986212489862e-06, "loss": 0.7238, "step": 4357 }, { "epoch": 0.12723716095880414, "grad_norm": 1.1331804937985204, "learning_rate": 4.84882400648824e-06, "loss": 0.7152, "step": 4358 }, { "epoch": 0.1272663571866515, "grad_norm": 0.7373566382288741, "learning_rate": 4.848661800486618e-06, "loss": 0.6929, "step": 4359 }, { "epoch": 0.12729555341449886, "grad_norm": 0.7924584082543635, "learning_rate": 4.848499594484996e-06, "loss": 0.7196, "step": 4360 }, { "epoch": 0.12732474964234622, "grad_norm": 0.7553461387026853, "learning_rate": 4.848337388483375e-06, "loss": 0.7494, "step": 4361 }, { "epoch": 0.12735394587019358, "grad_norm": 0.9875682149980304, "learning_rate": 4.848175182481752e-06, "loss": 0.6821, "step": 4362 }, { "epoch": 0.12738314209804094, "grad_norm": 0.7817968159347447, "learning_rate": 4.84801297648013e-06, "loss": 0.6904, "step": 4363 }, { "epoch": 0.1274123383258883, "grad_norm": 0.8606207546390315, "learning_rate": 4.847850770478508e-06, "loss": 0.6287, "step": 4364 }, { "epoch": 0.12744153455373566, "grad_norm": 0.6869874122166284, "learning_rate": 4.847688564476886e-06, "loss": 0.5981, "step": 4365 }, { "epoch": 0.12747073078158302, "grad_norm": 0.8019612911075579, "learning_rate": 4.847526358475264e-06, "loss": 0.7971, "step": 4366 }, { "epoch": 0.12749992700943039, "grad_norm": 0.7643816113601899, "learning_rate": 4.847364152473642e-06, "loss": 0.7292, "step": 4367 }, { "epoch": 0.12752912323727775, "grad_norm": 0.7403786990713825, "learning_rate": 4.84720194647202e-06, "loss": 0.7186, "step": 4368 }, { "epoch": 0.1275583194651251, "grad_norm": 0.8696069617276392, "learning_rate": 4.847039740470398e-06, "loss": 0.777, "step": 4369 }, { "epoch": 0.12758751569297247, "grad_norm": 0.7084291117463296, "learning_rate": 4.846877534468776e-06, "loss": 0.6394, "step": 4370 }, { "epoch": 0.12761671192081983, "grad_norm": 1.114559668447956, "learning_rate": 4.846715328467153e-06, "loss": 0.7735, "step": 4371 }, { "epoch": 0.1276459081486672, "grad_norm": 0.801770089336576, "learning_rate": 4.846553122465531e-06, "loss": 0.7974, "step": 4372 }, { "epoch": 0.12767510437651455, "grad_norm": 0.7810781044632733, "learning_rate": 4.846390916463909e-06, "loss": 0.7279, "step": 4373 }, { "epoch": 0.12770430060436191, "grad_norm": 0.7319151235944185, "learning_rate": 4.846228710462287e-06, "loss": 0.6429, "step": 4374 }, { "epoch": 0.12773349683220928, "grad_norm": 0.7923061506609255, "learning_rate": 4.846066504460665e-06, "loss": 0.7034, "step": 4375 }, { "epoch": 0.12776269306005664, "grad_norm": 0.7451609925226376, "learning_rate": 4.845904298459043e-06, "loss": 0.6655, "step": 4376 }, { "epoch": 0.127791889287904, "grad_norm": 0.7260552432717302, "learning_rate": 4.845742092457421e-06, "loss": 0.619, "step": 4377 }, { "epoch": 0.12782108551575136, "grad_norm": 0.692678129866514, "learning_rate": 4.845579886455799e-06, "loss": 0.6187, "step": 4378 }, { "epoch": 0.12785028174359872, "grad_norm": 0.705954837838401, "learning_rate": 4.8454176804541765e-06, "loss": 0.6229, "step": 4379 }, { "epoch": 0.12787947797144608, "grad_norm": 0.7344112513045854, "learning_rate": 4.845255474452555e-06, "loss": 0.716, "step": 4380 }, { "epoch": 0.12790867419929344, "grad_norm": 0.7820961243151237, "learning_rate": 4.845093268450933e-06, "loss": 0.601, "step": 4381 }, { "epoch": 0.1279378704271408, "grad_norm": 1.0772875059140519, "learning_rate": 4.844931062449311e-06, "loss": 0.6199, "step": 4382 }, { "epoch": 0.12796706665498817, "grad_norm": 0.7536542237758134, "learning_rate": 4.844768856447689e-06, "loss": 0.6933, "step": 4383 }, { "epoch": 0.12799626288283553, "grad_norm": 0.7858832713621142, "learning_rate": 4.844606650446067e-06, "loss": 0.6657, "step": 4384 }, { "epoch": 0.1280254591106829, "grad_norm": 0.8249127567617837, "learning_rate": 4.8444444444444446e-06, "loss": 0.7713, "step": 4385 }, { "epoch": 0.12805465533853025, "grad_norm": 0.8068237026824914, "learning_rate": 4.8442822384428226e-06, "loss": 0.7541, "step": 4386 }, { "epoch": 0.12808385156637764, "grad_norm": 0.8064841500860881, "learning_rate": 4.8441200324412006e-06, "loss": 0.6446, "step": 4387 }, { "epoch": 0.128113047794225, "grad_norm": 0.7031594957076962, "learning_rate": 4.8439578264395786e-06, "loss": 0.617, "step": 4388 }, { "epoch": 0.12814224402207236, "grad_norm": 0.7328656793130788, "learning_rate": 4.843795620437957e-06, "loss": 0.679, "step": 4389 }, { "epoch": 0.12817144024991972, "grad_norm": 0.7364783037495866, "learning_rate": 4.843633414436335e-06, "loss": 0.6686, "step": 4390 }, { "epoch": 0.12820063647776708, "grad_norm": 0.7406418757665375, "learning_rate": 4.843471208434713e-06, "loss": 0.7087, "step": 4391 }, { "epoch": 0.12822983270561444, "grad_norm": 0.7521176861311251, "learning_rate": 4.843309002433091e-06, "loss": 0.6778, "step": 4392 }, { "epoch": 0.1282590289334618, "grad_norm": 0.8331853281811296, "learning_rate": 4.843146796431469e-06, "loss": 0.7374, "step": 4393 }, { "epoch": 0.12828822516130917, "grad_norm": 0.8918382844395937, "learning_rate": 4.842984590429846e-06, "loss": 0.6358, "step": 4394 }, { "epoch": 0.12831742138915653, "grad_norm": 0.8413480627580646, "learning_rate": 4.842822384428224e-06, "loss": 0.6883, "step": 4395 }, { "epoch": 0.1283466176170039, "grad_norm": 1.2561374456462373, "learning_rate": 4.842660178426602e-06, "loss": 0.8074, "step": 4396 }, { "epoch": 0.12837581384485125, "grad_norm": 0.7716821355242524, "learning_rate": 4.84249797242498e-06, "loss": 0.723, "step": 4397 }, { "epoch": 0.1284050100726986, "grad_norm": 0.7221528648683053, "learning_rate": 4.842335766423358e-06, "loss": 0.6575, "step": 4398 }, { "epoch": 0.12843420630054597, "grad_norm": 0.7613413281118607, "learning_rate": 4.842173560421737e-06, "loss": 0.715, "step": 4399 }, { "epoch": 0.12846340252839333, "grad_norm": 0.7222793408005161, "learning_rate": 4.842011354420114e-06, "loss": 0.63, "step": 4400 }, { "epoch": 0.1284925987562407, "grad_norm": 0.7408337210940654, "learning_rate": 4.841849148418492e-06, "loss": 0.7096, "step": 4401 }, { "epoch": 0.12852179498408806, "grad_norm": 0.8203634054520672, "learning_rate": 4.84168694241687e-06, "loss": 0.7385, "step": 4402 }, { "epoch": 0.12855099121193542, "grad_norm": 0.8433554051747256, "learning_rate": 4.841524736415248e-06, "loss": 0.7496, "step": 4403 }, { "epoch": 0.12858018743978278, "grad_norm": 0.7893040478176523, "learning_rate": 4.841362530413626e-06, "loss": 0.6754, "step": 4404 }, { "epoch": 0.12860938366763014, "grad_norm": 0.7860075808293915, "learning_rate": 4.841200324412004e-06, "loss": 0.7555, "step": 4405 }, { "epoch": 0.1286385798954775, "grad_norm": 0.7743608723794466, "learning_rate": 4.841038118410382e-06, "loss": 0.7573, "step": 4406 }, { "epoch": 0.12866777612332486, "grad_norm": 0.8157086815572063, "learning_rate": 4.84087591240876e-06, "loss": 0.8286, "step": 4407 }, { "epoch": 0.12869697235117222, "grad_norm": 0.7120841594820687, "learning_rate": 4.840713706407138e-06, "loss": 0.6341, "step": 4408 }, { "epoch": 0.12872616857901958, "grad_norm": 0.8657560966825465, "learning_rate": 4.840551500405515e-06, "loss": 0.7748, "step": 4409 }, { "epoch": 0.12875536480686695, "grad_norm": 0.7444843519374382, "learning_rate": 4.840389294403893e-06, "loss": 0.6224, "step": 4410 }, { "epoch": 0.1287845610347143, "grad_norm": 0.7500906112051259, "learning_rate": 4.840227088402271e-06, "loss": 0.6746, "step": 4411 }, { "epoch": 0.12881375726256167, "grad_norm": 0.7150251054816812, "learning_rate": 4.840064882400649e-06, "loss": 0.6631, "step": 4412 }, { "epoch": 0.12884295349040903, "grad_norm": 0.7571892936138355, "learning_rate": 4.839902676399027e-06, "loss": 0.7195, "step": 4413 }, { "epoch": 0.1288721497182564, "grad_norm": 0.7704316697074359, "learning_rate": 4.839740470397405e-06, "loss": 0.6867, "step": 4414 }, { "epoch": 0.12890134594610375, "grad_norm": 0.6845721900606718, "learning_rate": 4.839578264395783e-06, "loss": 0.5772, "step": 4415 }, { "epoch": 0.1289305421739511, "grad_norm": 0.8204274309900658, "learning_rate": 4.839416058394161e-06, "loss": 0.7288, "step": 4416 }, { "epoch": 0.12895973840179847, "grad_norm": 0.8536123022910074, "learning_rate": 4.839253852392538e-06, "loss": 0.7525, "step": 4417 }, { "epoch": 0.12898893462964586, "grad_norm": 0.7734952029186909, "learning_rate": 4.839091646390917e-06, "loss": 0.7039, "step": 4418 }, { "epoch": 0.12901813085749322, "grad_norm": 0.8304895381419783, "learning_rate": 4.838929440389295e-06, "loss": 0.7357, "step": 4419 }, { "epoch": 0.12904732708534059, "grad_norm": 0.7355819049734559, "learning_rate": 4.838767234387673e-06, "loss": 0.6164, "step": 4420 }, { "epoch": 0.12907652331318795, "grad_norm": 0.8277102852243565, "learning_rate": 4.838605028386051e-06, "loss": 0.7154, "step": 4421 }, { "epoch": 0.1291057195410353, "grad_norm": 0.8246885465839149, "learning_rate": 4.838442822384429e-06, "loss": 0.7329, "step": 4422 }, { "epoch": 0.12913491576888267, "grad_norm": 0.8009742461000131, "learning_rate": 4.838280616382806e-06, "loss": 0.6562, "step": 4423 }, { "epoch": 0.12916411199673003, "grad_norm": 0.8094059028230625, "learning_rate": 4.838118410381184e-06, "loss": 0.7103, "step": 4424 }, { "epoch": 0.1291933082245774, "grad_norm": 0.9101997579429013, "learning_rate": 4.837956204379562e-06, "loss": 0.7089, "step": 4425 }, { "epoch": 0.12922250445242475, "grad_norm": 0.8777210637070606, "learning_rate": 4.83779399837794e-06, "loss": 0.7459, "step": 4426 }, { "epoch": 0.1292517006802721, "grad_norm": 0.8379337613649833, "learning_rate": 4.837631792376318e-06, "loss": 0.7444, "step": 4427 }, { "epoch": 0.12928089690811947, "grad_norm": 0.7404653043359842, "learning_rate": 4.837469586374696e-06, "loss": 0.688, "step": 4428 }, { "epoch": 0.12931009313596684, "grad_norm": 0.7687115303639732, "learning_rate": 4.837307380373074e-06, "loss": 0.679, "step": 4429 }, { "epoch": 0.1293392893638142, "grad_norm": 0.785321146851463, "learning_rate": 4.837145174371452e-06, "loss": 0.6919, "step": 4430 }, { "epoch": 0.12936848559166156, "grad_norm": 0.7995712121937968, "learning_rate": 4.83698296836983e-06, "loss": 0.7862, "step": 4431 }, { "epoch": 0.12939768181950892, "grad_norm": 0.7507034918021104, "learning_rate": 4.8368207623682075e-06, "loss": 0.6675, "step": 4432 }, { "epoch": 0.12942687804735628, "grad_norm": 0.7154881160363439, "learning_rate": 4.8366585563665855e-06, "loss": 0.6777, "step": 4433 }, { "epoch": 0.12945607427520364, "grad_norm": 0.8323422119043898, "learning_rate": 4.8364963503649635e-06, "loss": 0.6505, "step": 4434 }, { "epoch": 0.129485270503051, "grad_norm": 0.8863706180433263, "learning_rate": 4.8363341443633415e-06, "loss": 0.7657, "step": 4435 }, { "epoch": 0.12951446673089836, "grad_norm": 0.8239490037880842, "learning_rate": 4.83617193836172e-06, "loss": 0.7276, "step": 4436 }, { "epoch": 0.12954366295874573, "grad_norm": 0.7592006296117572, "learning_rate": 4.836009732360098e-06, "loss": 0.6859, "step": 4437 }, { "epoch": 0.1295728591865931, "grad_norm": 0.7891574593677277, "learning_rate": 4.8358475263584755e-06, "loss": 0.7359, "step": 4438 }, { "epoch": 0.12960205541444045, "grad_norm": 0.756566164628013, "learning_rate": 4.8356853203568535e-06, "loss": 0.6506, "step": 4439 }, { "epoch": 0.1296312516422878, "grad_norm": 0.7377153811008987, "learning_rate": 4.8355231143552315e-06, "loss": 0.6668, "step": 4440 }, { "epoch": 0.12966044787013517, "grad_norm": 0.7835922631149254, "learning_rate": 4.8353609083536095e-06, "loss": 0.691, "step": 4441 }, { "epoch": 0.12968964409798253, "grad_norm": 0.7426550661536647, "learning_rate": 4.8351987023519876e-06, "loss": 0.7565, "step": 4442 }, { "epoch": 0.1297188403258299, "grad_norm": 0.7981577762332341, "learning_rate": 4.8350364963503656e-06, "loss": 0.8273, "step": 4443 }, { "epoch": 0.12974803655367725, "grad_norm": 0.8098662900448018, "learning_rate": 4.8348742903487436e-06, "loss": 0.6522, "step": 4444 }, { "epoch": 0.12977723278152462, "grad_norm": 0.7349967630815496, "learning_rate": 4.8347120843471216e-06, "loss": 0.6554, "step": 4445 }, { "epoch": 0.12980642900937198, "grad_norm": 0.9485514726898424, "learning_rate": 4.8345498783455e-06, "loss": 0.7949, "step": 4446 }, { "epoch": 0.12983562523721934, "grad_norm": 0.7590500468328718, "learning_rate": 4.834387672343877e-06, "loss": 0.6524, "step": 4447 }, { "epoch": 0.12986482146506673, "grad_norm": 0.7630966424251717, "learning_rate": 4.834225466342255e-06, "loss": 0.6541, "step": 4448 }, { "epoch": 0.1298940176929141, "grad_norm": 0.7131308972993434, "learning_rate": 4.834063260340633e-06, "loss": 0.6732, "step": 4449 }, { "epoch": 0.12992321392076145, "grad_norm": 0.694086289170439, "learning_rate": 4.833901054339011e-06, "loss": 0.5914, "step": 4450 }, { "epoch": 0.1299524101486088, "grad_norm": 0.8082860252509595, "learning_rate": 4.833738848337389e-06, "loss": 0.7618, "step": 4451 }, { "epoch": 0.12998160637645617, "grad_norm": 0.8112195203723254, "learning_rate": 4.833576642335767e-06, "loss": 0.6791, "step": 4452 }, { "epoch": 0.13001080260430353, "grad_norm": 0.7695934475645538, "learning_rate": 4.833414436334145e-06, "loss": 0.6503, "step": 4453 }, { "epoch": 0.1300399988321509, "grad_norm": 0.7496996722406919, "learning_rate": 4.833252230332523e-06, "loss": 0.6858, "step": 4454 }, { "epoch": 0.13006919505999825, "grad_norm": 0.7240774705079429, "learning_rate": 4.833090024330901e-06, "loss": 0.6501, "step": 4455 }, { "epoch": 0.13009839128784562, "grad_norm": 0.7595520009254749, "learning_rate": 4.832927818329279e-06, "loss": 0.6533, "step": 4456 }, { "epoch": 0.13012758751569298, "grad_norm": 0.7290009839137614, "learning_rate": 4.832765612327657e-06, "loss": 0.5844, "step": 4457 }, { "epoch": 0.13015678374354034, "grad_norm": 0.8367146716381801, "learning_rate": 4.832603406326035e-06, "loss": 0.8118, "step": 4458 }, { "epoch": 0.1301859799713877, "grad_norm": 0.7880732328462697, "learning_rate": 4.832441200324413e-06, "loss": 0.6726, "step": 4459 }, { "epoch": 0.13021517619923506, "grad_norm": 0.7592789647871182, "learning_rate": 4.832278994322791e-06, "loss": 0.7005, "step": 4460 }, { "epoch": 0.13024437242708242, "grad_norm": 0.7102345019765952, "learning_rate": 4.832116788321168e-06, "loss": 0.632, "step": 4461 }, { "epoch": 0.13027356865492978, "grad_norm": 0.7688898733427952, "learning_rate": 4.831954582319546e-06, "loss": 0.7029, "step": 4462 }, { "epoch": 0.13030276488277714, "grad_norm": 0.8213079312819558, "learning_rate": 4.831792376317924e-06, "loss": 0.7359, "step": 4463 }, { "epoch": 0.1303319611106245, "grad_norm": 0.8091772103202535, "learning_rate": 4.831630170316302e-06, "loss": 0.7181, "step": 4464 }, { "epoch": 0.13036115733847187, "grad_norm": 0.7566915532174452, "learning_rate": 4.83146796431468e-06, "loss": 0.7142, "step": 4465 }, { "epoch": 0.13039035356631923, "grad_norm": 0.8089018122950947, "learning_rate": 4.831305758313058e-06, "loss": 0.7537, "step": 4466 }, { "epoch": 0.1304195497941666, "grad_norm": 0.7519918495747546, "learning_rate": 4.831143552311436e-06, "loss": 0.7553, "step": 4467 }, { "epoch": 0.13044874602201395, "grad_norm": 0.8050369794665048, "learning_rate": 4.830981346309814e-06, "loss": 0.7646, "step": 4468 }, { "epoch": 0.1304779422498613, "grad_norm": 1.0318953303399332, "learning_rate": 4.830819140308192e-06, "loss": 0.6913, "step": 4469 }, { "epoch": 0.13050713847770867, "grad_norm": 0.7091866071999164, "learning_rate": 4.830656934306569e-06, "loss": 0.672, "step": 4470 }, { "epoch": 0.13053633470555603, "grad_norm": 0.7263274885511779, "learning_rate": 4.830494728304947e-06, "loss": 0.7058, "step": 4471 }, { "epoch": 0.1305655309334034, "grad_norm": 0.7397628098028562, "learning_rate": 4.830332522303325e-06, "loss": 0.6863, "step": 4472 }, { "epoch": 0.13059472716125076, "grad_norm": 0.8273386528706778, "learning_rate": 4.830170316301703e-06, "loss": 0.7347, "step": 4473 }, { "epoch": 0.13062392338909812, "grad_norm": 0.6978743883411612, "learning_rate": 4.830008110300082e-06, "loss": 0.6301, "step": 4474 }, { "epoch": 0.13065311961694548, "grad_norm": 0.7280621751142793, "learning_rate": 4.82984590429846e-06, "loss": 0.6589, "step": 4475 }, { "epoch": 0.13068231584479284, "grad_norm": 0.7774685968677877, "learning_rate": 4.829683698296837e-06, "loss": 0.6757, "step": 4476 }, { "epoch": 0.1307115120726402, "grad_norm": 0.7209550547411311, "learning_rate": 4.829521492295215e-06, "loss": 0.6375, "step": 4477 }, { "epoch": 0.1307407083004876, "grad_norm": 0.781831207213686, "learning_rate": 4.829359286293593e-06, "loss": 0.7447, "step": 4478 }, { "epoch": 0.13076990452833495, "grad_norm": 0.7442517438211084, "learning_rate": 4.829197080291971e-06, "loss": 0.6572, "step": 4479 }, { "epoch": 0.1307991007561823, "grad_norm": 0.9566603191117947, "learning_rate": 4.829034874290349e-06, "loss": 0.8109, "step": 4480 }, { "epoch": 0.13082829698402967, "grad_norm": 0.8131836518587626, "learning_rate": 4.828872668288727e-06, "loss": 0.6427, "step": 4481 }, { "epoch": 0.13085749321187704, "grad_norm": 0.8567868455500703, "learning_rate": 4.828710462287105e-06, "loss": 0.8236, "step": 4482 }, { "epoch": 0.1308866894397244, "grad_norm": 0.763285009677897, "learning_rate": 4.828548256285483e-06, "loss": 0.6801, "step": 4483 }, { "epoch": 0.13091588566757176, "grad_norm": 0.766075133738811, "learning_rate": 4.8283860502838605e-06, "loss": 0.7096, "step": 4484 }, { "epoch": 0.13094508189541912, "grad_norm": 0.8131297007880386, "learning_rate": 4.8282238442822385e-06, "loss": 0.8194, "step": 4485 }, { "epoch": 0.13097427812326648, "grad_norm": 0.7604057326821702, "learning_rate": 4.8280616382806165e-06, "loss": 0.7114, "step": 4486 }, { "epoch": 0.13100347435111384, "grad_norm": 0.7925274155173756, "learning_rate": 4.8278994322789945e-06, "loss": 0.6949, "step": 4487 }, { "epoch": 0.1310326705789612, "grad_norm": 0.8400169042477063, "learning_rate": 4.8277372262773725e-06, "loss": 0.751, "step": 4488 }, { "epoch": 0.13106186680680856, "grad_norm": 0.7570131162909194, "learning_rate": 4.8275750202757505e-06, "loss": 0.7128, "step": 4489 }, { "epoch": 0.13109106303465592, "grad_norm": 0.8056053560039715, "learning_rate": 4.8274128142741285e-06, "loss": 0.729, "step": 4490 }, { "epoch": 0.13112025926250329, "grad_norm": 0.7918049020941536, "learning_rate": 4.8272506082725065e-06, "loss": 0.7116, "step": 4491 }, { "epoch": 0.13114945549035065, "grad_norm": 0.7862961229049833, "learning_rate": 4.8270884022708845e-06, "loss": 0.7102, "step": 4492 }, { "epoch": 0.131178651718198, "grad_norm": 0.7937759981291937, "learning_rate": 4.8269261962692625e-06, "loss": 0.693, "step": 4493 }, { "epoch": 0.13120784794604537, "grad_norm": 0.7504563582333779, "learning_rate": 4.8267639902676405e-06, "loss": 0.7075, "step": 4494 }, { "epoch": 0.13123704417389273, "grad_norm": 0.8480831473830632, "learning_rate": 4.8266017842660185e-06, "loss": 0.6468, "step": 4495 }, { "epoch": 0.1312662404017401, "grad_norm": 1.1477847030706374, "learning_rate": 4.8264395782643965e-06, "loss": 0.9001, "step": 4496 }, { "epoch": 0.13129543662958745, "grad_norm": 0.714267272748668, "learning_rate": 4.8262773722627745e-06, "loss": 0.6359, "step": 4497 }, { "epoch": 0.13132463285743481, "grad_norm": 0.7712678415607148, "learning_rate": 4.8261151662611525e-06, "loss": 0.7759, "step": 4498 }, { "epoch": 0.13135382908528218, "grad_norm": 0.7365787176844032, "learning_rate": 4.82595296025953e-06, "loss": 0.7013, "step": 4499 }, { "epoch": 0.13138302531312954, "grad_norm": 0.7471777366925259, "learning_rate": 4.825790754257908e-06, "loss": 0.6836, "step": 4500 }, { "epoch": 0.1314122215409769, "grad_norm": 0.7716867932920042, "learning_rate": 4.825628548256286e-06, "loss": 0.7128, "step": 4501 }, { "epoch": 0.13144141776882426, "grad_norm": 0.7831204281269664, "learning_rate": 4.825466342254664e-06, "loss": 0.6082, "step": 4502 }, { "epoch": 0.13147061399667162, "grad_norm": 0.7648007165565547, "learning_rate": 4.825304136253042e-06, "loss": 0.6453, "step": 4503 }, { "epoch": 0.13149981022451898, "grad_norm": 0.7986190216077194, "learning_rate": 4.82514193025142e-06, "loss": 0.8195, "step": 4504 }, { "epoch": 0.13152900645236634, "grad_norm": 0.7375261776405567, "learning_rate": 4.824979724249798e-06, "loss": 0.6522, "step": 4505 }, { "epoch": 0.1315582026802137, "grad_norm": 0.6994304295081771, "learning_rate": 4.824817518248176e-06, "loss": 0.6174, "step": 4506 }, { "epoch": 0.13158739890806107, "grad_norm": 0.7422107344635274, "learning_rate": 4.824655312246554e-06, "loss": 0.7263, "step": 4507 }, { "epoch": 0.13161659513590845, "grad_norm": 0.7502190405889696, "learning_rate": 4.824493106244931e-06, "loss": 0.5832, "step": 4508 }, { "epoch": 0.13164579136375582, "grad_norm": 0.8101377787585471, "learning_rate": 4.824330900243309e-06, "loss": 0.757, "step": 4509 }, { "epoch": 0.13167498759160318, "grad_norm": 0.7479861577100972, "learning_rate": 4.824168694241687e-06, "loss": 0.6721, "step": 4510 }, { "epoch": 0.13170418381945054, "grad_norm": 0.8696717387325441, "learning_rate": 4.824006488240065e-06, "loss": 0.7908, "step": 4511 }, { "epoch": 0.1317333800472979, "grad_norm": 0.7442609705945306, "learning_rate": 4.823844282238444e-06, "loss": 0.6457, "step": 4512 }, { "epoch": 0.13176257627514526, "grad_norm": 0.7704161581341802, "learning_rate": 4.823682076236822e-06, "loss": 0.7246, "step": 4513 }, { "epoch": 0.13179177250299262, "grad_norm": 0.8413309754722487, "learning_rate": 4.823519870235199e-06, "loss": 0.7622, "step": 4514 }, { "epoch": 0.13182096873083998, "grad_norm": 0.8278396232157184, "learning_rate": 4.823357664233577e-06, "loss": 0.7469, "step": 4515 }, { "epoch": 0.13185016495868734, "grad_norm": 0.744255529333219, "learning_rate": 4.823195458231955e-06, "loss": 0.6596, "step": 4516 }, { "epoch": 0.1318793611865347, "grad_norm": 0.732081342958162, "learning_rate": 4.823033252230333e-06, "loss": 0.6134, "step": 4517 }, { "epoch": 0.13190855741438207, "grad_norm": 0.8007861840643948, "learning_rate": 4.822871046228711e-06, "loss": 0.7294, "step": 4518 }, { "epoch": 0.13193775364222943, "grad_norm": 0.7598795886032627, "learning_rate": 4.822708840227089e-06, "loss": 0.7001, "step": 4519 }, { "epoch": 0.1319669498700768, "grad_norm": 0.7236008915630904, "learning_rate": 4.822546634225467e-06, "loss": 0.6587, "step": 4520 }, { "epoch": 0.13199614609792415, "grad_norm": 0.8022261958415159, "learning_rate": 4.822384428223845e-06, "loss": 0.7224, "step": 4521 }, { "epoch": 0.1320253423257715, "grad_norm": 0.7424798488408437, "learning_rate": 4.822222222222222e-06, "loss": 0.7096, "step": 4522 }, { "epoch": 0.13205453855361887, "grad_norm": 0.7915588228769187, "learning_rate": 4.8220600162206e-06, "loss": 0.6344, "step": 4523 }, { "epoch": 0.13208373478146623, "grad_norm": 0.8691493807367769, "learning_rate": 4.821897810218978e-06, "loss": 0.7679, "step": 4524 }, { "epoch": 0.1321129310093136, "grad_norm": 0.7385832112261425, "learning_rate": 4.821735604217356e-06, "loss": 0.684, "step": 4525 }, { "epoch": 0.13214212723716096, "grad_norm": 0.7635716222708467, "learning_rate": 4.821573398215734e-06, "loss": 0.6726, "step": 4526 }, { "epoch": 0.13217132346500832, "grad_norm": 0.7268745214747441, "learning_rate": 4.821411192214112e-06, "loss": 0.6656, "step": 4527 }, { "epoch": 0.13220051969285568, "grad_norm": 0.7430230876623378, "learning_rate": 4.82124898621249e-06, "loss": 0.7338, "step": 4528 }, { "epoch": 0.13222971592070304, "grad_norm": 0.7625066542743276, "learning_rate": 4.821086780210868e-06, "loss": 0.7388, "step": 4529 }, { "epoch": 0.1322589121485504, "grad_norm": 0.8436308069255746, "learning_rate": 4.820924574209246e-06, "loss": 0.8512, "step": 4530 }, { "epoch": 0.13228810837639776, "grad_norm": 0.8142646842343754, "learning_rate": 4.820762368207624e-06, "loss": 0.726, "step": 4531 }, { "epoch": 0.13231730460424512, "grad_norm": 0.7488384431477634, "learning_rate": 4.820600162206002e-06, "loss": 0.7101, "step": 4532 }, { "epoch": 0.13234650083209248, "grad_norm": 0.7930351809306987, "learning_rate": 4.82043795620438e-06, "loss": 0.7708, "step": 4533 }, { "epoch": 0.13237569705993985, "grad_norm": 0.7556005910591088, "learning_rate": 4.820275750202758e-06, "loss": 0.6547, "step": 4534 }, { "epoch": 0.1324048932877872, "grad_norm": 0.8133100844366331, "learning_rate": 4.820113544201136e-06, "loss": 0.7609, "step": 4535 }, { "epoch": 0.13243408951563457, "grad_norm": 0.8044117242015001, "learning_rate": 4.819951338199514e-06, "loss": 0.7923, "step": 4536 }, { "epoch": 0.13246328574348193, "grad_norm": 0.7389918731463521, "learning_rate": 4.8197891321978914e-06, "loss": 0.6914, "step": 4537 }, { "epoch": 0.13249248197132932, "grad_norm": 0.9541077964994452, "learning_rate": 4.8196269261962694e-06, "loss": 0.7588, "step": 4538 }, { "epoch": 0.13252167819917668, "grad_norm": 0.7639995538412155, "learning_rate": 4.8194647201946474e-06, "loss": 0.656, "step": 4539 }, { "epoch": 0.13255087442702404, "grad_norm": 0.7851511127464045, "learning_rate": 4.8193025141930254e-06, "loss": 0.6786, "step": 4540 }, { "epoch": 0.1325800706548714, "grad_norm": 0.7964772752250227, "learning_rate": 4.8191403081914035e-06, "loss": 0.7717, "step": 4541 }, { "epoch": 0.13260926688271876, "grad_norm": 0.8182436489090049, "learning_rate": 4.8189781021897815e-06, "loss": 0.694, "step": 4542 }, { "epoch": 0.13263846311056612, "grad_norm": 0.7444357151431144, "learning_rate": 4.8188158961881595e-06, "loss": 0.6794, "step": 4543 }, { "epoch": 0.13266765933841349, "grad_norm": 0.7445897526398676, "learning_rate": 4.8186536901865375e-06, "loss": 0.6587, "step": 4544 }, { "epoch": 0.13269685556626085, "grad_norm": 0.7143377465486903, "learning_rate": 4.8184914841849155e-06, "loss": 0.6345, "step": 4545 }, { "epoch": 0.1327260517941082, "grad_norm": 0.7032669019635672, "learning_rate": 4.818329278183293e-06, "loss": 0.6828, "step": 4546 }, { "epoch": 0.13275524802195557, "grad_norm": 0.7921966214372311, "learning_rate": 4.818167072181671e-06, "loss": 0.7241, "step": 4547 }, { "epoch": 0.13278444424980293, "grad_norm": 0.7990928198018369, "learning_rate": 4.818004866180049e-06, "loss": 0.7126, "step": 4548 }, { "epoch": 0.1328136404776503, "grad_norm": 0.7566809047101613, "learning_rate": 4.817842660178427e-06, "loss": 0.6958, "step": 4549 }, { "epoch": 0.13284283670549765, "grad_norm": 0.7725273473566818, "learning_rate": 4.8176804541768055e-06, "loss": 0.6829, "step": 4550 }, { "epoch": 0.132872032933345, "grad_norm": 0.7208616180827765, "learning_rate": 4.8175182481751835e-06, "loss": 0.6091, "step": 4551 }, { "epoch": 0.13290122916119237, "grad_norm": 0.736467126340655, "learning_rate": 4.817356042173561e-06, "loss": 0.6751, "step": 4552 }, { "epoch": 0.13293042538903974, "grad_norm": 0.7238903100598673, "learning_rate": 4.817193836171939e-06, "loss": 0.6364, "step": 4553 }, { "epoch": 0.1329596216168871, "grad_norm": 0.85647987259693, "learning_rate": 4.817031630170317e-06, "loss": 0.8091, "step": 4554 }, { "epoch": 0.13298881784473446, "grad_norm": 0.7255434587222556, "learning_rate": 4.816869424168695e-06, "loss": 0.6296, "step": 4555 }, { "epoch": 0.13301801407258182, "grad_norm": 0.7749981002313873, "learning_rate": 4.816707218167073e-06, "loss": 0.6639, "step": 4556 }, { "epoch": 0.13304721030042918, "grad_norm": 0.801739843643689, "learning_rate": 4.816545012165451e-06, "loss": 0.6883, "step": 4557 }, { "epoch": 0.13307640652827654, "grad_norm": 0.8416655920493356, "learning_rate": 4.816382806163829e-06, "loss": 0.6421, "step": 4558 }, { "epoch": 0.1331056027561239, "grad_norm": 0.793365376132425, "learning_rate": 4.816220600162207e-06, "loss": 0.6963, "step": 4559 }, { "epoch": 0.13313479898397126, "grad_norm": 0.6906613300428192, "learning_rate": 4.816058394160584e-06, "loss": 0.5963, "step": 4560 }, { "epoch": 0.13316399521181863, "grad_norm": 0.6839182948216148, "learning_rate": 4.815896188158962e-06, "loss": 0.6242, "step": 4561 }, { "epoch": 0.133193191439666, "grad_norm": 0.7417492131490442, "learning_rate": 4.81573398215734e-06, "loss": 0.6307, "step": 4562 }, { "epoch": 0.13322238766751335, "grad_norm": 0.7527819613855342, "learning_rate": 4.815571776155718e-06, "loss": 0.6899, "step": 4563 }, { "epoch": 0.1332515838953607, "grad_norm": 0.7614104110431096, "learning_rate": 4.815409570154096e-06, "loss": 0.6924, "step": 4564 }, { "epoch": 0.13328078012320807, "grad_norm": 0.7812623023215404, "learning_rate": 4.815247364152474e-06, "loss": 0.7221, "step": 4565 }, { "epoch": 0.13330997635105543, "grad_norm": 0.9156704762767529, "learning_rate": 4.815085158150852e-06, "loss": 0.7739, "step": 4566 }, { "epoch": 0.1333391725789028, "grad_norm": 0.8525174347691091, "learning_rate": 4.81492295214923e-06, "loss": 0.824, "step": 4567 }, { "epoch": 0.13336836880675018, "grad_norm": 0.7225008735739997, "learning_rate": 4.814760746147608e-06, "loss": 0.6864, "step": 4568 }, { "epoch": 0.13339756503459754, "grad_norm": 0.7625736320013908, "learning_rate": 4.814598540145986e-06, "loss": 0.7243, "step": 4569 }, { "epoch": 0.1334267612624449, "grad_norm": 0.7886247259617075, "learning_rate": 4.814436334144364e-06, "loss": 0.7263, "step": 4570 }, { "epoch": 0.13345595749029227, "grad_norm": 0.7555012546831416, "learning_rate": 4.814274128142742e-06, "loss": 0.7125, "step": 4571 }, { "epoch": 0.13348515371813963, "grad_norm": 0.7941501139087119, "learning_rate": 4.81411192214112e-06, "loss": 0.7578, "step": 4572 }, { "epoch": 0.133514349945987, "grad_norm": 0.7956031919050907, "learning_rate": 4.813949716139498e-06, "loss": 0.7074, "step": 4573 }, { "epoch": 0.13354354617383435, "grad_norm": 0.7422992522167187, "learning_rate": 4.813787510137876e-06, "loss": 0.6972, "step": 4574 }, { "epoch": 0.1335727424016817, "grad_norm": 0.7042732148178119, "learning_rate": 4.813625304136253e-06, "loss": 0.6383, "step": 4575 }, { "epoch": 0.13360193862952907, "grad_norm": 0.722468592791989, "learning_rate": 4.813463098134631e-06, "loss": 0.6267, "step": 4576 }, { "epoch": 0.13363113485737643, "grad_norm": 0.730236070633087, "learning_rate": 4.813300892133009e-06, "loss": 0.6937, "step": 4577 }, { "epoch": 0.1336603310852238, "grad_norm": 0.7787508210088865, "learning_rate": 4.813138686131387e-06, "loss": 0.6699, "step": 4578 }, { "epoch": 0.13368952731307115, "grad_norm": 0.7996691394444836, "learning_rate": 4.812976480129765e-06, "loss": 0.788, "step": 4579 }, { "epoch": 0.13371872354091852, "grad_norm": 0.7902105116791647, "learning_rate": 4.812814274128143e-06, "loss": 0.7196, "step": 4580 }, { "epoch": 0.13374791976876588, "grad_norm": 0.8012612109619429, "learning_rate": 4.812652068126521e-06, "loss": 0.7622, "step": 4581 }, { "epoch": 0.13377711599661324, "grad_norm": 0.7645277067334578, "learning_rate": 4.812489862124899e-06, "loss": 0.7697, "step": 4582 }, { "epoch": 0.1338063122244606, "grad_norm": 0.7020833843438375, "learning_rate": 4.812327656123277e-06, "loss": 0.625, "step": 4583 }, { "epoch": 0.13383550845230796, "grad_norm": 0.7608813114353954, "learning_rate": 4.812165450121654e-06, "loss": 0.7332, "step": 4584 }, { "epoch": 0.13386470468015532, "grad_norm": 0.8517530340682185, "learning_rate": 4.812003244120032e-06, "loss": 0.7371, "step": 4585 }, { "epoch": 0.13389390090800268, "grad_norm": 0.7772467757574816, "learning_rate": 4.81184103811841e-06, "loss": 0.6287, "step": 4586 }, { "epoch": 0.13392309713585004, "grad_norm": 0.8278475264081884, "learning_rate": 4.811678832116789e-06, "loss": 0.7365, "step": 4587 }, { "epoch": 0.1339522933636974, "grad_norm": 0.7017793978430679, "learning_rate": 4.811516626115167e-06, "loss": 0.589, "step": 4588 }, { "epoch": 0.13398148959154477, "grad_norm": 0.9925999675733017, "learning_rate": 4.811354420113544e-06, "loss": 0.7087, "step": 4589 }, { "epoch": 0.13401068581939213, "grad_norm": 1.1361554163683953, "learning_rate": 4.811192214111922e-06, "loss": 0.711, "step": 4590 }, { "epoch": 0.1340398820472395, "grad_norm": 0.7528731141262454, "learning_rate": 4.8110300081103e-06, "loss": 0.7043, "step": 4591 }, { "epoch": 0.13406907827508685, "grad_norm": 0.7634525863487329, "learning_rate": 4.810867802108678e-06, "loss": 0.6394, "step": 4592 }, { "epoch": 0.1340982745029342, "grad_norm": 0.7428417883034721, "learning_rate": 4.810705596107056e-06, "loss": 0.7542, "step": 4593 }, { "epoch": 0.13412747073078157, "grad_norm": 0.8019073733731659, "learning_rate": 4.810543390105434e-06, "loss": 0.7332, "step": 4594 }, { "epoch": 0.13415666695862893, "grad_norm": 0.8816534860303132, "learning_rate": 4.8103811841038124e-06, "loss": 0.7259, "step": 4595 }, { "epoch": 0.1341858631864763, "grad_norm": 0.7455845627692774, "learning_rate": 4.8102189781021904e-06, "loss": 0.6958, "step": 4596 }, { "epoch": 0.13421505941432366, "grad_norm": 0.8292584159859591, "learning_rate": 4.8100567721005684e-06, "loss": 0.7076, "step": 4597 }, { "epoch": 0.13424425564217105, "grad_norm": 0.8099247531970865, "learning_rate": 4.809894566098946e-06, "loss": 0.6856, "step": 4598 }, { "epoch": 0.1342734518700184, "grad_norm": 0.827650398662282, "learning_rate": 4.809732360097324e-06, "loss": 0.8021, "step": 4599 }, { "epoch": 0.13430264809786577, "grad_norm": 0.7099753337958471, "learning_rate": 4.809570154095702e-06, "loss": 0.6175, "step": 4600 }, { "epoch": 0.13433184432571313, "grad_norm": 0.7507856231263563, "learning_rate": 4.80940794809408e-06, "loss": 0.7271, "step": 4601 }, { "epoch": 0.1343610405535605, "grad_norm": 0.7377195816503587, "learning_rate": 4.809245742092458e-06, "loss": 0.6827, "step": 4602 }, { "epoch": 0.13439023678140785, "grad_norm": 0.7846781631645082, "learning_rate": 4.809083536090836e-06, "loss": 0.735, "step": 4603 }, { "epoch": 0.1344194330092552, "grad_norm": 0.7313240618789018, "learning_rate": 4.808921330089214e-06, "loss": 0.7028, "step": 4604 }, { "epoch": 0.13444862923710257, "grad_norm": 0.7631617661885496, "learning_rate": 4.808759124087592e-06, "loss": 0.7254, "step": 4605 }, { "epoch": 0.13447782546494993, "grad_norm": 0.7598877457141603, "learning_rate": 4.80859691808597e-06, "loss": 0.6622, "step": 4606 }, { "epoch": 0.1345070216927973, "grad_norm": 0.7567805231347935, "learning_rate": 4.808434712084348e-06, "loss": 0.6428, "step": 4607 }, { "epoch": 0.13453621792064466, "grad_norm": 0.8031343505130462, "learning_rate": 4.808272506082726e-06, "loss": 0.7234, "step": 4608 }, { "epoch": 0.13456541414849202, "grad_norm": 0.7033421154405236, "learning_rate": 4.808110300081104e-06, "loss": 0.6167, "step": 4609 }, { "epoch": 0.13459461037633938, "grad_norm": 0.6956318711034243, "learning_rate": 4.807948094079482e-06, "loss": 0.5964, "step": 4610 }, { "epoch": 0.13462380660418674, "grad_norm": 0.7470771832813264, "learning_rate": 4.80778588807786e-06, "loss": 0.6466, "step": 4611 }, { "epoch": 0.1346530028320341, "grad_norm": 0.7577391432161836, "learning_rate": 4.807623682076238e-06, "loss": 0.6537, "step": 4612 }, { "epoch": 0.13468219905988146, "grad_norm": 0.8010123706904053, "learning_rate": 4.807461476074615e-06, "loss": 0.7506, "step": 4613 }, { "epoch": 0.13471139528772882, "grad_norm": 0.7967499302317875, "learning_rate": 4.807299270072993e-06, "loss": 0.7137, "step": 4614 }, { "epoch": 0.13474059151557619, "grad_norm": 0.8028077770249045, "learning_rate": 4.807137064071371e-06, "loss": 0.7194, "step": 4615 }, { "epoch": 0.13476978774342355, "grad_norm": 0.8028603080506168, "learning_rate": 4.806974858069749e-06, "loss": 0.7009, "step": 4616 }, { "epoch": 0.1347989839712709, "grad_norm": 0.7672502971335766, "learning_rate": 4.806812652068127e-06, "loss": 0.6291, "step": 4617 }, { "epoch": 0.13482818019911827, "grad_norm": 0.7319660525500619, "learning_rate": 4.806650446066505e-06, "loss": 0.644, "step": 4618 }, { "epoch": 0.13485737642696563, "grad_norm": 0.788696381403037, "learning_rate": 4.806488240064883e-06, "loss": 0.747, "step": 4619 }, { "epoch": 0.134886572654813, "grad_norm": 0.8897837398495441, "learning_rate": 4.806326034063261e-06, "loss": 0.8193, "step": 4620 }, { "epoch": 0.13491576888266035, "grad_norm": 0.9848598345927057, "learning_rate": 4.806163828061639e-06, "loss": 0.7388, "step": 4621 }, { "epoch": 0.13494496511050771, "grad_norm": 0.7557472446518614, "learning_rate": 4.806001622060016e-06, "loss": 0.6529, "step": 4622 }, { "epoch": 0.13497416133835508, "grad_norm": 0.792902562267596, "learning_rate": 4.805839416058394e-06, "loss": 0.7266, "step": 4623 }, { "epoch": 0.13500335756620244, "grad_norm": 0.9857187646155138, "learning_rate": 4.805677210056772e-06, "loss": 0.7781, "step": 4624 }, { "epoch": 0.1350325537940498, "grad_norm": 0.6970115852646046, "learning_rate": 4.805515004055151e-06, "loss": 0.6422, "step": 4625 }, { "epoch": 0.13506175002189716, "grad_norm": 0.9298670860067201, "learning_rate": 4.805352798053529e-06, "loss": 0.7021, "step": 4626 }, { "epoch": 0.13509094624974452, "grad_norm": 0.7756774856990287, "learning_rate": 4.805190592051906e-06, "loss": 0.6604, "step": 4627 }, { "epoch": 0.13512014247759188, "grad_norm": 0.6758904532427494, "learning_rate": 4.805028386050284e-06, "loss": 0.5473, "step": 4628 }, { "epoch": 0.13514933870543927, "grad_norm": 0.7443078087850439, "learning_rate": 4.804866180048662e-06, "loss": 0.67, "step": 4629 }, { "epoch": 0.13517853493328663, "grad_norm": 0.827604536870323, "learning_rate": 4.80470397404704e-06, "loss": 0.6973, "step": 4630 }, { "epoch": 0.135207731161134, "grad_norm": 0.7754864365287807, "learning_rate": 4.804541768045418e-06, "loss": 0.6628, "step": 4631 }, { "epoch": 0.13523692738898135, "grad_norm": 0.7672946495567481, "learning_rate": 4.804379562043796e-06, "loss": 0.7226, "step": 4632 }, { "epoch": 0.13526612361682872, "grad_norm": 0.7472057029310668, "learning_rate": 4.804217356042174e-06, "loss": 0.6866, "step": 4633 }, { "epoch": 0.13529531984467608, "grad_norm": 0.7898363872580013, "learning_rate": 4.804055150040552e-06, "loss": 0.7251, "step": 4634 }, { "epoch": 0.13532451607252344, "grad_norm": 0.7601126803663056, "learning_rate": 4.80389294403893e-06, "loss": 0.6684, "step": 4635 }, { "epoch": 0.1353537123003708, "grad_norm": 0.7818414701838927, "learning_rate": 4.803730738037307e-06, "loss": 0.6899, "step": 4636 }, { "epoch": 0.13538290852821816, "grad_norm": 1.2027253897815595, "learning_rate": 4.803568532035685e-06, "loss": 0.7817, "step": 4637 }, { "epoch": 0.13541210475606552, "grad_norm": 0.7189384634931768, "learning_rate": 4.803406326034063e-06, "loss": 0.6306, "step": 4638 }, { "epoch": 0.13544130098391288, "grad_norm": 0.7814709647606854, "learning_rate": 4.803244120032441e-06, "loss": 0.725, "step": 4639 }, { "epoch": 0.13547049721176024, "grad_norm": 0.7650118811103137, "learning_rate": 4.803081914030819e-06, "loss": 0.7673, "step": 4640 }, { "epoch": 0.1354996934396076, "grad_norm": 0.8681579982887547, "learning_rate": 4.802919708029197e-06, "loss": 0.7598, "step": 4641 }, { "epoch": 0.13552888966745497, "grad_norm": 0.8373581857238284, "learning_rate": 4.802757502027575e-06, "loss": 0.8008, "step": 4642 }, { "epoch": 0.13555808589530233, "grad_norm": 0.7594501403812266, "learning_rate": 4.802595296025953e-06, "loss": 0.7297, "step": 4643 }, { "epoch": 0.1355872821231497, "grad_norm": 0.8477559733317157, "learning_rate": 4.802433090024331e-06, "loss": 0.7328, "step": 4644 }, { "epoch": 0.13561647835099705, "grad_norm": 0.7600720239882537, "learning_rate": 4.802270884022709e-06, "loss": 0.7411, "step": 4645 }, { "epoch": 0.1356456745788444, "grad_norm": 0.8227837502542017, "learning_rate": 4.802108678021087e-06, "loss": 0.7264, "step": 4646 }, { "epoch": 0.13567487080669177, "grad_norm": 0.8939588189211373, "learning_rate": 4.801946472019465e-06, "loss": 0.8998, "step": 4647 }, { "epoch": 0.13570406703453913, "grad_norm": 0.807058576697493, "learning_rate": 4.801784266017843e-06, "loss": 0.7175, "step": 4648 }, { "epoch": 0.1357332632623865, "grad_norm": 0.7928098384206486, "learning_rate": 4.801622060016221e-06, "loss": 0.7249, "step": 4649 }, { "epoch": 0.13576245949023386, "grad_norm": 0.7666814045110046, "learning_rate": 4.801459854014599e-06, "loss": 0.7384, "step": 4650 }, { "epoch": 0.13579165571808122, "grad_norm": 0.7690978728740987, "learning_rate": 4.8012976480129766e-06, "loss": 0.7232, "step": 4651 }, { "epoch": 0.13582085194592858, "grad_norm": 0.7511696699882559, "learning_rate": 4.801135442011355e-06, "loss": 0.6925, "step": 4652 }, { "epoch": 0.13585004817377594, "grad_norm": 0.8212250420802772, "learning_rate": 4.800973236009733e-06, "loss": 0.7335, "step": 4653 }, { "epoch": 0.1358792444016233, "grad_norm": 0.7307023840676742, "learning_rate": 4.800811030008111e-06, "loss": 0.682, "step": 4654 }, { "epoch": 0.13590844062947066, "grad_norm": 0.7440886960469544, "learning_rate": 4.800648824006489e-06, "loss": 0.685, "step": 4655 }, { "epoch": 0.13593763685731802, "grad_norm": 0.7988283568392917, "learning_rate": 4.800486618004867e-06, "loss": 0.7034, "step": 4656 }, { "epoch": 0.13596683308516538, "grad_norm": 0.9677201868339205, "learning_rate": 4.800324412003245e-06, "loss": 0.6705, "step": 4657 }, { "epoch": 0.13599602931301275, "grad_norm": 0.7241544193282976, "learning_rate": 4.800162206001623e-06, "loss": 0.653, "step": 4658 }, { "epoch": 0.13602522554086013, "grad_norm": 1.0212546458544876, "learning_rate": 4.800000000000001e-06, "loss": 0.83, "step": 4659 }, { "epoch": 0.1360544217687075, "grad_norm": 0.7763230358523522, "learning_rate": 4.799837793998378e-06, "loss": 0.6948, "step": 4660 }, { "epoch": 0.13608361799655486, "grad_norm": 0.7415312114634038, "learning_rate": 4.799675587996756e-06, "loss": 0.7025, "step": 4661 }, { "epoch": 0.13611281422440222, "grad_norm": 0.769089279604514, "learning_rate": 4.799513381995134e-06, "loss": 0.7076, "step": 4662 }, { "epoch": 0.13614201045224958, "grad_norm": 0.7881365854530284, "learning_rate": 4.799351175993513e-06, "loss": 0.7629, "step": 4663 }, { "epoch": 0.13617120668009694, "grad_norm": 0.7609843115928944, "learning_rate": 4.799188969991891e-06, "loss": 0.739, "step": 4664 }, { "epoch": 0.1362004029079443, "grad_norm": 0.980262347270757, "learning_rate": 4.799026763990268e-06, "loss": 0.7665, "step": 4665 }, { "epoch": 0.13622959913579166, "grad_norm": 0.6748804328897442, "learning_rate": 4.798864557988646e-06, "loss": 0.628, "step": 4666 }, { "epoch": 0.13625879536363902, "grad_norm": 0.7063354331598379, "learning_rate": 4.798702351987024e-06, "loss": 0.6413, "step": 4667 }, { "epoch": 0.13628799159148638, "grad_norm": 0.7593816613432758, "learning_rate": 4.798540145985402e-06, "loss": 0.7038, "step": 4668 }, { "epoch": 0.13631718781933375, "grad_norm": 0.6677976290313613, "learning_rate": 4.79837793998378e-06, "loss": 0.5706, "step": 4669 }, { "epoch": 0.1363463840471811, "grad_norm": 0.8530152027282084, "learning_rate": 4.798215733982158e-06, "loss": 0.813, "step": 4670 }, { "epoch": 0.13637558027502847, "grad_norm": 0.7348429376752372, "learning_rate": 4.798053527980536e-06, "loss": 0.6396, "step": 4671 }, { "epoch": 0.13640477650287583, "grad_norm": 0.8711151173998013, "learning_rate": 4.797891321978914e-06, "loss": 0.7878, "step": 4672 }, { "epoch": 0.1364339727307232, "grad_norm": 0.7856694364326432, "learning_rate": 4.797729115977292e-06, "loss": 0.7506, "step": 4673 }, { "epoch": 0.13646316895857055, "grad_norm": 0.7067332934436762, "learning_rate": 4.797566909975669e-06, "loss": 0.6257, "step": 4674 }, { "epoch": 0.1364923651864179, "grad_norm": 0.7996618850398286, "learning_rate": 4.797404703974047e-06, "loss": 0.7825, "step": 4675 }, { "epoch": 0.13652156141426527, "grad_norm": 0.7432465799703706, "learning_rate": 4.797242497972425e-06, "loss": 0.6843, "step": 4676 }, { "epoch": 0.13655075764211264, "grad_norm": 0.7073437025573518, "learning_rate": 4.797080291970803e-06, "loss": 0.5579, "step": 4677 }, { "epoch": 0.13657995386996, "grad_norm": 0.6992667642863784, "learning_rate": 4.796918085969181e-06, "loss": 0.6344, "step": 4678 }, { "epoch": 0.13660915009780736, "grad_norm": 0.8254189559942976, "learning_rate": 4.796755879967559e-06, "loss": 0.8083, "step": 4679 }, { "epoch": 0.13663834632565472, "grad_norm": 0.7003839119048583, "learning_rate": 4.796593673965937e-06, "loss": 0.6153, "step": 4680 }, { "epoch": 0.13666754255350208, "grad_norm": 0.8142025993162398, "learning_rate": 4.796431467964315e-06, "loss": 0.7555, "step": 4681 }, { "epoch": 0.13669673878134944, "grad_norm": 0.8360576303464713, "learning_rate": 4.796269261962693e-06, "loss": 0.7897, "step": 4682 }, { "epoch": 0.1367259350091968, "grad_norm": 0.7571035688206464, "learning_rate": 4.796107055961071e-06, "loss": 0.6994, "step": 4683 }, { "epoch": 0.13675513123704416, "grad_norm": 0.7696624947060242, "learning_rate": 4.795944849959449e-06, "loss": 0.6371, "step": 4684 }, { "epoch": 0.13678432746489153, "grad_norm": 0.7193690709146672, "learning_rate": 4.795782643957827e-06, "loss": 0.6226, "step": 4685 }, { "epoch": 0.1368135236927389, "grad_norm": 0.7351996752111395, "learning_rate": 4.795620437956205e-06, "loss": 0.6593, "step": 4686 }, { "epoch": 0.13684271992058625, "grad_norm": 0.780832465903053, "learning_rate": 4.795458231954583e-06, "loss": 0.7597, "step": 4687 }, { "epoch": 0.1368719161484336, "grad_norm": 0.7952004193598089, "learning_rate": 4.795296025952961e-06, "loss": 0.7324, "step": 4688 }, { "epoch": 0.136901112376281, "grad_norm": 0.7506447492524213, "learning_rate": 4.795133819951338e-06, "loss": 0.5747, "step": 4689 }, { "epoch": 0.13693030860412836, "grad_norm": 0.7975459392897836, "learning_rate": 4.794971613949716e-06, "loss": 0.7709, "step": 4690 }, { "epoch": 0.13695950483197572, "grad_norm": 0.7232925944967252, "learning_rate": 4.794809407948094e-06, "loss": 0.6367, "step": 4691 }, { "epoch": 0.13698870105982308, "grad_norm": 1.0729785198840736, "learning_rate": 4.794647201946472e-06, "loss": 0.7734, "step": 4692 }, { "epoch": 0.13701789728767044, "grad_norm": 0.7484598002054664, "learning_rate": 4.79448499594485e-06, "loss": 0.6859, "step": 4693 }, { "epoch": 0.1370470935155178, "grad_norm": 0.7570284861490725, "learning_rate": 4.794322789943228e-06, "loss": 0.6931, "step": 4694 }, { "epoch": 0.13707628974336517, "grad_norm": 0.8368921706603537, "learning_rate": 4.794160583941606e-06, "loss": 0.7366, "step": 4695 }, { "epoch": 0.13710548597121253, "grad_norm": 0.8056937333907901, "learning_rate": 4.793998377939984e-06, "loss": 0.8095, "step": 4696 }, { "epoch": 0.1371346821990599, "grad_norm": 0.7433727608811687, "learning_rate": 4.793836171938362e-06, "loss": 0.7123, "step": 4697 }, { "epoch": 0.13716387842690725, "grad_norm": 0.7417599750906241, "learning_rate": 4.7936739659367395e-06, "loss": 0.6866, "step": 4698 }, { "epoch": 0.1371930746547546, "grad_norm": 0.7089751334752309, "learning_rate": 4.7935117599351175e-06, "loss": 0.6928, "step": 4699 }, { "epoch": 0.13722227088260197, "grad_norm": 0.7675256384280836, "learning_rate": 4.7933495539334955e-06, "loss": 0.758, "step": 4700 }, { "epoch": 0.13725146711044933, "grad_norm": 0.7398309716649293, "learning_rate": 4.793187347931874e-06, "loss": 0.6855, "step": 4701 }, { "epoch": 0.1372806633382967, "grad_norm": 0.7456197795543296, "learning_rate": 4.793025141930252e-06, "loss": 0.7027, "step": 4702 }, { "epoch": 0.13730985956614405, "grad_norm": 1.1700718974320505, "learning_rate": 4.7928629359286295e-06, "loss": 0.7554, "step": 4703 }, { "epoch": 0.13733905579399142, "grad_norm": 0.7493081145688333, "learning_rate": 4.7927007299270075e-06, "loss": 0.6732, "step": 4704 }, { "epoch": 0.13736825202183878, "grad_norm": 0.7800055412039577, "learning_rate": 4.7925385239253856e-06, "loss": 0.717, "step": 4705 }, { "epoch": 0.13739744824968614, "grad_norm": 0.8071281828903092, "learning_rate": 4.7923763179237636e-06, "loss": 0.7276, "step": 4706 }, { "epoch": 0.1374266444775335, "grad_norm": 0.8040566860295588, "learning_rate": 4.7922141119221416e-06, "loss": 0.7625, "step": 4707 }, { "epoch": 0.13745584070538086, "grad_norm": 0.7727060709554712, "learning_rate": 4.7920519059205196e-06, "loss": 0.7408, "step": 4708 }, { "epoch": 0.13748503693322822, "grad_norm": 0.740458647333078, "learning_rate": 4.791889699918898e-06, "loss": 0.7039, "step": 4709 }, { "epoch": 0.13751423316107558, "grad_norm": 0.8306497105506033, "learning_rate": 4.791727493917276e-06, "loss": 0.7651, "step": 4710 }, { "epoch": 0.13754342938892294, "grad_norm": 0.758556930118292, "learning_rate": 4.791565287915654e-06, "loss": 0.7557, "step": 4711 }, { "epoch": 0.1375726256167703, "grad_norm": 0.6981915046756959, "learning_rate": 4.791403081914031e-06, "loss": 0.6174, "step": 4712 }, { "epoch": 0.13760182184461767, "grad_norm": 0.8563440909477743, "learning_rate": 4.791240875912409e-06, "loss": 0.7708, "step": 4713 }, { "epoch": 0.13763101807246503, "grad_norm": 0.7521008759854086, "learning_rate": 4.791078669910787e-06, "loss": 0.7146, "step": 4714 }, { "epoch": 0.1376602143003124, "grad_norm": 0.7911593084344504, "learning_rate": 4.790916463909165e-06, "loss": 0.7402, "step": 4715 }, { "epoch": 0.13768941052815975, "grad_norm": 0.7603123517233674, "learning_rate": 4.790754257907543e-06, "loss": 0.7002, "step": 4716 }, { "epoch": 0.1377186067560071, "grad_norm": 0.7663216628850941, "learning_rate": 4.790592051905921e-06, "loss": 0.7494, "step": 4717 }, { "epoch": 0.13774780298385447, "grad_norm": 0.8067965028292795, "learning_rate": 4.790429845904299e-06, "loss": 0.7663, "step": 4718 }, { "epoch": 0.13777699921170186, "grad_norm": 0.7899358565947059, "learning_rate": 4.790267639902677e-06, "loss": 0.6402, "step": 4719 }, { "epoch": 0.13780619543954922, "grad_norm": 0.8111269357624149, "learning_rate": 4.790105433901055e-06, "loss": 0.7751, "step": 4720 }, { "epoch": 0.13783539166739658, "grad_norm": 0.785714348020226, "learning_rate": 4.789943227899433e-06, "loss": 0.7551, "step": 4721 }, { "epoch": 0.13786458789524395, "grad_norm": 0.747220220414597, "learning_rate": 4.789781021897811e-06, "loss": 0.6401, "step": 4722 }, { "epoch": 0.1378937841230913, "grad_norm": 0.9742666645435826, "learning_rate": 4.789618815896189e-06, "loss": 0.7488, "step": 4723 }, { "epoch": 0.13792298035093867, "grad_norm": 0.8666069735747923, "learning_rate": 4.789456609894567e-06, "loss": 0.7253, "step": 4724 }, { "epoch": 0.13795217657878603, "grad_norm": 0.8131125920243778, "learning_rate": 4.789294403892945e-06, "loss": 0.7648, "step": 4725 }, { "epoch": 0.1379813728066334, "grad_norm": 0.7893905950371233, "learning_rate": 4.789132197891323e-06, "loss": 0.7686, "step": 4726 }, { "epoch": 0.13801056903448075, "grad_norm": 0.7735228142803798, "learning_rate": 4.7889699918897e-06, "loss": 0.7161, "step": 4727 }, { "epoch": 0.1380397652623281, "grad_norm": 0.7362823816345272, "learning_rate": 4.788807785888078e-06, "loss": 0.6052, "step": 4728 }, { "epoch": 0.13806896149017547, "grad_norm": 0.6943297784675493, "learning_rate": 4.788645579886456e-06, "loss": 0.5717, "step": 4729 }, { "epoch": 0.13809815771802283, "grad_norm": 0.6789259327695576, "learning_rate": 4.788483373884834e-06, "loss": 0.6021, "step": 4730 }, { "epoch": 0.1381273539458702, "grad_norm": 0.7120855594529191, "learning_rate": 4.788321167883212e-06, "loss": 0.6317, "step": 4731 }, { "epoch": 0.13815655017371756, "grad_norm": 0.7441055222536418, "learning_rate": 4.78815896188159e-06, "loss": 0.7243, "step": 4732 }, { "epoch": 0.13818574640156492, "grad_norm": 0.664362375627983, "learning_rate": 4.787996755879968e-06, "loss": 0.6007, "step": 4733 }, { "epoch": 0.13821494262941228, "grad_norm": 0.7566481279766041, "learning_rate": 4.787834549878346e-06, "loss": 0.6442, "step": 4734 }, { "epoch": 0.13824413885725964, "grad_norm": 0.8097146576907017, "learning_rate": 4.787672343876723e-06, "loss": 0.7225, "step": 4735 }, { "epoch": 0.138273335085107, "grad_norm": 0.7244689040266714, "learning_rate": 4.787510137875101e-06, "loss": 0.6408, "step": 4736 }, { "epoch": 0.13830253131295436, "grad_norm": 0.7796988379157543, "learning_rate": 4.787347931873479e-06, "loss": 0.6963, "step": 4737 }, { "epoch": 0.13833172754080172, "grad_norm": 0.9031708831294408, "learning_rate": 4.787185725871858e-06, "loss": 0.6903, "step": 4738 }, { "epoch": 0.13836092376864909, "grad_norm": 0.8200559527376697, "learning_rate": 4.787023519870236e-06, "loss": 0.77, "step": 4739 }, { "epoch": 0.13839011999649645, "grad_norm": 0.8278696742026126, "learning_rate": 4.786861313868614e-06, "loss": 0.7033, "step": 4740 }, { "epoch": 0.1384193162243438, "grad_norm": 0.7631774698813307, "learning_rate": 4.786699107866991e-06, "loss": 0.6947, "step": 4741 }, { "epoch": 0.13844851245219117, "grad_norm": 0.6988415562509237, "learning_rate": 4.786536901865369e-06, "loss": 0.5785, "step": 4742 }, { "epoch": 0.13847770868003853, "grad_norm": 0.9252319429656433, "learning_rate": 4.786374695863747e-06, "loss": 0.7221, "step": 4743 }, { "epoch": 0.1385069049078859, "grad_norm": 0.8185304192187387, "learning_rate": 4.786212489862125e-06, "loss": 0.7164, "step": 4744 }, { "epoch": 0.13853610113573325, "grad_norm": 0.7632531111436651, "learning_rate": 4.786050283860503e-06, "loss": 0.6167, "step": 4745 }, { "epoch": 0.13856529736358061, "grad_norm": 0.8234873834660686, "learning_rate": 4.785888077858881e-06, "loss": 0.7448, "step": 4746 }, { "epoch": 0.13859449359142798, "grad_norm": 0.77637738422306, "learning_rate": 4.785725871857259e-06, "loss": 0.7851, "step": 4747 }, { "epoch": 0.13862368981927534, "grad_norm": 0.7071434474132788, "learning_rate": 4.785563665855637e-06, "loss": 0.6323, "step": 4748 }, { "epoch": 0.13865288604712273, "grad_norm": 0.7676330197450238, "learning_rate": 4.785401459854015e-06, "loss": 0.6662, "step": 4749 }, { "epoch": 0.1386820822749701, "grad_norm": 0.837410807180781, "learning_rate": 4.7852392538523925e-06, "loss": 0.7908, "step": 4750 }, { "epoch": 0.13871127850281745, "grad_norm": 0.7805552181951849, "learning_rate": 4.7850770478507705e-06, "loss": 0.7869, "step": 4751 }, { "epoch": 0.1387404747306648, "grad_norm": 0.7515998548084672, "learning_rate": 4.7849148418491485e-06, "loss": 0.5824, "step": 4752 }, { "epoch": 0.13876967095851217, "grad_norm": 0.7130316410407064, "learning_rate": 4.7847526358475265e-06, "loss": 0.6554, "step": 4753 }, { "epoch": 0.13879886718635953, "grad_norm": 0.7256894307877196, "learning_rate": 4.7845904298459045e-06, "loss": 0.6661, "step": 4754 }, { "epoch": 0.1388280634142069, "grad_norm": 0.7821821832311038, "learning_rate": 4.7844282238442825e-06, "loss": 0.7123, "step": 4755 }, { "epoch": 0.13885725964205425, "grad_norm": 0.6868872270926558, "learning_rate": 4.7842660178426605e-06, "loss": 0.5997, "step": 4756 }, { "epoch": 0.13888645586990161, "grad_norm": 0.9651590499367664, "learning_rate": 4.7841038118410385e-06, "loss": 0.7386, "step": 4757 }, { "epoch": 0.13891565209774898, "grad_norm": 0.8216808311809881, "learning_rate": 4.7839416058394165e-06, "loss": 0.8541, "step": 4758 }, { "epoch": 0.13894484832559634, "grad_norm": 0.7460503212382039, "learning_rate": 4.7837793998377945e-06, "loss": 0.6775, "step": 4759 }, { "epoch": 0.1389740445534437, "grad_norm": 0.7382558755413842, "learning_rate": 4.7836171938361725e-06, "loss": 0.6485, "step": 4760 }, { "epoch": 0.13900324078129106, "grad_norm": 0.7661267553269628, "learning_rate": 4.7834549878345505e-06, "loss": 0.6331, "step": 4761 }, { "epoch": 0.13903243700913842, "grad_norm": 0.7356040868390215, "learning_rate": 4.7832927818329286e-06, "loss": 0.6947, "step": 4762 }, { "epoch": 0.13906163323698578, "grad_norm": 0.7594097616699859, "learning_rate": 4.7831305758313066e-06, "loss": 0.6228, "step": 4763 }, { "epoch": 0.13909082946483314, "grad_norm": 0.7282521463364305, "learning_rate": 4.7829683698296846e-06, "loss": 0.6537, "step": 4764 }, { "epoch": 0.1391200256926805, "grad_norm": 0.7268571429135029, "learning_rate": 4.782806163828062e-06, "loss": 0.6329, "step": 4765 }, { "epoch": 0.13914922192052787, "grad_norm": 0.7724227822567977, "learning_rate": 4.78264395782644e-06, "loss": 0.7227, "step": 4766 }, { "epoch": 0.13917841814837523, "grad_norm": 0.7224635717705556, "learning_rate": 4.782481751824818e-06, "loss": 0.691, "step": 4767 }, { "epoch": 0.1392076143762226, "grad_norm": 0.7259819715904129, "learning_rate": 4.782319545823196e-06, "loss": 0.6035, "step": 4768 }, { "epoch": 0.13923681060406995, "grad_norm": 0.7953845663680473, "learning_rate": 4.782157339821574e-06, "loss": 0.7189, "step": 4769 }, { "epoch": 0.1392660068319173, "grad_norm": 0.7448928447094225, "learning_rate": 4.781995133819952e-06, "loss": 0.789, "step": 4770 }, { "epoch": 0.13929520305976467, "grad_norm": 0.7558493375391541, "learning_rate": 4.78183292781833e-06, "loss": 0.7151, "step": 4771 }, { "epoch": 0.13932439928761203, "grad_norm": 0.8039139497950909, "learning_rate": 4.781670721816708e-06, "loss": 0.7135, "step": 4772 }, { "epoch": 0.1393535955154594, "grad_norm": 0.8182127710329499, "learning_rate": 4.781508515815085e-06, "loss": 0.6908, "step": 4773 }, { "epoch": 0.13938279174330676, "grad_norm": 0.7709993993546665, "learning_rate": 4.781346309813463e-06, "loss": 0.6989, "step": 4774 }, { "epoch": 0.13941198797115412, "grad_norm": 0.8097779407285882, "learning_rate": 4.781184103811841e-06, "loss": 0.7475, "step": 4775 }, { "epoch": 0.13944118419900148, "grad_norm": 0.7065101126460759, "learning_rate": 4.78102189781022e-06, "loss": 0.6914, "step": 4776 }, { "epoch": 0.13947038042684884, "grad_norm": 0.809206324754449, "learning_rate": 4.780859691808598e-06, "loss": 0.7724, "step": 4777 }, { "epoch": 0.1394995766546962, "grad_norm": 0.7449438302059469, "learning_rate": 4.780697485806976e-06, "loss": 0.6839, "step": 4778 }, { "epoch": 0.1395287728825436, "grad_norm": 0.7338559466050526, "learning_rate": 4.780535279805353e-06, "loss": 0.6355, "step": 4779 }, { "epoch": 0.13955796911039095, "grad_norm": 0.8205476252626805, "learning_rate": 4.780373073803731e-06, "loss": 0.7614, "step": 4780 }, { "epoch": 0.1395871653382383, "grad_norm": 0.8095977316932826, "learning_rate": 4.780210867802109e-06, "loss": 0.7027, "step": 4781 }, { "epoch": 0.13961636156608567, "grad_norm": 0.8760344949963417, "learning_rate": 4.780048661800487e-06, "loss": 0.8194, "step": 4782 }, { "epoch": 0.13964555779393303, "grad_norm": 0.8110575352988081, "learning_rate": 4.779886455798865e-06, "loss": 0.715, "step": 4783 }, { "epoch": 0.1396747540217804, "grad_norm": 0.6980336682548329, "learning_rate": 4.779724249797243e-06, "loss": 0.6295, "step": 4784 }, { "epoch": 0.13970395024962776, "grad_norm": 0.7478371551556386, "learning_rate": 4.779562043795621e-06, "loss": 0.6681, "step": 4785 }, { "epoch": 0.13973314647747512, "grad_norm": 0.649584951535835, "learning_rate": 4.779399837793999e-06, "loss": 0.5493, "step": 4786 }, { "epoch": 0.13976234270532248, "grad_norm": 0.7975648709331962, "learning_rate": 4.779237631792377e-06, "loss": 0.6845, "step": 4787 }, { "epoch": 0.13979153893316984, "grad_norm": 0.7936353972243891, "learning_rate": 4.779075425790754e-06, "loss": 0.7665, "step": 4788 }, { "epoch": 0.1398207351610172, "grad_norm": 0.7994241628933159, "learning_rate": 4.778913219789132e-06, "loss": 0.7503, "step": 4789 }, { "epoch": 0.13984993138886456, "grad_norm": 0.8806328768080283, "learning_rate": 4.77875101378751e-06, "loss": 0.6634, "step": 4790 }, { "epoch": 0.13987912761671192, "grad_norm": 0.8096224956100406, "learning_rate": 4.778588807785888e-06, "loss": 0.7547, "step": 4791 }, { "epoch": 0.13990832384455928, "grad_norm": 0.7759030742972631, "learning_rate": 4.778426601784266e-06, "loss": 0.6944, "step": 4792 }, { "epoch": 0.13993752007240665, "grad_norm": 0.7556307865606247, "learning_rate": 4.778264395782644e-06, "loss": 0.6787, "step": 4793 }, { "epoch": 0.139966716300254, "grad_norm": 0.7429783068830794, "learning_rate": 4.778102189781022e-06, "loss": 0.675, "step": 4794 }, { "epoch": 0.13999591252810137, "grad_norm": 0.7606229286536084, "learning_rate": 4.7779399837794e-06, "loss": 0.7241, "step": 4795 }, { "epoch": 0.14002510875594873, "grad_norm": 0.7771335787915843, "learning_rate": 4.777777777777778e-06, "loss": 0.7271, "step": 4796 }, { "epoch": 0.1400543049837961, "grad_norm": 0.9579943988180885, "learning_rate": 4.777615571776156e-06, "loss": 0.6582, "step": 4797 }, { "epoch": 0.14008350121164345, "grad_norm": 0.8872654741623718, "learning_rate": 4.777453365774534e-06, "loss": 0.7699, "step": 4798 }, { "epoch": 0.1401126974394908, "grad_norm": 0.9060448493093577, "learning_rate": 4.777291159772912e-06, "loss": 0.7362, "step": 4799 }, { "epoch": 0.14014189366733817, "grad_norm": 0.7335261176363428, "learning_rate": 4.77712895377129e-06, "loss": 0.6838, "step": 4800 }, { "epoch": 0.14017108989518554, "grad_norm": 0.8136744824290295, "learning_rate": 4.776966747769668e-06, "loss": 0.7874, "step": 4801 }, { "epoch": 0.1402002861230329, "grad_norm": 0.7784672050169372, "learning_rate": 4.776804541768046e-06, "loss": 0.664, "step": 4802 }, { "epoch": 0.14022948235088026, "grad_norm": 0.7668015252995036, "learning_rate": 4.7766423357664234e-06, "loss": 0.6842, "step": 4803 }, { "epoch": 0.14025867857872762, "grad_norm": 0.7106114619823615, "learning_rate": 4.7764801297648015e-06, "loss": 0.6231, "step": 4804 }, { "epoch": 0.14028787480657498, "grad_norm": 0.6933472727696851, "learning_rate": 4.7763179237631795e-06, "loss": 0.6294, "step": 4805 }, { "epoch": 0.14031707103442234, "grad_norm": 0.7324301530337399, "learning_rate": 4.7761557177615575e-06, "loss": 0.6894, "step": 4806 }, { "epoch": 0.1403462672622697, "grad_norm": 0.7374096489558116, "learning_rate": 4.7759935117599355e-06, "loss": 0.6838, "step": 4807 }, { "epoch": 0.14037546349011706, "grad_norm": 0.7264467864650033, "learning_rate": 4.7758313057583135e-06, "loss": 0.6488, "step": 4808 }, { "epoch": 0.14040465971796443, "grad_norm": 0.790918306841599, "learning_rate": 4.7756690997566915e-06, "loss": 0.7052, "step": 4809 }, { "epoch": 0.14043385594581181, "grad_norm": 0.8493723638373972, "learning_rate": 4.7755068937550695e-06, "loss": 0.8388, "step": 4810 }, { "epoch": 0.14046305217365918, "grad_norm": 0.7812978770835086, "learning_rate": 4.775344687753447e-06, "loss": 0.7229, "step": 4811 }, { "epoch": 0.14049224840150654, "grad_norm": 0.7555848233958131, "learning_rate": 4.775182481751825e-06, "loss": 0.6601, "step": 4812 }, { "epoch": 0.1405214446293539, "grad_norm": 0.7668747229752875, "learning_rate": 4.775020275750203e-06, "loss": 0.7265, "step": 4813 }, { "epoch": 0.14055064085720126, "grad_norm": 0.7133089375788079, "learning_rate": 4.7748580697485815e-06, "loss": 0.6047, "step": 4814 }, { "epoch": 0.14057983708504862, "grad_norm": 0.7722494980910918, "learning_rate": 4.7746958637469595e-06, "loss": 0.7234, "step": 4815 }, { "epoch": 0.14060903331289598, "grad_norm": 0.7794266583258455, "learning_rate": 4.7745336577453375e-06, "loss": 0.7056, "step": 4816 }, { "epoch": 0.14063822954074334, "grad_norm": 0.8057221725693711, "learning_rate": 4.774371451743715e-06, "loss": 0.7601, "step": 4817 }, { "epoch": 0.1406674257685907, "grad_norm": 0.7389325857149329, "learning_rate": 4.774209245742093e-06, "loss": 0.7017, "step": 4818 }, { "epoch": 0.14069662199643806, "grad_norm": 0.9810031798186019, "learning_rate": 4.774047039740471e-06, "loss": 0.7364, "step": 4819 }, { "epoch": 0.14072581822428543, "grad_norm": 0.8506093200014669, "learning_rate": 4.773884833738849e-06, "loss": 0.8334, "step": 4820 }, { "epoch": 0.1407550144521328, "grad_norm": 0.8506164323605545, "learning_rate": 4.773722627737227e-06, "loss": 0.8369, "step": 4821 }, { "epoch": 0.14078421067998015, "grad_norm": 0.7632468341271778, "learning_rate": 4.773560421735605e-06, "loss": 0.6422, "step": 4822 }, { "epoch": 0.1408134069078275, "grad_norm": 0.7352033722896587, "learning_rate": 4.773398215733983e-06, "loss": 0.6516, "step": 4823 }, { "epoch": 0.14084260313567487, "grad_norm": 0.7642834774508319, "learning_rate": 4.773236009732361e-06, "loss": 0.6916, "step": 4824 }, { "epoch": 0.14087179936352223, "grad_norm": 1.1079716239169601, "learning_rate": 4.773073803730739e-06, "loss": 0.6829, "step": 4825 }, { "epoch": 0.1409009955913696, "grad_norm": 0.7324231662958789, "learning_rate": 4.772911597729116e-06, "loss": 0.6472, "step": 4826 }, { "epoch": 0.14093019181921695, "grad_norm": 0.7580531972218016, "learning_rate": 4.772749391727494e-06, "loss": 0.6204, "step": 4827 }, { "epoch": 0.14095938804706432, "grad_norm": 0.9590888322556941, "learning_rate": 4.772587185725872e-06, "loss": 0.6825, "step": 4828 }, { "epoch": 0.14098858427491168, "grad_norm": 1.1394184004024492, "learning_rate": 4.77242497972425e-06, "loss": 0.7192, "step": 4829 }, { "epoch": 0.14101778050275904, "grad_norm": 0.8018554460647672, "learning_rate": 4.772262773722628e-06, "loss": 0.7015, "step": 4830 }, { "epoch": 0.1410469767306064, "grad_norm": 0.7834255805973539, "learning_rate": 4.772100567721006e-06, "loss": 0.6856, "step": 4831 }, { "epoch": 0.14107617295845376, "grad_norm": 0.7872883984708086, "learning_rate": 4.771938361719384e-06, "loss": 0.7126, "step": 4832 }, { "epoch": 0.14110536918630112, "grad_norm": 0.8105467371068236, "learning_rate": 4.771776155717762e-06, "loss": 0.7351, "step": 4833 }, { "epoch": 0.14113456541414848, "grad_norm": 0.6487595345901852, "learning_rate": 4.77161394971614e-06, "loss": 0.5665, "step": 4834 }, { "epoch": 0.14116376164199584, "grad_norm": 0.8498048343574235, "learning_rate": 4.771451743714518e-06, "loss": 0.7392, "step": 4835 }, { "epoch": 0.1411929578698432, "grad_norm": 0.8015033295823556, "learning_rate": 4.771289537712896e-06, "loss": 0.7816, "step": 4836 }, { "epoch": 0.14122215409769057, "grad_norm": 0.8275393648649064, "learning_rate": 4.771127331711274e-06, "loss": 0.747, "step": 4837 }, { "epoch": 0.14125135032553793, "grad_norm": 0.7769572146035478, "learning_rate": 4.770965125709652e-06, "loss": 0.7698, "step": 4838 }, { "epoch": 0.1412805465533853, "grad_norm": 0.721398678000737, "learning_rate": 4.77080291970803e-06, "loss": 0.5591, "step": 4839 }, { "epoch": 0.14130974278123268, "grad_norm": 0.7731983396386609, "learning_rate": 4.770640713706407e-06, "loss": 0.7075, "step": 4840 }, { "epoch": 0.14133893900908004, "grad_norm": 0.7390262368818831, "learning_rate": 4.770478507704785e-06, "loss": 0.695, "step": 4841 }, { "epoch": 0.1413681352369274, "grad_norm": 0.862314105753729, "learning_rate": 4.770316301703163e-06, "loss": 0.7495, "step": 4842 }, { "epoch": 0.14139733146477476, "grad_norm": 0.8023620674389595, "learning_rate": 4.770154095701541e-06, "loss": 0.6612, "step": 4843 }, { "epoch": 0.14142652769262212, "grad_norm": 0.7280855380448962, "learning_rate": 4.769991889699919e-06, "loss": 0.6421, "step": 4844 }, { "epoch": 0.14145572392046948, "grad_norm": 0.7855128951431771, "learning_rate": 4.769829683698297e-06, "loss": 0.7552, "step": 4845 }, { "epoch": 0.14148492014831685, "grad_norm": 0.7340480153348358, "learning_rate": 4.769667477696675e-06, "loss": 0.7077, "step": 4846 }, { "epoch": 0.1415141163761642, "grad_norm": 0.8429555582279402, "learning_rate": 4.769505271695053e-06, "loss": 0.8378, "step": 4847 }, { "epoch": 0.14154331260401157, "grad_norm": 0.8715124329106999, "learning_rate": 4.769343065693431e-06, "loss": 0.7122, "step": 4848 }, { "epoch": 0.14157250883185893, "grad_norm": 0.8503031225867314, "learning_rate": 4.769180859691808e-06, "loss": 0.7738, "step": 4849 }, { "epoch": 0.1416017050597063, "grad_norm": 0.9334255344290388, "learning_rate": 4.769018653690186e-06, "loss": 0.7954, "step": 4850 }, { "epoch": 0.14163090128755365, "grad_norm": 0.7768732357957574, "learning_rate": 4.768856447688564e-06, "loss": 0.7332, "step": 4851 }, { "epoch": 0.141660097515401, "grad_norm": 0.9217926352636026, "learning_rate": 4.768694241686943e-06, "loss": 0.7774, "step": 4852 }, { "epoch": 0.14168929374324837, "grad_norm": 0.7165153685193723, "learning_rate": 4.768532035685321e-06, "loss": 0.6868, "step": 4853 }, { "epoch": 0.14171848997109573, "grad_norm": 0.7607047052929548, "learning_rate": 4.768369829683699e-06, "loss": 0.6836, "step": 4854 }, { "epoch": 0.1417476861989431, "grad_norm": 0.7342486568192377, "learning_rate": 4.768207623682076e-06, "loss": 0.6596, "step": 4855 }, { "epoch": 0.14177688242679046, "grad_norm": 0.748851198539207, "learning_rate": 4.768045417680454e-06, "loss": 0.6996, "step": 4856 }, { "epoch": 0.14180607865463782, "grad_norm": 0.7972552980902706, "learning_rate": 4.767883211678832e-06, "loss": 0.685, "step": 4857 }, { "epoch": 0.14183527488248518, "grad_norm": 0.8596958871785622, "learning_rate": 4.7677210056772104e-06, "loss": 0.6814, "step": 4858 }, { "epoch": 0.14186447111033254, "grad_norm": 0.7878873830648606, "learning_rate": 4.7675587996755884e-06, "loss": 0.6982, "step": 4859 }, { "epoch": 0.1418936673381799, "grad_norm": 0.7702626735529903, "learning_rate": 4.7673965936739664e-06, "loss": 0.6973, "step": 4860 }, { "epoch": 0.14192286356602726, "grad_norm": 0.7100689330999919, "learning_rate": 4.7672343876723445e-06, "loss": 0.634, "step": 4861 }, { "epoch": 0.14195205979387462, "grad_norm": 0.8740017156222977, "learning_rate": 4.7670721816707225e-06, "loss": 0.813, "step": 4862 }, { "epoch": 0.14198125602172199, "grad_norm": 0.80905426366898, "learning_rate": 4.7669099756691005e-06, "loss": 0.71, "step": 4863 }, { "epoch": 0.14201045224956935, "grad_norm": 0.7297969203342337, "learning_rate": 4.766747769667478e-06, "loss": 0.6532, "step": 4864 }, { "epoch": 0.1420396484774167, "grad_norm": 0.7667597311883952, "learning_rate": 4.766585563665856e-06, "loss": 0.6974, "step": 4865 }, { "epoch": 0.14206884470526407, "grad_norm": 0.7529209102703074, "learning_rate": 4.766423357664234e-06, "loss": 0.6844, "step": 4866 }, { "epoch": 0.14209804093311143, "grad_norm": 0.7852637674945495, "learning_rate": 4.766261151662612e-06, "loss": 0.7022, "step": 4867 }, { "epoch": 0.1421272371609588, "grad_norm": 0.8033661882001635, "learning_rate": 4.76609894566099e-06, "loss": 0.7287, "step": 4868 }, { "epoch": 0.14215643338880615, "grad_norm": 0.7692141908106863, "learning_rate": 4.765936739659368e-06, "loss": 0.7381, "step": 4869 }, { "epoch": 0.14218562961665354, "grad_norm": 0.8618538333054807, "learning_rate": 4.765774533657746e-06, "loss": 0.7933, "step": 4870 }, { "epoch": 0.1422148258445009, "grad_norm": 0.7557039491588253, "learning_rate": 4.765612327656124e-06, "loss": 0.6854, "step": 4871 }, { "epoch": 0.14224402207234826, "grad_norm": 0.7419878022938089, "learning_rate": 4.765450121654502e-06, "loss": 0.6485, "step": 4872 }, { "epoch": 0.14227321830019563, "grad_norm": 0.7498218157717887, "learning_rate": 4.76528791565288e-06, "loss": 0.671, "step": 4873 }, { "epoch": 0.142302414528043, "grad_norm": 1.133112223805352, "learning_rate": 4.765125709651258e-06, "loss": 0.6839, "step": 4874 }, { "epoch": 0.14233161075589035, "grad_norm": 0.7247790841403525, "learning_rate": 4.764963503649636e-06, "loss": 0.6456, "step": 4875 }, { "epoch": 0.1423608069837377, "grad_norm": 0.8340417020632003, "learning_rate": 4.764801297648014e-06, "loss": 0.7778, "step": 4876 }, { "epoch": 0.14239000321158507, "grad_norm": 0.7877964240204925, "learning_rate": 4.764639091646392e-06, "loss": 0.7418, "step": 4877 }, { "epoch": 0.14241919943943243, "grad_norm": 0.800905362350201, "learning_rate": 4.764476885644769e-06, "loss": 0.7014, "step": 4878 }, { "epoch": 0.1424483956672798, "grad_norm": 0.7426004098878068, "learning_rate": 4.764314679643147e-06, "loss": 0.7153, "step": 4879 }, { "epoch": 0.14247759189512715, "grad_norm": 0.7621149933376663, "learning_rate": 4.764152473641525e-06, "loss": 0.711, "step": 4880 }, { "epoch": 0.14250678812297451, "grad_norm": 0.7820202347220906, "learning_rate": 4.763990267639903e-06, "loss": 0.6739, "step": 4881 }, { "epoch": 0.14253598435082188, "grad_norm": 0.7607200962925268, "learning_rate": 4.763828061638281e-06, "loss": 0.6609, "step": 4882 }, { "epoch": 0.14256518057866924, "grad_norm": 0.8238281370944311, "learning_rate": 4.763665855636659e-06, "loss": 0.7951, "step": 4883 }, { "epoch": 0.1425943768065166, "grad_norm": 0.762718090558235, "learning_rate": 4.763503649635037e-06, "loss": 0.7361, "step": 4884 }, { "epoch": 0.14262357303436396, "grad_norm": 0.7589298929584554, "learning_rate": 4.763341443633415e-06, "loss": 0.6933, "step": 4885 }, { "epoch": 0.14265276926221132, "grad_norm": 0.7383903713146045, "learning_rate": 4.763179237631793e-06, "loss": 0.7719, "step": 4886 }, { "epoch": 0.14268196549005868, "grad_norm": 0.8332306261125243, "learning_rate": 4.76301703163017e-06, "loss": 0.709, "step": 4887 }, { "epoch": 0.14271116171790604, "grad_norm": 0.9769260573245852, "learning_rate": 4.762854825628548e-06, "loss": 0.6878, "step": 4888 }, { "epoch": 0.1427403579457534, "grad_norm": 0.9951241175426021, "learning_rate": 4.762692619626927e-06, "loss": 0.7401, "step": 4889 }, { "epoch": 0.14276955417360077, "grad_norm": 0.7496516059281266, "learning_rate": 4.762530413625305e-06, "loss": 0.6557, "step": 4890 }, { "epoch": 0.14279875040144813, "grad_norm": 0.8157234526746273, "learning_rate": 4.762368207623683e-06, "loss": 0.7793, "step": 4891 }, { "epoch": 0.1428279466292955, "grad_norm": 0.9417600366120046, "learning_rate": 4.762206001622061e-06, "loss": 0.7404, "step": 4892 }, { "epoch": 0.14285714285714285, "grad_norm": 0.7992822824556202, "learning_rate": 4.762043795620438e-06, "loss": 0.7856, "step": 4893 }, { "epoch": 0.1428863390849902, "grad_norm": 0.8591609396540361, "learning_rate": 4.761881589618816e-06, "loss": 0.6943, "step": 4894 }, { "epoch": 0.14291553531283757, "grad_norm": 0.7054404265140328, "learning_rate": 4.761719383617194e-06, "loss": 0.6336, "step": 4895 }, { "epoch": 0.14294473154068493, "grad_norm": 0.8198346414429737, "learning_rate": 4.761557177615572e-06, "loss": 0.6265, "step": 4896 }, { "epoch": 0.1429739277685323, "grad_norm": 0.6848177803940088, "learning_rate": 4.76139497161395e-06, "loss": 0.5755, "step": 4897 }, { "epoch": 0.14300312399637966, "grad_norm": 0.8112828845252854, "learning_rate": 4.761232765612328e-06, "loss": 0.7426, "step": 4898 }, { "epoch": 0.14303232022422702, "grad_norm": 0.7440587625039381, "learning_rate": 4.761070559610706e-06, "loss": 0.7251, "step": 4899 }, { "epoch": 0.1430615164520744, "grad_norm": 0.7739068483526119, "learning_rate": 4.760908353609084e-06, "loss": 0.7099, "step": 4900 }, { "epoch": 0.14309071267992177, "grad_norm": 0.8216949665181477, "learning_rate": 4.760746147607462e-06, "loss": 0.7126, "step": 4901 }, { "epoch": 0.14311990890776913, "grad_norm": 0.6608459858126131, "learning_rate": 4.760583941605839e-06, "loss": 0.5691, "step": 4902 }, { "epoch": 0.1431491051356165, "grad_norm": 0.8431484076660303, "learning_rate": 4.760421735604217e-06, "loss": 0.7782, "step": 4903 }, { "epoch": 0.14317830136346385, "grad_norm": 0.8534693760508449, "learning_rate": 4.760259529602595e-06, "loss": 0.7304, "step": 4904 }, { "epoch": 0.1432074975913112, "grad_norm": 0.7063892739969851, "learning_rate": 4.760097323600973e-06, "loss": 0.6613, "step": 4905 }, { "epoch": 0.14323669381915857, "grad_norm": 0.704313220350594, "learning_rate": 4.759935117599351e-06, "loss": 0.608, "step": 4906 }, { "epoch": 0.14326589004700593, "grad_norm": 0.7179527118968699, "learning_rate": 4.759772911597729e-06, "loss": 0.5963, "step": 4907 }, { "epoch": 0.1432950862748533, "grad_norm": 0.8386615629980931, "learning_rate": 4.759610705596107e-06, "loss": 0.6852, "step": 4908 }, { "epoch": 0.14332428250270066, "grad_norm": 0.7525278161033679, "learning_rate": 4.759448499594485e-06, "loss": 0.675, "step": 4909 }, { "epoch": 0.14335347873054802, "grad_norm": 0.7368497879267663, "learning_rate": 4.759286293592863e-06, "loss": 0.6791, "step": 4910 }, { "epoch": 0.14338267495839538, "grad_norm": 0.7791251773143356, "learning_rate": 4.759124087591241e-06, "loss": 0.7067, "step": 4911 }, { "epoch": 0.14341187118624274, "grad_norm": 0.8281843579209646, "learning_rate": 4.758961881589619e-06, "loss": 0.6846, "step": 4912 }, { "epoch": 0.1434410674140901, "grad_norm": 0.7225632260552999, "learning_rate": 4.758799675587997e-06, "loss": 0.6683, "step": 4913 }, { "epoch": 0.14347026364193746, "grad_norm": 1.1525251455695713, "learning_rate": 4.758637469586375e-06, "loss": 0.8254, "step": 4914 }, { "epoch": 0.14349945986978482, "grad_norm": 0.7543415475980846, "learning_rate": 4.7584752635847534e-06, "loss": 0.6307, "step": 4915 }, { "epoch": 0.14352865609763218, "grad_norm": 0.7694796525478245, "learning_rate": 4.758313057583131e-06, "loss": 0.7738, "step": 4916 }, { "epoch": 0.14355785232547955, "grad_norm": 0.7218598685824438, "learning_rate": 4.758150851581509e-06, "loss": 0.6868, "step": 4917 }, { "epoch": 0.1435870485533269, "grad_norm": 0.7661294777991695, "learning_rate": 4.757988645579887e-06, "loss": 0.7518, "step": 4918 }, { "epoch": 0.14361624478117427, "grad_norm": 0.8065407591098234, "learning_rate": 4.757826439578265e-06, "loss": 0.7951, "step": 4919 }, { "epoch": 0.14364544100902163, "grad_norm": 0.7112949233207359, "learning_rate": 4.757664233576643e-06, "loss": 0.5688, "step": 4920 }, { "epoch": 0.143674637236869, "grad_norm": 0.7638448698538771, "learning_rate": 4.757502027575021e-06, "loss": 0.683, "step": 4921 }, { "epoch": 0.14370383346471635, "grad_norm": 0.8133979492399358, "learning_rate": 4.757339821573399e-06, "loss": 0.725, "step": 4922 }, { "epoch": 0.1437330296925637, "grad_norm": 0.7550074722612715, "learning_rate": 4.757177615571777e-06, "loss": 0.7206, "step": 4923 }, { "epoch": 0.14376222592041107, "grad_norm": 0.7565073698961586, "learning_rate": 4.757015409570155e-06, "loss": 0.6673, "step": 4924 }, { "epoch": 0.14379142214825844, "grad_norm": 0.7362484360172055, "learning_rate": 4.756853203568532e-06, "loss": 0.7156, "step": 4925 }, { "epoch": 0.1438206183761058, "grad_norm": 0.8102091235783577, "learning_rate": 4.75669099756691e-06, "loss": 0.7134, "step": 4926 }, { "epoch": 0.14384981460395316, "grad_norm": 0.7506444011663819, "learning_rate": 4.756528791565289e-06, "loss": 0.7028, "step": 4927 }, { "epoch": 0.14387901083180052, "grad_norm": 0.7708450734528538, "learning_rate": 4.756366585563667e-06, "loss": 0.7396, "step": 4928 }, { "epoch": 0.14390820705964788, "grad_norm": 0.9057518284562174, "learning_rate": 4.756204379562045e-06, "loss": 0.6656, "step": 4929 }, { "epoch": 0.14393740328749527, "grad_norm": 0.7051260395617589, "learning_rate": 4.756042173560423e-06, "loss": 0.6289, "step": 4930 }, { "epoch": 0.14396659951534263, "grad_norm": 0.7661917756476293, "learning_rate": 4.7558799675588e-06, "loss": 0.6657, "step": 4931 }, { "epoch": 0.14399579574319, "grad_norm": 0.738431747251216, "learning_rate": 4.755717761557178e-06, "loss": 0.6861, "step": 4932 }, { "epoch": 0.14402499197103735, "grad_norm": 0.7452457945849191, "learning_rate": 4.755555555555556e-06, "loss": 0.6578, "step": 4933 }, { "epoch": 0.14405418819888471, "grad_norm": 0.6960987834822888, "learning_rate": 4.755393349553934e-06, "loss": 0.6058, "step": 4934 }, { "epoch": 0.14408338442673208, "grad_norm": 0.7553010377073803, "learning_rate": 4.755231143552312e-06, "loss": 0.7052, "step": 4935 }, { "epoch": 0.14411258065457944, "grad_norm": 0.8004890328468532, "learning_rate": 4.75506893755069e-06, "loss": 0.7272, "step": 4936 }, { "epoch": 0.1441417768824268, "grad_norm": 1.015889947507502, "learning_rate": 4.754906731549068e-06, "loss": 0.8349, "step": 4937 }, { "epoch": 0.14417097311027416, "grad_norm": 0.7708328420698595, "learning_rate": 4.754744525547446e-06, "loss": 0.7499, "step": 4938 }, { "epoch": 0.14420016933812152, "grad_norm": 0.8536423322735669, "learning_rate": 4.754582319545824e-06, "loss": 0.7472, "step": 4939 }, { "epoch": 0.14422936556596888, "grad_norm": 0.74136428478494, "learning_rate": 4.754420113544201e-06, "loss": 0.6451, "step": 4940 }, { "epoch": 0.14425856179381624, "grad_norm": 0.909999795770593, "learning_rate": 4.754257907542579e-06, "loss": 0.716, "step": 4941 }, { "epoch": 0.1442877580216636, "grad_norm": 0.7613128177064903, "learning_rate": 4.754095701540957e-06, "loss": 0.6994, "step": 4942 }, { "epoch": 0.14431695424951096, "grad_norm": 0.7331931291912531, "learning_rate": 4.753933495539335e-06, "loss": 0.6699, "step": 4943 }, { "epoch": 0.14434615047735833, "grad_norm": 0.7985312679892205, "learning_rate": 4.753771289537713e-06, "loss": 0.7191, "step": 4944 }, { "epoch": 0.1443753467052057, "grad_norm": 0.744989119686606, "learning_rate": 4.753609083536091e-06, "loss": 0.6898, "step": 4945 }, { "epoch": 0.14440454293305305, "grad_norm": 0.7549009488578787, "learning_rate": 4.753446877534469e-06, "loss": 0.6652, "step": 4946 }, { "epoch": 0.1444337391609004, "grad_norm": 0.7848301387504618, "learning_rate": 4.753284671532847e-06, "loss": 0.728, "step": 4947 }, { "epoch": 0.14446293538874777, "grad_norm": 0.8839960908314661, "learning_rate": 4.753122465531225e-06, "loss": 0.7824, "step": 4948 }, { "epoch": 0.14449213161659513, "grad_norm": 0.8163783345604771, "learning_rate": 4.752960259529603e-06, "loss": 0.7823, "step": 4949 }, { "epoch": 0.1445213278444425, "grad_norm": 0.8106155146769524, "learning_rate": 4.752798053527981e-06, "loss": 0.7963, "step": 4950 }, { "epoch": 0.14455052407228985, "grad_norm": 1.58375217873452, "learning_rate": 4.752635847526359e-06, "loss": 0.9263, "step": 4951 }, { "epoch": 0.14457972030013722, "grad_norm": 0.7339536784453379, "learning_rate": 4.752473641524737e-06, "loss": 0.693, "step": 4952 }, { "epoch": 0.14460891652798458, "grad_norm": 0.7191063980697435, "learning_rate": 4.752311435523115e-06, "loss": 0.6274, "step": 4953 }, { "epoch": 0.14463811275583194, "grad_norm": 0.7633309600719528, "learning_rate": 4.752149229521492e-06, "loss": 0.746, "step": 4954 }, { "epoch": 0.1446673089836793, "grad_norm": 0.7483559425665599, "learning_rate": 4.75198702351987e-06, "loss": 0.6871, "step": 4955 }, { "epoch": 0.14469650521152666, "grad_norm": 0.800344154301681, "learning_rate": 4.751824817518248e-06, "loss": 0.7324, "step": 4956 }, { "epoch": 0.14472570143937402, "grad_norm": 0.6989197306644613, "learning_rate": 4.751662611516626e-06, "loss": 0.6133, "step": 4957 }, { "epoch": 0.14475489766722138, "grad_norm": 1.0391439603467227, "learning_rate": 4.751500405515004e-06, "loss": 0.7205, "step": 4958 }, { "epoch": 0.14478409389506874, "grad_norm": 0.7576197731169235, "learning_rate": 4.751338199513382e-06, "loss": 0.6672, "step": 4959 }, { "epoch": 0.14481329012291613, "grad_norm": 0.8574796776246825, "learning_rate": 4.75117599351176e-06, "loss": 0.76, "step": 4960 }, { "epoch": 0.1448424863507635, "grad_norm": 0.7866519784606979, "learning_rate": 4.751013787510138e-06, "loss": 0.7246, "step": 4961 }, { "epoch": 0.14487168257861086, "grad_norm": 0.7930055493210852, "learning_rate": 4.750851581508516e-06, "loss": 0.7501, "step": 4962 }, { "epoch": 0.14490087880645822, "grad_norm": 0.8154646841607864, "learning_rate": 4.7506893755068935e-06, "loss": 0.6505, "step": 4963 }, { "epoch": 0.14493007503430558, "grad_norm": 0.8609562307156327, "learning_rate": 4.7505271695052715e-06, "loss": 0.7414, "step": 4964 }, { "epoch": 0.14495927126215294, "grad_norm": 0.7229400089295629, "learning_rate": 4.75036496350365e-06, "loss": 0.6804, "step": 4965 }, { "epoch": 0.1449884674900003, "grad_norm": 1.9215244438723758, "learning_rate": 4.750202757502028e-06, "loss": 0.7651, "step": 4966 }, { "epoch": 0.14501766371784766, "grad_norm": 0.8215811530112608, "learning_rate": 4.750040551500406e-06, "loss": 0.7224, "step": 4967 }, { "epoch": 0.14504685994569502, "grad_norm": 0.8089503066739037, "learning_rate": 4.749878345498784e-06, "loss": 0.7569, "step": 4968 }, { "epoch": 0.14507605617354238, "grad_norm": 0.7546251846466204, "learning_rate": 4.7497161394971616e-06, "loss": 0.6911, "step": 4969 }, { "epoch": 0.14510525240138974, "grad_norm": 0.769684716301407, "learning_rate": 4.7495539334955396e-06, "loss": 0.7314, "step": 4970 }, { "epoch": 0.1451344486292371, "grad_norm": 0.7396568560570035, "learning_rate": 4.7493917274939176e-06, "loss": 0.6366, "step": 4971 }, { "epoch": 0.14516364485708447, "grad_norm": 0.7043279112990025, "learning_rate": 4.749229521492296e-06, "loss": 0.6532, "step": 4972 }, { "epoch": 0.14519284108493183, "grad_norm": 0.7586403580074157, "learning_rate": 4.749067315490674e-06, "loss": 0.7082, "step": 4973 }, { "epoch": 0.1452220373127792, "grad_norm": 0.7476526864165336, "learning_rate": 4.748905109489052e-06, "loss": 0.6363, "step": 4974 }, { "epoch": 0.14525123354062655, "grad_norm": 0.6420394975209448, "learning_rate": 4.74874290348743e-06, "loss": 0.561, "step": 4975 }, { "epoch": 0.1452804297684739, "grad_norm": 0.7954843318818301, "learning_rate": 4.748580697485808e-06, "loss": 0.703, "step": 4976 }, { "epoch": 0.14530962599632127, "grad_norm": 0.7588498836919487, "learning_rate": 4.748418491484186e-06, "loss": 0.7364, "step": 4977 }, { "epoch": 0.14533882222416863, "grad_norm": 0.7816179018216042, "learning_rate": 4.748256285482563e-06, "loss": 0.7673, "step": 4978 }, { "epoch": 0.145368018452016, "grad_norm": 0.8648754057075648, "learning_rate": 4.748094079480941e-06, "loss": 0.8567, "step": 4979 }, { "epoch": 0.14539721467986336, "grad_norm": 0.7297496074180077, "learning_rate": 4.747931873479319e-06, "loss": 0.5987, "step": 4980 }, { "epoch": 0.14542641090771072, "grad_norm": 0.7899247213430397, "learning_rate": 4.747769667477697e-06, "loss": 0.736, "step": 4981 }, { "epoch": 0.14545560713555808, "grad_norm": 0.7806254922568434, "learning_rate": 4.747607461476075e-06, "loss": 0.7493, "step": 4982 }, { "epoch": 0.14548480336340544, "grad_norm": 0.7178679017732866, "learning_rate": 4.747445255474453e-06, "loss": 0.6702, "step": 4983 }, { "epoch": 0.1455139995912528, "grad_norm": 0.7285179352896594, "learning_rate": 4.747283049472831e-06, "loss": 0.684, "step": 4984 }, { "epoch": 0.14554319581910016, "grad_norm": 0.7682282744299744, "learning_rate": 4.747120843471209e-06, "loss": 0.7291, "step": 4985 }, { "epoch": 0.14557239204694752, "grad_norm": 0.7894052217395723, "learning_rate": 4.746958637469587e-06, "loss": 0.7485, "step": 4986 }, { "epoch": 0.14560158827479489, "grad_norm": 0.76048003510653, "learning_rate": 4.746796431467965e-06, "loss": 0.7156, "step": 4987 }, { "epoch": 0.14563078450264225, "grad_norm": 0.7235999902011852, "learning_rate": 4.746634225466343e-06, "loss": 0.6903, "step": 4988 }, { "epoch": 0.1456599807304896, "grad_norm": 0.7749258525148983, "learning_rate": 4.746472019464721e-06, "loss": 0.765, "step": 4989 }, { "epoch": 0.145689176958337, "grad_norm": 0.7982768426431885, "learning_rate": 4.746309813463099e-06, "loss": 0.7443, "step": 4990 }, { "epoch": 0.14571837318618436, "grad_norm": 0.7650911251557394, "learning_rate": 4.746147607461477e-06, "loss": 0.6978, "step": 4991 }, { "epoch": 0.14574756941403172, "grad_norm": 0.8367410117108449, "learning_rate": 4.745985401459854e-06, "loss": 0.7876, "step": 4992 }, { "epoch": 0.14577676564187908, "grad_norm": 1.0078026303952135, "learning_rate": 4.745823195458232e-06, "loss": 0.7667, "step": 4993 }, { "epoch": 0.14580596186972644, "grad_norm": 0.8418518408281812, "learning_rate": 4.74566098945661e-06, "loss": 0.722, "step": 4994 }, { "epoch": 0.1458351580975738, "grad_norm": 0.766009153659732, "learning_rate": 4.745498783454988e-06, "loss": 0.7711, "step": 4995 }, { "epoch": 0.14586435432542116, "grad_norm": 0.7583062344703306, "learning_rate": 4.745336577453366e-06, "loss": 0.5787, "step": 4996 }, { "epoch": 0.14589355055326853, "grad_norm": 0.77174210300481, "learning_rate": 4.745174371451744e-06, "loss": 0.6952, "step": 4997 }, { "epoch": 0.1459227467811159, "grad_norm": 0.7952271370451888, "learning_rate": 4.745012165450122e-06, "loss": 0.8238, "step": 4998 }, { "epoch": 0.14595194300896325, "grad_norm": 0.7546136393423375, "learning_rate": 4.7448499594485e-06, "loss": 0.7449, "step": 4999 }, { "epoch": 0.1459811392368106, "grad_norm": 0.710896368266443, "learning_rate": 4.744687753446878e-06, "loss": 0.6782, "step": 5000 }, { "epoch": 0.14601033546465797, "grad_norm": 0.7992909260845611, "learning_rate": 4.744525547445255e-06, "loss": 0.6426, "step": 5001 }, { "epoch": 0.14603953169250533, "grad_norm": 0.8048346171589911, "learning_rate": 4.744363341443633e-06, "loss": 0.7031, "step": 5002 }, { "epoch": 0.1460687279203527, "grad_norm": 0.7105548665911469, "learning_rate": 4.744201135442012e-06, "loss": 0.5763, "step": 5003 }, { "epoch": 0.14609792414820005, "grad_norm": 0.7392754416713151, "learning_rate": 4.74403892944039e-06, "loss": 0.701, "step": 5004 }, { "epoch": 0.14612712037604741, "grad_norm": 0.7997539315701363, "learning_rate": 4.743876723438768e-06, "loss": 0.7756, "step": 5005 }, { "epoch": 0.14615631660389478, "grad_norm": 0.7235950453553127, "learning_rate": 4.743714517437146e-06, "loss": 0.6589, "step": 5006 }, { "epoch": 0.14618551283174214, "grad_norm": 0.6981288908790606, "learning_rate": 4.743552311435523e-06, "loss": 0.6109, "step": 5007 }, { "epoch": 0.1462147090595895, "grad_norm": 0.7850899165609752, "learning_rate": 4.743390105433901e-06, "loss": 0.7707, "step": 5008 }, { "epoch": 0.14624390528743686, "grad_norm": 0.7818922000108072, "learning_rate": 4.743227899432279e-06, "loss": 0.7244, "step": 5009 }, { "epoch": 0.14627310151528422, "grad_norm": 0.7775178532502668, "learning_rate": 4.743065693430657e-06, "loss": 0.7534, "step": 5010 }, { "epoch": 0.14630229774313158, "grad_norm": 0.7802840098119687, "learning_rate": 4.742903487429035e-06, "loss": 0.6932, "step": 5011 }, { "epoch": 0.14633149397097894, "grad_norm": 0.7881352266368526, "learning_rate": 4.742741281427413e-06, "loss": 0.6972, "step": 5012 }, { "epoch": 0.1463606901988263, "grad_norm": 0.8431923564515673, "learning_rate": 4.742579075425791e-06, "loss": 0.7689, "step": 5013 }, { "epoch": 0.14638988642667367, "grad_norm": 0.7624260453438418, "learning_rate": 4.742416869424169e-06, "loss": 0.6989, "step": 5014 }, { "epoch": 0.14641908265452103, "grad_norm": 0.9669156272721132, "learning_rate": 4.742254663422547e-06, "loss": 0.8746, "step": 5015 }, { "epoch": 0.1464482788823684, "grad_norm": 0.8454003246761198, "learning_rate": 4.7420924574209245e-06, "loss": 0.7227, "step": 5016 }, { "epoch": 0.14647747511021575, "grad_norm": 0.7534952711903335, "learning_rate": 4.7419302514193025e-06, "loss": 0.73, "step": 5017 }, { "epoch": 0.1465066713380631, "grad_norm": 0.7368725047553658, "learning_rate": 4.7417680454176805e-06, "loss": 0.6793, "step": 5018 }, { "epoch": 0.14653586756591047, "grad_norm": 0.7677602149264039, "learning_rate": 4.7416058394160585e-06, "loss": 0.8298, "step": 5019 }, { "epoch": 0.14656506379375783, "grad_norm": 1.026969317553622, "learning_rate": 4.7414436334144365e-06, "loss": 0.7867, "step": 5020 }, { "epoch": 0.14659426002160522, "grad_norm": 0.7582756489353412, "learning_rate": 4.7412814274128145e-06, "loss": 0.6877, "step": 5021 }, { "epoch": 0.14662345624945258, "grad_norm": 1.0987291787746, "learning_rate": 4.7411192214111925e-06, "loss": 0.6787, "step": 5022 }, { "epoch": 0.14665265247729994, "grad_norm": 0.7297490071446907, "learning_rate": 4.7409570154095705e-06, "loss": 0.6764, "step": 5023 }, { "epoch": 0.1466818487051473, "grad_norm": 0.7738367912468423, "learning_rate": 4.7407948094079485e-06, "loss": 0.7119, "step": 5024 }, { "epoch": 0.14671104493299467, "grad_norm": 0.7350657213963508, "learning_rate": 4.7406326034063266e-06, "loss": 0.7065, "step": 5025 }, { "epoch": 0.14674024116084203, "grad_norm": 0.7775712503099238, "learning_rate": 4.7404703974047046e-06, "loss": 0.743, "step": 5026 }, { "epoch": 0.1467694373886894, "grad_norm": 0.7988366827817882, "learning_rate": 4.7403081914030826e-06, "loss": 0.7716, "step": 5027 }, { "epoch": 0.14679863361653675, "grad_norm": 0.7938066568819747, "learning_rate": 4.7401459854014606e-06, "loss": 0.6927, "step": 5028 }, { "epoch": 0.1468278298443841, "grad_norm": 0.9052413997640226, "learning_rate": 4.739983779399839e-06, "loss": 0.7934, "step": 5029 }, { "epoch": 0.14685702607223147, "grad_norm": 0.7576672564693573, "learning_rate": 4.739821573398216e-06, "loss": 0.6618, "step": 5030 }, { "epoch": 0.14688622230007883, "grad_norm": 0.8024885278049664, "learning_rate": 4.739659367396594e-06, "loss": 0.6771, "step": 5031 }, { "epoch": 0.1469154185279262, "grad_norm": 0.7393074797938499, "learning_rate": 4.739497161394972e-06, "loss": 0.7123, "step": 5032 }, { "epoch": 0.14694461475577356, "grad_norm": 0.8032969750542217, "learning_rate": 4.73933495539335e-06, "loss": 0.8147, "step": 5033 }, { "epoch": 0.14697381098362092, "grad_norm": 0.7180193988749776, "learning_rate": 4.739172749391728e-06, "loss": 0.6241, "step": 5034 }, { "epoch": 0.14700300721146828, "grad_norm": 0.7654126049926946, "learning_rate": 4.739010543390106e-06, "loss": 0.6964, "step": 5035 }, { "epoch": 0.14703220343931564, "grad_norm": 0.7752941719415672, "learning_rate": 4.738848337388484e-06, "loss": 0.6997, "step": 5036 }, { "epoch": 0.147061399667163, "grad_norm": 0.7289868172455274, "learning_rate": 4.738686131386862e-06, "loss": 0.7013, "step": 5037 }, { "epoch": 0.14709059589501036, "grad_norm": 0.7230928669504714, "learning_rate": 4.73852392538524e-06, "loss": 0.6191, "step": 5038 }, { "epoch": 0.14711979212285772, "grad_norm": 0.7761847173383102, "learning_rate": 4.738361719383617e-06, "loss": 0.6965, "step": 5039 }, { "epoch": 0.14714898835070508, "grad_norm": 0.7450122558185102, "learning_rate": 4.738199513381996e-06, "loss": 0.6685, "step": 5040 }, { "epoch": 0.14717818457855245, "grad_norm": 0.7540622455146776, "learning_rate": 4.738037307380374e-06, "loss": 0.6705, "step": 5041 }, { "epoch": 0.1472073808063998, "grad_norm": 0.7475997510579876, "learning_rate": 4.737875101378752e-06, "loss": 0.6753, "step": 5042 }, { "epoch": 0.14723657703424717, "grad_norm": 0.7569303710226071, "learning_rate": 4.73771289537713e-06, "loss": 0.7363, "step": 5043 }, { "epoch": 0.14726577326209453, "grad_norm": 0.8809755754761602, "learning_rate": 4.737550689375508e-06, "loss": 0.7202, "step": 5044 }, { "epoch": 0.1472949694899419, "grad_norm": 0.8773503720794171, "learning_rate": 4.737388483373885e-06, "loss": 0.8465, "step": 5045 }, { "epoch": 0.14732416571778925, "grad_norm": 0.7753813652237403, "learning_rate": 4.737226277372263e-06, "loss": 0.7138, "step": 5046 }, { "epoch": 0.1473533619456366, "grad_norm": 0.812143937264181, "learning_rate": 4.737064071370641e-06, "loss": 0.7631, "step": 5047 }, { "epoch": 0.14738255817348397, "grad_norm": 1.0159724093947762, "learning_rate": 4.736901865369019e-06, "loss": 0.6626, "step": 5048 }, { "epoch": 0.14741175440133134, "grad_norm": 0.7514498616756728, "learning_rate": 4.736739659367397e-06, "loss": 0.6859, "step": 5049 }, { "epoch": 0.1474409506291787, "grad_norm": 0.9637648681100156, "learning_rate": 4.736577453365775e-06, "loss": 0.6588, "step": 5050 }, { "epoch": 0.14747014685702609, "grad_norm": 0.7656557055582468, "learning_rate": 4.736415247364153e-06, "loss": 0.6823, "step": 5051 }, { "epoch": 0.14749934308487345, "grad_norm": 0.8087936190203651, "learning_rate": 4.736253041362531e-06, "loss": 0.7889, "step": 5052 }, { "epoch": 0.1475285393127208, "grad_norm": 0.7566862898373216, "learning_rate": 4.736090835360909e-06, "loss": 0.6647, "step": 5053 }, { "epoch": 0.14755773554056817, "grad_norm": 0.7810397792440407, "learning_rate": 4.735928629359286e-06, "loss": 0.6639, "step": 5054 }, { "epoch": 0.14758693176841553, "grad_norm": 0.8755578322497185, "learning_rate": 4.735766423357664e-06, "loss": 0.7822, "step": 5055 }, { "epoch": 0.1476161279962629, "grad_norm": 0.7577896957574318, "learning_rate": 4.735604217356042e-06, "loss": 0.6695, "step": 5056 }, { "epoch": 0.14764532422411025, "grad_norm": 0.6889899878325031, "learning_rate": 4.73544201135442e-06, "loss": 0.6365, "step": 5057 }, { "epoch": 0.1476745204519576, "grad_norm": 0.6979588603466682, "learning_rate": 4.735279805352798e-06, "loss": 0.6342, "step": 5058 }, { "epoch": 0.14770371667980497, "grad_norm": 0.7382121388323118, "learning_rate": 4.735117599351176e-06, "loss": 0.6893, "step": 5059 }, { "epoch": 0.14773291290765234, "grad_norm": 0.7570626281019908, "learning_rate": 4.734955393349554e-06, "loss": 0.6724, "step": 5060 }, { "epoch": 0.1477621091354997, "grad_norm": 0.6965782741732224, "learning_rate": 4.734793187347932e-06, "loss": 0.6219, "step": 5061 }, { "epoch": 0.14779130536334706, "grad_norm": 0.7300021221249057, "learning_rate": 4.73463098134631e-06, "loss": 0.6158, "step": 5062 }, { "epoch": 0.14782050159119442, "grad_norm": 0.8353108543045823, "learning_rate": 4.734468775344688e-06, "loss": 0.7866, "step": 5063 }, { "epoch": 0.14784969781904178, "grad_norm": 0.8402958001838876, "learning_rate": 4.734306569343066e-06, "loss": 0.7084, "step": 5064 }, { "epoch": 0.14787889404688914, "grad_norm": 0.8375221834507024, "learning_rate": 4.734144363341444e-06, "loss": 0.7314, "step": 5065 }, { "epoch": 0.1479080902747365, "grad_norm": 0.7512489565572822, "learning_rate": 4.733982157339822e-06, "loss": 0.7191, "step": 5066 }, { "epoch": 0.14793728650258386, "grad_norm": 0.8281739885475885, "learning_rate": 4.7338199513382e-06, "loss": 0.6608, "step": 5067 }, { "epoch": 0.14796648273043123, "grad_norm": 0.7504277622358062, "learning_rate": 4.7336577453365775e-06, "loss": 0.7216, "step": 5068 }, { "epoch": 0.1479956789582786, "grad_norm": 0.8061606983560499, "learning_rate": 4.7334955393349555e-06, "loss": 0.7703, "step": 5069 }, { "epoch": 0.14802487518612595, "grad_norm": 0.7545727936773202, "learning_rate": 4.7333333333333335e-06, "loss": 0.6517, "step": 5070 }, { "epoch": 0.1480540714139733, "grad_norm": 0.8384008404572325, "learning_rate": 4.7331711273317115e-06, "loss": 0.7351, "step": 5071 }, { "epoch": 0.14808326764182067, "grad_norm": 0.7257607306686295, "learning_rate": 4.7330089213300895e-06, "loss": 0.6615, "step": 5072 }, { "epoch": 0.14811246386966803, "grad_norm": 0.7650085795073391, "learning_rate": 4.7328467153284675e-06, "loss": 0.6833, "step": 5073 }, { "epoch": 0.1481416600975154, "grad_norm": 0.7531956569154653, "learning_rate": 4.7326845093268455e-06, "loss": 0.6744, "step": 5074 }, { "epoch": 0.14817085632536275, "grad_norm": 0.7266978010316307, "learning_rate": 4.7325223033252235e-06, "loss": 0.6538, "step": 5075 }, { "epoch": 0.14820005255321012, "grad_norm": 0.8020986963204955, "learning_rate": 4.7323600973236015e-06, "loss": 0.7398, "step": 5076 }, { "epoch": 0.14822924878105748, "grad_norm": 0.6856296487882519, "learning_rate": 4.732197891321979e-06, "loss": 0.6372, "step": 5077 }, { "epoch": 0.14825844500890484, "grad_norm": 0.7373665457999368, "learning_rate": 4.7320356853203575e-06, "loss": 0.6615, "step": 5078 }, { "epoch": 0.1482876412367522, "grad_norm": 0.7831312159260643, "learning_rate": 4.7318734793187355e-06, "loss": 0.66, "step": 5079 }, { "epoch": 0.14831683746459956, "grad_norm": 0.6940392793451374, "learning_rate": 4.7317112733171135e-06, "loss": 0.6017, "step": 5080 }, { "epoch": 0.14834603369244695, "grad_norm": 0.8757304365809738, "learning_rate": 4.7315490673154915e-06, "loss": 0.7424, "step": 5081 }, { "epoch": 0.1483752299202943, "grad_norm": 0.8379032975970127, "learning_rate": 4.7313868613138696e-06, "loss": 0.6884, "step": 5082 }, { "epoch": 0.14840442614814167, "grad_norm": 0.9760864592846206, "learning_rate": 4.731224655312247e-06, "loss": 0.6397, "step": 5083 }, { "epoch": 0.14843362237598903, "grad_norm": 0.8138334007003275, "learning_rate": 4.731062449310625e-06, "loss": 0.7297, "step": 5084 }, { "epoch": 0.1484628186038364, "grad_norm": 0.8030740259070414, "learning_rate": 4.730900243309003e-06, "loss": 0.7653, "step": 5085 }, { "epoch": 0.14849201483168376, "grad_norm": 0.7688134696852433, "learning_rate": 4.730738037307381e-06, "loss": 0.725, "step": 5086 }, { "epoch": 0.14852121105953112, "grad_norm": 0.8650153920297056, "learning_rate": 4.730575831305759e-06, "loss": 0.6758, "step": 5087 }, { "epoch": 0.14855040728737848, "grad_norm": 0.7177382638198045, "learning_rate": 4.730413625304137e-06, "loss": 0.6643, "step": 5088 }, { "epoch": 0.14857960351522584, "grad_norm": 0.7595312683223311, "learning_rate": 4.730251419302515e-06, "loss": 0.7114, "step": 5089 }, { "epoch": 0.1486087997430732, "grad_norm": 0.8012398151023142, "learning_rate": 4.730089213300893e-06, "loss": 0.7804, "step": 5090 }, { "epoch": 0.14863799597092056, "grad_norm": 0.7113047928465156, "learning_rate": 4.729927007299271e-06, "loss": 0.6516, "step": 5091 }, { "epoch": 0.14866719219876792, "grad_norm": 0.7248380764509815, "learning_rate": 4.729764801297648e-06, "loss": 0.5922, "step": 5092 }, { "epoch": 0.14869638842661528, "grad_norm": 0.7598459583052851, "learning_rate": 4.729602595296026e-06, "loss": 0.7352, "step": 5093 }, { "epoch": 0.14872558465446264, "grad_norm": 0.9254967798005915, "learning_rate": 4.729440389294404e-06, "loss": 0.6366, "step": 5094 }, { "epoch": 0.14875478088231, "grad_norm": 0.7133768237289244, "learning_rate": 4.729278183292782e-06, "loss": 0.6474, "step": 5095 }, { "epoch": 0.14878397711015737, "grad_norm": 0.7194649711509409, "learning_rate": 4.72911597729116e-06, "loss": 0.6538, "step": 5096 }, { "epoch": 0.14881317333800473, "grad_norm": 0.7513582949635157, "learning_rate": 4.728953771289538e-06, "loss": 0.6838, "step": 5097 }, { "epoch": 0.1488423695658521, "grad_norm": 0.7759001011854186, "learning_rate": 4.728791565287916e-06, "loss": 0.7428, "step": 5098 }, { "epoch": 0.14887156579369945, "grad_norm": 0.7932850017156247, "learning_rate": 4.728629359286294e-06, "loss": 0.6853, "step": 5099 }, { "epoch": 0.1489007620215468, "grad_norm": 0.7456159550612792, "learning_rate": 4.728467153284672e-06, "loss": 0.6647, "step": 5100 }, { "epoch": 0.14892995824939417, "grad_norm": 0.7511910527332704, "learning_rate": 4.72830494728305e-06, "loss": 0.6854, "step": 5101 }, { "epoch": 0.14895915447724153, "grad_norm": 0.7417749615197029, "learning_rate": 4.728142741281428e-06, "loss": 0.6513, "step": 5102 }, { "epoch": 0.1489883507050889, "grad_norm": 0.7211495518855416, "learning_rate": 4.727980535279806e-06, "loss": 0.7212, "step": 5103 }, { "epoch": 0.14901754693293626, "grad_norm": 0.7618307990713789, "learning_rate": 4.727818329278184e-06, "loss": 0.6891, "step": 5104 }, { "epoch": 0.14904674316078362, "grad_norm": 0.7031845270382528, "learning_rate": 4.727656123276562e-06, "loss": 0.6354, "step": 5105 }, { "epoch": 0.14907593938863098, "grad_norm": 0.7220985069745249, "learning_rate": 4.727493917274939e-06, "loss": 0.6405, "step": 5106 }, { "epoch": 0.14910513561647834, "grad_norm": 0.7438023024357016, "learning_rate": 4.727331711273317e-06, "loss": 0.6819, "step": 5107 }, { "epoch": 0.1491343318443257, "grad_norm": 0.7992030906463499, "learning_rate": 4.727169505271695e-06, "loss": 0.7279, "step": 5108 }, { "epoch": 0.14916352807217306, "grad_norm": 0.8828399564221312, "learning_rate": 4.727007299270073e-06, "loss": 0.6618, "step": 5109 }, { "epoch": 0.14919272430002042, "grad_norm": 1.0433187389330953, "learning_rate": 4.726845093268451e-06, "loss": 0.8384, "step": 5110 }, { "epoch": 0.1492219205278678, "grad_norm": 0.8499233594448311, "learning_rate": 4.726682887266829e-06, "loss": 0.7482, "step": 5111 }, { "epoch": 0.14925111675571517, "grad_norm": 0.7912035919646155, "learning_rate": 4.726520681265207e-06, "loss": 0.7571, "step": 5112 }, { "epoch": 0.14928031298356254, "grad_norm": 0.6949565238823091, "learning_rate": 4.726358475263585e-06, "loss": 0.5555, "step": 5113 }, { "epoch": 0.1493095092114099, "grad_norm": 0.9486108092207362, "learning_rate": 4.726196269261963e-06, "loss": 0.8116, "step": 5114 }, { "epoch": 0.14933870543925726, "grad_norm": 0.7265903527484194, "learning_rate": 4.72603406326034e-06, "loss": 0.6412, "step": 5115 }, { "epoch": 0.14936790166710462, "grad_norm": 0.6799504094997002, "learning_rate": 4.725871857258719e-06, "loss": 0.5915, "step": 5116 }, { "epoch": 0.14939709789495198, "grad_norm": 0.8537592341795801, "learning_rate": 4.725709651257097e-06, "loss": 0.7996, "step": 5117 }, { "epoch": 0.14942629412279934, "grad_norm": 0.7839944753442137, "learning_rate": 4.725547445255475e-06, "loss": 0.6932, "step": 5118 }, { "epoch": 0.1494554903506467, "grad_norm": 0.7335132601803513, "learning_rate": 4.725385239253853e-06, "loss": 0.6538, "step": 5119 }, { "epoch": 0.14948468657849406, "grad_norm": 0.7576903167999629, "learning_rate": 4.725223033252231e-06, "loss": 0.631, "step": 5120 }, { "epoch": 0.14951388280634142, "grad_norm": 0.7524659642285073, "learning_rate": 4.7250608272506084e-06, "loss": 0.6826, "step": 5121 }, { "epoch": 0.14954307903418879, "grad_norm": 0.7883968092804079, "learning_rate": 4.7248986212489864e-06, "loss": 0.758, "step": 5122 }, { "epoch": 0.14957227526203615, "grad_norm": 0.8433901966872329, "learning_rate": 4.7247364152473644e-06, "loss": 0.6797, "step": 5123 }, { "epoch": 0.1496014714898835, "grad_norm": 0.8220865527594141, "learning_rate": 4.7245742092457425e-06, "loss": 0.6941, "step": 5124 }, { "epoch": 0.14963066771773087, "grad_norm": 0.6943822544331287, "learning_rate": 4.7244120032441205e-06, "loss": 0.5813, "step": 5125 }, { "epoch": 0.14965986394557823, "grad_norm": 0.8417787125442112, "learning_rate": 4.7242497972424985e-06, "loss": 0.7909, "step": 5126 }, { "epoch": 0.1496890601734256, "grad_norm": 0.7816967851381754, "learning_rate": 4.7240875912408765e-06, "loss": 0.6611, "step": 5127 }, { "epoch": 0.14971825640127295, "grad_norm": 0.7618840763939844, "learning_rate": 4.7239253852392545e-06, "loss": 0.7229, "step": 5128 }, { "epoch": 0.14974745262912031, "grad_norm": 1.0214973749417076, "learning_rate": 4.723763179237632e-06, "loss": 0.6665, "step": 5129 }, { "epoch": 0.14977664885696768, "grad_norm": 0.8303269032755837, "learning_rate": 4.72360097323601e-06, "loss": 0.6205, "step": 5130 }, { "epoch": 0.14980584508481504, "grad_norm": 0.7820425389881963, "learning_rate": 4.723438767234388e-06, "loss": 0.7785, "step": 5131 }, { "epoch": 0.1498350413126624, "grad_norm": 0.8589763735755421, "learning_rate": 4.723276561232766e-06, "loss": 0.696, "step": 5132 }, { "epoch": 0.14986423754050976, "grad_norm": 0.7339964816806143, "learning_rate": 4.723114355231144e-06, "loss": 0.6752, "step": 5133 }, { "epoch": 0.14989343376835712, "grad_norm": 0.7439354674275873, "learning_rate": 4.722952149229522e-06, "loss": 0.7298, "step": 5134 }, { "epoch": 0.14992262999620448, "grad_norm": 0.7915929793861362, "learning_rate": 4.7227899432279e-06, "loss": 0.6652, "step": 5135 }, { "epoch": 0.14995182622405184, "grad_norm": 0.7592518340274507, "learning_rate": 4.722627737226278e-06, "loss": 0.654, "step": 5136 }, { "epoch": 0.1499810224518992, "grad_norm": 0.7371327514907322, "learning_rate": 4.722465531224656e-06, "loss": 0.6664, "step": 5137 }, { "epoch": 0.15001021867974657, "grad_norm": 0.73573459871041, "learning_rate": 4.722303325223034e-06, "loss": 0.6716, "step": 5138 }, { "epoch": 0.15003941490759393, "grad_norm": 0.8259934381407514, "learning_rate": 4.722141119221412e-06, "loss": 0.6934, "step": 5139 }, { "epoch": 0.1500686111354413, "grad_norm": 0.717103465748386, "learning_rate": 4.72197891321979e-06, "loss": 0.6353, "step": 5140 }, { "epoch": 0.15009780736328868, "grad_norm": 0.7241333141585736, "learning_rate": 4.721816707218168e-06, "loss": 0.6566, "step": 5141 }, { "epoch": 0.15012700359113604, "grad_norm": 0.8048596995406316, "learning_rate": 4.721654501216546e-06, "loss": 0.6693, "step": 5142 }, { "epoch": 0.1501561998189834, "grad_norm": 0.8871409183323098, "learning_rate": 4.721492295214924e-06, "loss": 0.6539, "step": 5143 }, { "epoch": 0.15018539604683076, "grad_norm": 0.8924064616563661, "learning_rate": 4.721330089213301e-06, "loss": 0.635, "step": 5144 }, { "epoch": 0.15021459227467812, "grad_norm": 0.775004980910596, "learning_rate": 4.721167883211679e-06, "loss": 0.6964, "step": 5145 }, { "epoch": 0.15024378850252548, "grad_norm": 0.7831891803209301, "learning_rate": 4.721005677210057e-06, "loss": 0.7143, "step": 5146 }, { "epoch": 0.15027298473037284, "grad_norm": 0.8353485719791177, "learning_rate": 4.720843471208435e-06, "loss": 0.7541, "step": 5147 }, { "epoch": 0.1503021809582202, "grad_norm": 0.8417118822696577, "learning_rate": 4.720681265206813e-06, "loss": 0.6483, "step": 5148 }, { "epoch": 0.15033137718606757, "grad_norm": 0.7622025135886777, "learning_rate": 4.720519059205191e-06, "loss": 0.6638, "step": 5149 }, { "epoch": 0.15036057341391493, "grad_norm": 0.7540035379998575, "learning_rate": 4.720356853203569e-06, "loss": 0.6744, "step": 5150 }, { "epoch": 0.1503897696417623, "grad_norm": 0.6947854861428275, "learning_rate": 4.720194647201947e-06, "loss": 0.632, "step": 5151 }, { "epoch": 0.15041896586960965, "grad_norm": 0.7595813035161607, "learning_rate": 4.720032441200325e-06, "loss": 0.734, "step": 5152 }, { "epoch": 0.150448162097457, "grad_norm": 0.8201441286699576, "learning_rate": 4.719870235198702e-06, "loss": 0.7861, "step": 5153 }, { "epoch": 0.15047735832530437, "grad_norm": 0.8094602863068149, "learning_rate": 4.719708029197081e-06, "loss": 0.731, "step": 5154 }, { "epoch": 0.15050655455315173, "grad_norm": 0.7750911795747413, "learning_rate": 4.719545823195459e-06, "loss": 0.7429, "step": 5155 }, { "epoch": 0.1505357507809991, "grad_norm": 0.7524389424806202, "learning_rate": 4.719383617193837e-06, "loss": 0.7153, "step": 5156 }, { "epoch": 0.15056494700884646, "grad_norm": 0.7068391499184826, "learning_rate": 4.719221411192215e-06, "loss": 0.6517, "step": 5157 }, { "epoch": 0.15059414323669382, "grad_norm": 0.7924651609171721, "learning_rate": 4.719059205190593e-06, "loss": 0.7104, "step": 5158 }, { "epoch": 0.15062333946454118, "grad_norm": 0.7533671436240308, "learning_rate": 4.71889699918897e-06, "loss": 0.6925, "step": 5159 }, { "epoch": 0.15065253569238854, "grad_norm": 0.7588544008342037, "learning_rate": 4.718734793187348e-06, "loss": 0.6654, "step": 5160 }, { "epoch": 0.1506817319202359, "grad_norm": 0.8011895149908518, "learning_rate": 4.718572587185726e-06, "loss": 0.7478, "step": 5161 }, { "epoch": 0.15071092814808326, "grad_norm": 0.7425033733272275, "learning_rate": 4.718410381184104e-06, "loss": 0.6907, "step": 5162 }, { "epoch": 0.15074012437593062, "grad_norm": 0.7612857344223598, "learning_rate": 4.718248175182482e-06, "loss": 0.6958, "step": 5163 }, { "epoch": 0.15076932060377798, "grad_norm": 0.8646660405154442, "learning_rate": 4.71808596918086e-06, "loss": 0.8001, "step": 5164 }, { "epoch": 0.15079851683162535, "grad_norm": 0.7724179576669479, "learning_rate": 4.717923763179238e-06, "loss": 0.7473, "step": 5165 }, { "epoch": 0.1508277130594727, "grad_norm": 0.7277141411452156, "learning_rate": 4.717761557177616e-06, "loss": 0.6595, "step": 5166 }, { "epoch": 0.15085690928732007, "grad_norm": 0.759687183086924, "learning_rate": 4.717599351175993e-06, "loss": 0.6708, "step": 5167 }, { "epoch": 0.15088610551516743, "grad_norm": 0.6774423403388417, "learning_rate": 4.717437145174371e-06, "loss": 0.5813, "step": 5168 }, { "epoch": 0.1509153017430148, "grad_norm": 0.8163504685706966, "learning_rate": 4.717274939172749e-06, "loss": 0.6727, "step": 5169 }, { "epoch": 0.15094449797086215, "grad_norm": 0.7997181133793543, "learning_rate": 4.717112733171127e-06, "loss": 0.7298, "step": 5170 }, { "epoch": 0.15097369419870954, "grad_norm": 0.7609624090628917, "learning_rate": 4.716950527169505e-06, "loss": 0.6631, "step": 5171 }, { "epoch": 0.1510028904265569, "grad_norm": 0.8282863642531221, "learning_rate": 4.716788321167883e-06, "loss": 0.7808, "step": 5172 }, { "epoch": 0.15103208665440426, "grad_norm": 0.6733928869555396, "learning_rate": 4.716626115166261e-06, "loss": 0.5531, "step": 5173 }, { "epoch": 0.15106128288225162, "grad_norm": 0.8919982615634183, "learning_rate": 4.716463909164639e-06, "loss": 0.7758, "step": 5174 }, { "epoch": 0.15109047911009899, "grad_norm": 0.7841580672337297, "learning_rate": 4.716301703163017e-06, "loss": 0.7378, "step": 5175 }, { "epoch": 0.15111967533794635, "grad_norm": 0.8065389176313154, "learning_rate": 4.716139497161395e-06, "loss": 0.747, "step": 5176 }, { "epoch": 0.1511488715657937, "grad_norm": 0.713258890212525, "learning_rate": 4.715977291159773e-06, "loss": 0.6179, "step": 5177 }, { "epoch": 0.15117806779364107, "grad_norm": 0.7877441315252528, "learning_rate": 4.7158150851581514e-06, "loss": 0.7182, "step": 5178 }, { "epoch": 0.15120726402148843, "grad_norm": 0.8219153851827368, "learning_rate": 4.7156528791565294e-06, "loss": 0.7762, "step": 5179 }, { "epoch": 0.1512364602493358, "grad_norm": 0.702595482411945, "learning_rate": 4.7154906731549074e-06, "loss": 0.6236, "step": 5180 }, { "epoch": 0.15126565647718315, "grad_norm": 0.8124157570945179, "learning_rate": 4.7153284671532855e-06, "loss": 0.6945, "step": 5181 }, { "epoch": 0.1512948527050305, "grad_norm": 0.7817405735681761, "learning_rate": 4.715166261151663e-06, "loss": 0.686, "step": 5182 }, { "epoch": 0.15132404893287787, "grad_norm": 0.9957468701586122, "learning_rate": 4.715004055150041e-06, "loss": 0.6863, "step": 5183 }, { "epoch": 0.15135324516072524, "grad_norm": 0.8108308812735235, "learning_rate": 4.714841849148419e-06, "loss": 0.7464, "step": 5184 }, { "epoch": 0.1513824413885726, "grad_norm": 0.6961011469359775, "learning_rate": 4.714679643146797e-06, "loss": 0.5998, "step": 5185 }, { "epoch": 0.15141163761641996, "grad_norm": 0.8037956478788613, "learning_rate": 4.714517437145175e-06, "loss": 0.764, "step": 5186 }, { "epoch": 0.15144083384426732, "grad_norm": 0.7703722335202338, "learning_rate": 4.714355231143553e-06, "loss": 0.7007, "step": 5187 }, { "epoch": 0.15147003007211468, "grad_norm": 0.7427640816524546, "learning_rate": 4.714193025141931e-06, "loss": 0.7417, "step": 5188 }, { "epoch": 0.15149922629996204, "grad_norm": 0.8735811147399141, "learning_rate": 4.714030819140309e-06, "loss": 0.8049, "step": 5189 }, { "epoch": 0.1515284225278094, "grad_norm": 0.8029854654089312, "learning_rate": 4.713868613138687e-06, "loss": 0.7357, "step": 5190 }, { "epoch": 0.15155761875565676, "grad_norm": 0.7576238590320666, "learning_rate": 4.713706407137065e-06, "loss": 0.6583, "step": 5191 }, { "epoch": 0.15158681498350413, "grad_norm": 2.1479300941815866, "learning_rate": 4.713544201135443e-06, "loss": 0.6805, "step": 5192 }, { "epoch": 0.1516160112113515, "grad_norm": 0.7768166499119948, "learning_rate": 4.713381995133821e-06, "loss": 0.7528, "step": 5193 }, { "epoch": 0.15164520743919885, "grad_norm": 0.8181686771590321, "learning_rate": 4.713219789132199e-06, "loss": 0.7069, "step": 5194 }, { "epoch": 0.1516744036670462, "grad_norm": 0.8338635432121657, "learning_rate": 4.713057583130577e-06, "loss": 0.6983, "step": 5195 }, { "epoch": 0.15170359989489357, "grad_norm": 1.1228656332631506, "learning_rate": 4.712895377128955e-06, "loss": 0.783, "step": 5196 }, { "epoch": 0.15173279612274093, "grad_norm": 0.8013967555266026, "learning_rate": 4.712733171127332e-06, "loss": 0.6832, "step": 5197 }, { "epoch": 0.1517619923505883, "grad_norm": 0.7905310038002488, "learning_rate": 4.71257096512571e-06, "loss": 0.751, "step": 5198 }, { "epoch": 0.15179118857843565, "grad_norm": 0.7206481624068741, "learning_rate": 4.712408759124088e-06, "loss": 0.602, "step": 5199 }, { "epoch": 0.15182038480628302, "grad_norm": 0.7630828377833699, "learning_rate": 4.712246553122466e-06, "loss": 0.7379, "step": 5200 }, { "epoch": 0.1518495810341304, "grad_norm": 0.8885646364001195, "learning_rate": 4.712084347120844e-06, "loss": 0.6637, "step": 5201 }, { "epoch": 0.15187877726197777, "grad_norm": 0.8009256074553428, "learning_rate": 4.711922141119222e-06, "loss": 0.7108, "step": 5202 }, { "epoch": 0.15190797348982513, "grad_norm": 0.7151567131152189, "learning_rate": 4.7117599351176e-06, "loss": 0.6262, "step": 5203 }, { "epoch": 0.1519371697176725, "grad_norm": 0.7500919165696357, "learning_rate": 4.711597729115978e-06, "loss": 0.7072, "step": 5204 }, { "epoch": 0.15196636594551985, "grad_norm": 0.906145273152402, "learning_rate": 4.711435523114355e-06, "loss": 0.748, "step": 5205 }, { "epoch": 0.1519955621733672, "grad_norm": 0.739372867031289, "learning_rate": 4.711273317112733e-06, "loss": 0.6513, "step": 5206 }, { "epoch": 0.15202475840121457, "grad_norm": 0.7354718851203674, "learning_rate": 4.711111111111111e-06, "loss": 0.6428, "step": 5207 }, { "epoch": 0.15205395462906193, "grad_norm": 0.8045504233183326, "learning_rate": 4.710948905109489e-06, "loss": 0.804, "step": 5208 }, { "epoch": 0.1520831508569093, "grad_norm": 0.7502923185877312, "learning_rate": 4.710786699107867e-06, "loss": 0.6669, "step": 5209 }, { "epoch": 0.15211234708475666, "grad_norm": 0.741266237705793, "learning_rate": 4.710624493106246e-06, "loss": 0.7273, "step": 5210 }, { "epoch": 0.15214154331260402, "grad_norm": 0.8940876292022852, "learning_rate": 4.710462287104623e-06, "loss": 0.7723, "step": 5211 }, { "epoch": 0.15217073954045138, "grad_norm": 0.761262420167598, "learning_rate": 4.710300081103001e-06, "loss": 0.7189, "step": 5212 }, { "epoch": 0.15219993576829874, "grad_norm": 0.7691607347779172, "learning_rate": 4.710137875101379e-06, "loss": 0.679, "step": 5213 }, { "epoch": 0.1522291319961461, "grad_norm": 0.7759397894426335, "learning_rate": 4.709975669099757e-06, "loss": 0.7, "step": 5214 }, { "epoch": 0.15225832822399346, "grad_norm": 0.6986741301024487, "learning_rate": 4.709813463098135e-06, "loss": 0.5859, "step": 5215 }, { "epoch": 0.15228752445184082, "grad_norm": 0.7223645865147152, "learning_rate": 4.709651257096513e-06, "loss": 0.6506, "step": 5216 }, { "epoch": 0.15231672067968818, "grad_norm": 0.7600848240398819, "learning_rate": 4.709489051094891e-06, "loss": 0.6892, "step": 5217 }, { "epoch": 0.15234591690753554, "grad_norm": 0.7446728568151988, "learning_rate": 4.709326845093269e-06, "loss": 0.7102, "step": 5218 }, { "epoch": 0.1523751131353829, "grad_norm": 0.7238105065137813, "learning_rate": 4.709164639091647e-06, "loss": 0.6502, "step": 5219 }, { "epoch": 0.15240430936323027, "grad_norm": 0.7649967313798082, "learning_rate": 4.709002433090024e-06, "loss": 0.7141, "step": 5220 }, { "epoch": 0.15243350559107763, "grad_norm": 0.7111647203808757, "learning_rate": 4.708840227088402e-06, "loss": 0.6635, "step": 5221 }, { "epoch": 0.152462701818925, "grad_norm": 0.8183109213652366, "learning_rate": 4.70867802108678e-06, "loss": 0.6828, "step": 5222 }, { "epoch": 0.15249189804677235, "grad_norm": 0.7940553483371399, "learning_rate": 4.708515815085158e-06, "loss": 0.779, "step": 5223 }, { "epoch": 0.1525210942746197, "grad_norm": 0.8017739608402449, "learning_rate": 4.708353609083536e-06, "loss": 0.7121, "step": 5224 }, { "epoch": 0.15255029050246707, "grad_norm": 0.6952978151273134, "learning_rate": 4.708191403081914e-06, "loss": 0.614, "step": 5225 }, { "epoch": 0.15257948673031443, "grad_norm": 0.7229337533994645, "learning_rate": 4.708029197080292e-06, "loss": 0.6476, "step": 5226 }, { "epoch": 0.1526086829581618, "grad_norm": 0.746560414771925, "learning_rate": 4.70786699107867e-06, "loss": 0.6604, "step": 5227 }, { "epoch": 0.15263787918600916, "grad_norm": 0.7722984510423189, "learning_rate": 4.707704785077048e-06, "loss": 0.7334, "step": 5228 }, { "epoch": 0.15266707541385652, "grad_norm": 0.8208698312822805, "learning_rate": 4.707542579075426e-06, "loss": 0.6818, "step": 5229 }, { "epoch": 0.15269627164170388, "grad_norm": 0.74194333017049, "learning_rate": 4.707380373073804e-06, "loss": 0.6944, "step": 5230 }, { "epoch": 0.15272546786955124, "grad_norm": 0.7056577164025298, "learning_rate": 4.707218167072182e-06, "loss": 0.6347, "step": 5231 }, { "epoch": 0.15275466409739863, "grad_norm": 0.7638276469734513, "learning_rate": 4.70705596107056e-06, "loss": 0.7037, "step": 5232 }, { "epoch": 0.152783860325246, "grad_norm": 0.7473140041185843, "learning_rate": 4.706893755068938e-06, "loss": 0.6203, "step": 5233 }, { "epoch": 0.15281305655309335, "grad_norm": 0.7539292203269156, "learning_rate": 4.7067315490673156e-06, "loss": 0.6894, "step": 5234 }, { "epoch": 0.1528422527809407, "grad_norm": 0.7165506730185925, "learning_rate": 4.706569343065694e-06, "loss": 0.5989, "step": 5235 }, { "epoch": 0.15287144900878807, "grad_norm": 0.7494325546577795, "learning_rate": 4.706407137064072e-06, "loss": 0.7305, "step": 5236 }, { "epoch": 0.15290064523663544, "grad_norm": 1.1370357160750761, "learning_rate": 4.70624493106245e-06, "loss": 0.776, "step": 5237 }, { "epoch": 0.1529298414644828, "grad_norm": 0.7325515827056884, "learning_rate": 4.706082725060828e-06, "loss": 0.6638, "step": 5238 }, { "epoch": 0.15295903769233016, "grad_norm": 0.7920311430925443, "learning_rate": 4.705920519059206e-06, "loss": 0.7176, "step": 5239 }, { "epoch": 0.15298823392017752, "grad_norm": 0.7467088762574583, "learning_rate": 4.705758313057584e-06, "loss": 0.6155, "step": 5240 }, { "epoch": 0.15301743014802488, "grad_norm": 0.8023084003871535, "learning_rate": 4.705596107055962e-06, "loss": 0.7922, "step": 5241 }, { "epoch": 0.15304662637587224, "grad_norm": 0.7093798550137116, "learning_rate": 4.70543390105434e-06, "loss": 0.6482, "step": 5242 }, { "epoch": 0.1530758226037196, "grad_norm": 0.7430917977899882, "learning_rate": 4.705271695052717e-06, "loss": 0.6571, "step": 5243 }, { "epoch": 0.15310501883156696, "grad_norm": 0.7085934945598982, "learning_rate": 4.705109489051095e-06, "loss": 0.6019, "step": 5244 }, { "epoch": 0.15313421505941432, "grad_norm": 0.9606014638654408, "learning_rate": 4.704947283049473e-06, "loss": 0.8314, "step": 5245 }, { "epoch": 0.15316341128726169, "grad_norm": 0.7626537680818203, "learning_rate": 4.704785077047851e-06, "loss": 0.724, "step": 5246 }, { "epoch": 0.15319260751510905, "grad_norm": 0.767947589866332, "learning_rate": 4.704622871046229e-06, "loss": 0.6781, "step": 5247 }, { "epoch": 0.1532218037429564, "grad_norm": 0.8353528520960802, "learning_rate": 4.704460665044608e-06, "loss": 0.8145, "step": 5248 }, { "epoch": 0.15325099997080377, "grad_norm": 0.8252224055909074, "learning_rate": 4.704298459042985e-06, "loss": 0.6743, "step": 5249 }, { "epoch": 0.15328019619865113, "grad_norm": 0.7244129616342566, "learning_rate": 4.704136253041363e-06, "loss": 0.6799, "step": 5250 }, { "epoch": 0.1533093924264985, "grad_norm": 0.6743926539316061, "learning_rate": 4.703974047039741e-06, "loss": 0.5882, "step": 5251 }, { "epoch": 0.15333858865434585, "grad_norm": 0.878165479253084, "learning_rate": 4.703811841038119e-06, "loss": 0.8991, "step": 5252 }, { "epoch": 0.15336778488219321, "grad_norm": 0.7750288148526336, "learning_rate": 4.703649635036497e-06, "loss": 0.6931, "step": 5253 }, { "epoch": 0.15339698111004058, "grad_norm": 0.8169361307565174, "learning_rate": 4.703487429034875e-06, "loss": 0.76, "step": 5254 }, { "epoch": 0.15342617733788794, "grad_norm": 0.788460894030563, "learning_rate": 4.703325223033253e-06, "loss": 0.7436, "step": 5255 }, { "epoch": 0.1534553735657353, "grad_norm": 0.7246959138240472, "learning_rate": 4.703163017031631e-06, "loss": 0.6607, "step": 5256 }, { "epoch": 0.15348456979358266, "grad_norm": 0.6839487075715809, "learning_rate": 4.703000811030009e-06, "loss": 0.5915, "step": 5257 }, { "epoch": 0.15351376602143002, "grad_norm": 0.7619442256294068, "learning_rate": 4.702838605028386e-06, "loss": 0.6089, "step": 5258 }, { "epoch": 0.15354296224927738, "grad_norm": 0.7588992917215769, "learning_rate": 4.702676399026764e-06, "loss": 0.6869, "step": 5259 }, { "epoch": 0.15357215847712474, "grad_norm": 0.8333284210600096, "learning_rate": 4.702514193025142e-06, "loss": 0.7104, "step": 5260 }, { "epoch": 0.1536013547049721, "grad_norm": 0.83331557945042, "learning_rate": 4.70235198702352e-06, "loss": 0.8062, "step": 5261 }, { "epoch": 0.1536305509328195, "grad_norm": 0.8171796463333078, "learning_rate": 4.702189781021898e-06, "loss": 0.7552, "step": 5262 }, { "epoch": 0.15365974716066685, "grad_norm": 0.9525353210893674, "learning_rate": 4.702027575020276e-06, "loss": 0.7762, "step": 5263 }, { "epoch": 0.15368894338851422, "grad_norm": 0.8092730069617164, "learning_rate": 4.701865369018654e-06, "loss": 0.7768, "step": 5264 }, { "epoch": 0.15371813961636158, "grad_norm": 0.8356752281926886, "learning_rate": 4.701703163017032e-06, "loss": 0.8564, "step": 5265 }, { "epoch": 0.15374733584420894, "grad_norm": 0.7792604534578138, "learning_rate": 4.70154095701541e-06, "loss": 0.7018, "step": 5266 }, { "epoch": 0.1537765320720563, "grad_norm": 0.719749336300571, "learning_rate": 4.701378751013788e-06, "loss": 0.6019, "step": 5267 }, { "epoch": 0.15380572829990366, "grad_norm": 0.8388617656226531, "learning_rate": 4.701216545012166e-06, "loss": 0.8226, "step": 5268 }, { "epoch": 0.15383492452775102, "grad_norm": 0.7689429380833857, "learning_rate": 4.701054339010544e-06, "loss": 0.6804, "step": 5269 }, { "epoch": 0.15386412075559838, "grad_norm": 0.6891463953580099, "learning_rate": 4.700892133008922e-06, "loss": 0.5876, "step": 5270 }, { "epoch": 0.15389331698344574, "grad_norm": 0.7257210734081541, "learning_rate": 4.7007299270073e-06, "loss": 0.6773, "step": 5271 }, { "epoch": 0.1539225132112931, "grad_norm": 1.056508207597803, "learning_rate": 4.700567721005677e-06, "loss": 0.6974, "step": 5272 }, { "epoch": 0.15395170943914047, "grad_norm": 0.9002210092055133, "learning_rate": 4.700405515004055e-06, "loss": 0.6645, "step": 5273 }, { "epoch": 0.15398090566698783, "grad_norm": 0.6921237647536959, "learning_rate": 4.700243309002433e-06, "loss": 0.6094, "step": 5274 }, { "epoch": 0.1540101018948352, "grad_norm": 0.7699862159211746, "learning_rate": 4.700081103000811e-06, "loss": 0.6267, "step": 5275 }, { "epoch": 0.15403929812268255, "grad_norm": 0.8198395678298641, "learning_rate": 4.699918896999189e-06, "loss": 0.7638, "step": 5276 }, { "epoch": 0.1540684943505299, "grad_norm": 0.6906530797929558, "learning_rate": 4.699756690997567e-06, "loss": 0.6198, "step": 5277 }, { "epoch": 0.15409769057837727, "grad_norm": 0.8482731754971061, "learning_rate": 4.699594484995945e-06, "loss": 0.6217, "step": 5278 }, { "epoch": 0.15412688680622463, "grad_norm": 0.8452484033405443, "learning_rate": 4.699432278994323e-06, "loss": 0.7165, "step": 5279 }, { "epoch": 0.154156083034072, "grad_norm": 0.8919099425657027, "learning_rate": 4.699270072992701e-06, "loss": 0.7135, "step": 5280 }, { "epoch": 0.15418527926191936, "grad_norm": 0.8788055268768378, "learning_rate": 4.6991078669910785e-06, "loss": 0.7988, "step": 5281 }, { "epoch": 0.15421447548976672, "grad_norm": 0.7607242422802962, "learning_rate": 4.6989456609894565e-06, "loss": 0.7342, "step": 5282 }, { "epoch": 0.15424367171761408, "grad_norm": 0.8304462122635052, "learning_rate": 4.6987834549878345e-06, "loss": 0.6821, "step": 5283 }, { "epoch": 0.15427286794546144, "grad_norm": 0.710760623461079, "learning_rate": 4.6986212489862125e-06, "loss": 0.6643, "step": 5284 }, { "epoch": 0.1543020641733088, "grad_norm": 0.7878650825575144, "learning_rate": 4.6984590429845905e-06, "loss": 0.7248, "step": 5285 }, { "epoch": 0.15433126040115616, "grad_norm": 0.8172856826269701, "learning_rate": 4.698296836982969e-06, "loss": 0.7268, "step": 5286 }, { "epoch": 0.15436045662900352, "grad_norm": 0.8129558986357966, "learning_rate": 4.6981346309813465e-06, "loss": 0.7267, "step": 5287 }, { "epoch": 0.15438965285685088, "grad_norm": 0.7821446142169555, "learning_rate": 4.6979724249797246e-06, "loss": 0.6769, "step": 5288 }, { "epoch": 0.15441884908469825, "grad_norm": 0.7590588364253154, "learning_rate": 4.6978102189781026e-06, "loss": 0.6957, "step": 5289 }, { "epoch": 0.1544480453125456, "grad_norm": 0.7136068175649014, "learning_rate": 4.6976480129764806e-06, "loss": 0.6465, "step": 5290 }, { "epoch": 0.15447724154039297, "grad_norm": 0.7174676804605792, "learning_rate": 4.6974858069748586e-06, "loss": 0.6571, "step": 5291 }, { "epoch": 0.15450643776824036, "grad_norm": 0.791256983391907, "learning_rate": 4.697323600973237e-06, "loss": 0.7469, "step": 5292 }, { "epoch": 0.15453563399608772, "grad_norm": 0.7068527045011446, "learning_rate": 4.697161394971615e-06, "loss": 0.632, "step": 5293 }, { "epoch": 0.15456483022393508, "grad_norm": 0.7670675282060841, "learning_rate": 4.696999188969993e-06, "loss": 0.7519, "step": 5294 }, { "epoch": 0.15459402645178244, "grad_norm": 0.725079466367568, "learning_rate": 4.696836982968371e-06, "loss": 0.6275, "step": 5295 }, { "epoch": 0.1546232226796298, "grad_norm": 0.7286541616148626, "learning_rate": 4.696674776966748e-06, "loss": 0.6434, "step": 5296 }, { "epoch": 0.15465241890747716, "grad_norm": 0.7664038733895205, "learning_rate": 4.696512570965126e-06, "loss": 0.6265, "step": 5297 }, { "epoch": 0.15468161513532452, "grad_norm": 0.684874457865872, "learning_rate": 4.696350364963504e-06, "loss": 0.6087, "step": 5298 }, { "epoch": 0.15471081136317189, "grad_norm": 0.7628643485009412, "learning_rate": 4.696188158961882e-06, "loss": 0.7187, "step": 5299 }, { "epoch": 0.15474000759101925, "grad_norm": 0.7894382054533491, "learning_rate": 4.69602595296026e-06, "loss": 0.7382, "step": 5300 }, { "epoch": 0.1547692038188666, "grad_norm": 0.8574100890079828, "learning_rate": 4.695863746958638e-06, "loss": 0.7858, "step": 5301 }, { "epoch": 0.15479840004671397, "grad_norm": 0.7023121086921387, "learning_rate": 4.695701540957016e-06, "loss": 0.6039, "step": 5302 }, { "epoch": 0.15482759627456133, "grad_norm": 0.7703720449057824, "learning_rate": 4.695539334955394e-06, "loss": 0.7789, "step": 5303 }, { "epoch": 0.1548567925024087, "grad_norm": 0.871910149878516, "learning_rate": 4.695377128953772e-06, "loss": 0.6752, "step": 5304 }, { "epoch": 0.15488598873025605, "grad_norm": 0.7576764456150986, "learning_rate": 4.69521492295215e-06, "loss": 0.6931, "step": 5305 }, { "epoch": 0.1549151849581034, "grad_norm": 0.672434006114625, "learning_rate": 4.695052716950528e-06, "loss": 0.5721, "step": 5306 }, { "epoch": 0.15494438118595077, "grad_norm": 0.8051503398785516, "learning_rate": 4.694890510948906e-06, "loss": 0.7244, "step": 5307 }, { "epoch": 0.15497357741379814, "grad_norm": 0.7677228117130286, "learning_rate": 4.694728304947284e-06, "loss": 0.7007, "step": 5308 }, { "epoch": 0.1550027736416455, "grad_norm": 0.7765807582738782, "learning_rate": 4.694566098945662e-06, "loss": 0.7423, "step": 5309 }, { "epoch": 0.15503196986949286, "grad_norm": 0.7429055978156188, "learning_rate": 4.694403892944039e-06, "loss": 0.6858, "step": 5310 }, { "epoch": 0.15506116609734022, "grad_norm": 0.830394922982267, "learning_rate": 4.694241686942417e-06, "loss": 0.806, "step": 5311 }, { "epoch": 0.15509036232518758, "grad_norm": 0.7962913655662829, "learning_rate": 4.694079480940795e-06, "loss": 0.7043, "step": 5312 }, { "epoch": 0.15511955855303494, "grad_norm": 0.7521902082305274, "learning_rate": 4.693917274939173e-06, "loss": 0.6584, "step": 5313 }, { "epoch": 0.1551487547808823, "grad_norm": 0.7407056029454502, "learning_rate": 4.693755068937551e-06, "loss": 0.6181, "step": 5314 }, { "epoch": 0.15517795100872966, "grad_norm": 0.7191407836168173, "learning_rate": 4.693592862935929e-06, "loss": 0.6263, "step": 5315 }, { "epoch": 0.15520714723657703, "grad_norm": 0.7464754939530972, "learning_rate": 4.693430656934307e-06, "loss": 0.6857, "step": 5316 }, { "epoch": 0.1552363434644244, "grad_norm": 0.8682406185654153, "learning_rate": 4.693268450932685e-06, "loss": 0.7853, "step": 5317 }, { "epoch": 0.15526553969227175, "grad_norm": 0.7367228464397577, "learning_rate": 4.693106244931063e-06, "loss": 0.708, "step": 5318 }, { "epoch": 0.1552947359201191, "grad_norm": 0.7804858117155239, "learning_rate": 4.69294403892944e-06, "loss": 0.6619, "step": 5319 }, { "epoch": 0.15532393214796647, "grad_norm": 0.702943529533021, "learning_rate": 4.692781832927818e-06, "loss": 0.594, "step": 5320 }, { "epoch": 0.15535312837581383, "grad_norm": 0.6777725602383167, "learning_rate": 4.692619626926196e-06, "loss": 0.6269, "step": 5321 }, { "epoch": 0.15538232460366122, "grad_norm": 0.7819341166310478, "learning_rate": 4.692457420924574e-06, "loss": 0.6861, "step": 5322 }, { "epoch": 0.15541152083150858, "grad_norm": 0.7659769040848078, "learning_rate": 4.692295214922952e-06, "loss": 0.6767, "step": 5323 }, { "epoch": 0.15544071705935594, "grad_norm": 0.743046599435303, "learning_rate": 4.692133008921331e-06, "loss": 0.6742, "step": 5324 }, { "epoch": 0.1554699132872033, "grad_norm": 0.7365035927457843, "learning_rate": 4.691970802919708e-06, "loss": 0.6983, "step": 5325 }, { "epoch": 0.15549910951505067, "grad_norm": 0.8762745414218845, "learning_rate": 4.691808596918086e-06, "loss": 0.779, "step": 5326 }, { "epoch": 0.15552830574289803, "grad_norm": 0.8263470990344167, "learning_rate": 4.691646390916464e-06, "loss": 0.8616, "step": 5327 }, { "epoch": 0.1555575019707454, "grad_norm": 0.7742660285927007, "learning_rate": 4.691484184914842e-06, "loss": 0.7231, "step": 5328 }, { "epoch": 0.15558669819859275, "grad_norm": 0.7807324514483055, "learning_rate": 4.69132197891322e-06, "loss": 0.7133, "step": 5329 }, { "epoch": 0.1556158944264401, "grad_norm": 0.7588812919416036, "learning_rate": 4.691159772911598e-06, "loss": 0.6973, "step": 5330 }, { "epoch": 0.15564509065428747, "grad_norm": 0.8175615495952714, "learning_rate": 4.690997566909976e-06, "loss": 0.8072, "step": 5331 }, { "epoch": 0.15567428688213483, "grad_norm": 0.7615835154087848, "learning_rate": 4.690835360908354e-06, "loss": 0.7561, "step": 5332 }, { "epoch": 0.1557034831099822, "grad_norm": 0.8367529441352904, "learning_rate": 4.690673154906732e-06, "loss": 0.74, "step": 5333 }, { "epoch": 0.15573267933782955, "grad_norm": 0.7769833189471439, "learning_rate": 4.6905109489051095e-06, "loss": 0.7259, "step": 5334 }, { "epoch": 0.15576187556567692, "grad_norm": 0.8327913437060614, "learning_rate": 4.6903487429034875e-06, "loss": 0.7386, "step": 5335 }, { "epoch": 0.15579107179352428, "grad_norm": 0.8691037959142881, "learning_rate": 4.6901865369018655e-06, "loss": 0.7423, "step": 5336 }, { "epoch": 0.15582026802137164, "grad_norm": 0.715818846120503, "learning_rate": 4.6900243309002435e-06, "loss": 0.6622, "step": 5337 }, { "epoch": 0.155849464249219, "grad_norm": 0.7652293858992641, "learning_rate": 4.6898621248986215e-06, "loss": 0.7309, "step": 5338 }, { "epoch": 0.15587866047706636, "grad_norm": 0.6966890418955122, "learning_rate": 4.6896999188969995e-06, "loss": 0.6082, "step": 5339 }, { "epoch": 0.15590785670491372, "grad_norm": 0.7525752451921803, "learning_rate": 4.6895377128953775e-06, "loss": 0.6043, "step": 5340 }, { "epoch": 0.15593705293276108, "grad_norm": 0.766018254088648, "learning_rate": 4.6893755068937555e-06, "loss": 0.6842, "step": 5341 }, { "epoch": 0.15596624916060844, "grad_norm": 0.7397295927010084, "learning_rate": 4.6892133008921335e-06, "loss": 0.7262, "step": 5342 }, { "epoch": 0.1559954453884558, "grad_norm": 0.8434440504189559, "learning_rate": 4.6890510948905115e-06, "loss": 0.7688, "step": 5343 }, { "epoch": 0.15602464161630317, "grad_norm": 0.8713271676224628, "learning_rate": 4.6888888888888895e-06, "loss": 0.7527, "step": 5344 }, { "epoch": 0.15605383784415053, "grad_norm": 0.7498234831561331, "learning_rate": 4.6887266828872676e-06, "loss": 0.6663, "step": 5345 }, { "epoch": 0.1560830340719979, "grad_norm": 0.8104279408311409, "learning_rate": 4.6885644768856456e-06, "loss": 0.7072, "step": 5346 }, { "epoch": 0.15611223029984525, "grad_norm": 0.8523461048827913, "learning_rate": 4.6884022708840236e-06, "loss": 0.7696, "step": 5347 }, { "epoch": 0.1561414265276926, "grad_norm": 0.711902956376961, "learning_rate": 4.688240064882401e-06, "loss": 0.6018, "step": 5348 }, { "epoch": 0.15617062275553997, "grad_norm": 0.9980798493827885, "learning_rate": 4.688077858880779e-06, "loss": 0.6135, "step": 5349 }, { "epoch": 0.15619981898338733, "grad_norm": 0.7637756074840918, "learning_rate": 4.687915652879157e-06, "loss": 0.7059, "step": 5350 }, { "epoch": 0.1562290152112347, "grad_norm": 0.8355816165687172, "learning_rate": 4.687753446877535e-06, "loss": 0.692, "step": 5351 }, { "epoch": 0.15625821143908208, "grad_norm": 0.9826775461103192, "learning_rate": 4.687591240875913e-06, "loss": 0.752, "step": 5352 }, { "epoch": 0.15628740766692945, "grad_norm": 0.742480938006079, "learning_rate": 4.687429034874291e-06, "loss": 0.6723, "step": 5353 }, { "epoch": 0.1563166038947768, "grad_norm": 0.6863380681794556, "learning_rate": 4.687266828872669e-06, "loss": 0.6049, "step": 5354 }, { "epoch": 0.15634580012262417, "grad_norm": 0.7592188034622127, "learning_rate": 4.687104622871047e-06, "loss": 0.6365, "step": 5355 }, { "epoch": 0.15637499635047153, "grad_norm": 0.802860709300532, "learning_rate": 4.686942416869425e-06, "loss": 0.6983, "step": 5356 }, { "epoch": 0.1564041925783189, "grad_norm": 0.7735975640855909, "learning_rate": 4.686780210867802e-06, "loss": 0.6485, "step": 5357 }, { "epoch": 0.15643338880616625, "grad_norm": 0.8659587175577254, "learning_rate": 4.68661800486618e-06, "loss": 0.7756, "step": 5358 }, { "epoch": 0.1564625850340136, "grad_norm": 0.769410634881755, "learning_rate": 4.686455798864558e-06, "loss": 0.6759, "step": 5359 }, { "epoch": 0.15649178126186097, "grad_norm": 0.748553725894408, "learning_rate": 4.686293592862936e-06, "loss": 0.6844, "step": 5360 }, { "epoch": 0.15652097748970834, "grad_norm": 0.7824416801545367, "learning_rate": 4.686131386861315e-06, "loss": 0.7096, "step": 5361 }, { "epoch": 0.1565501737175557, "grad_norm": 0.7360919709402584, "learning_rate": 4.685969180859693e-06, "loss": 0.6697, "step": 5362 }, { "epoch": 0.15657936994540306, "grad_norm": 0.8268226349256939, "learning_rate": 4.68580697485807e-06, "loss": 0.7146, "step": 5363 }, { "epoch": 0.15660856617325042, "grad_norm": 0.7351386190522563, "learning_rate": 4.685644768856448e-06, "loss": 0.7042, "step": 5364 }, { "epoch": 0.15663776240109778, "grad_norm": 0.692767780146968, "learning_rate": 4.685482562854826e-06, "loss": 0.6027, "step": 5365 }, { "epoch": 0.15666695862894514, "grad_norm": 0.7293065000164852, "learning_rate": 4.685320356853204e-06, "loss": 0.6129, "step": 5366 }, { "epoch": 0.1566961548567925, "grad_norm": 0.7457827897385173, "learning_rate": 4.685158150851582e-06, "loss": 0.733, "step": 5367 }, { "epoch": 0.15672535108463986, "grad_norm": 0.6695952021007167, "learning_rate": 4.68499594484996e-06, "loss": 0.5641, "step": 5368 }, { "epoch": 0.15675454731248722, "grad_norm": 0.8090234529589231, "learning_rate": 4.684833738848338e-06, "loss": 0.8612, "step": 5369 }, { "epoch": 0.15678374354033459, "grad_norm": 0.7800307851371383, "learning_rate": 4.684671532846716e-06, "loss": 0.7019, "step": 5370 }, { "epoch": 0.15681293976818195, "grad_norm": 0.747536853762417, "learning_rate": 4.684509326845094e-06, "loss": 0.6546, "step": 5371 }, { "epoch": 0.1568421359960293, "grad_norm": 0.6632950006521009, "learning_rate": 4.684347120843471e-06, "loss": 0.5619, "step": 5372 }, { "epoch": 0.15687133222387667, "grad_norm": 0.762013025460895, "learning_rate": 4.684184914841849e-06, "loss": 0.7282, "step": 5373 }, { "epoch": 0.15690052845172403, "grad_norm": 0.723605246226589, "learning_rate": 4.684022708840227e-06, "loss": 0.649, "step": 5374 }, { "epoch": 0.1569297246795714, "grad_norm": 0.9651106362525314, "learning_rate": 4.683860502838605e-06, "loss": 0.766, "step": 5375 }, { "epoch": 0.15695892090741875, "grad_norm": 0.723660960776776, "learning_rate": 4.683698296836983e-06, "loss": 0.5886, "step": 5376 }, { "epoch": 0.15698811713526611, "grad_norm": 0.7348233815738995, "learning_rate": 4.683536090835361e-06, "loss": 0.6948, "step": 5377 }, { "epoch": 0.15701731336311348, "grad_norm": 0.7613742470393752, "learning_rate": 4.683373884833739e-06, "loss": 0.6758, "step": 5378 }, { "epoch": 0.15704650959096084, "grad_norm": 0.7488765138965952, "learning_rate": 4.683211678832117e-06, "loss": 0.6695, "step": 5379 }, { "epoch": 0.1570757058188082, "grad_norm": 0.7688867055802696, "learning_rate": 4.683049472830495e-06, "loss": 0.7385, "step": 5380 }, { "epoch": 0.15710490204665556, "grad_norm": 0.7200388931379382, "learning_rate": 4.682887266828873e-06, "loss": 0.6508, "step": 5381 }, { "epoch": 0.15713409827450295, "grad_norm": 0.7744800678320733, "learning_rate": 4.682725060827251e-06, "loss": 0.709, "step": 5382 }, { "epoch": 0.1571632945023503, "grad_norm": 0.7483884727302516, "learning_rate": 4.682562854825629e-06, "loss": 0.7224, "step": 5383 }, { "epoch": 0.15719249073019767, "grad_norm": 0.7654745121753904, "learning_rate": 4.682400648824007e-06, "loss": 0.6614, "step": 5384 }, { "epoch": 0.15722168695804503, "grad_norm": 0.7180526491165907, "learning_rate": 4.682238442822385e-06, "loss": 0.6177, "step": 5385 }, { "epoch": 0.1572508831858924, "grad_norm": 0.7711183555475856, "learning_rate": 4.6820762368207624e-06, "loss": 0.6957, "step": 5386 }, { "epoch": 0.15728007941373975, "grad_norm": 0.7353707210759941, "learning_rate": 4.6819140308191405e-06, "loss": 0.6815, "step": 5387 }, { "epoch": 0.15730927564158712, "grad_norm": 0.7671706507042448, "learning_rate": 4.6817518248175185e-06, "loss": 0.706, "step": 5388 }, { "epoch": 0.15733847186943448, "grad_norm": 0.7691986907210777, "learning_rate": 4.6815896188158965e-06, "loss": 0.708, "step": 5389 }, { "epoch": 0.15736766809728184, "grad_norm": 0.8034475801983545, "learning_rate": 4.6814274128142745e-06, "loss": 0.6949, "step": 5390 }, { "epoch": 0.1573968643251292, "grad_norm": 0.7444674183525123, "learning_rate": 4.6812652068126525e-06, "loss": 0.6484, "step": 5391 }, { "epoch": 0.15742606055297656, "grad_norm": 0.9305381955612002, "learning_rate": 4.6811030008110305e-06, "loss": 0.6742, "step": 5392 }, { "epoch": 0.15745525678082392, "grad_norm": 0.830686219644087, "learning_rate": 4.6809407948094085e-06, "loss": 0.7189, "step": 5393 }, { "epoch": 0.15748445300867128, "grad_norm": 0.7592252836360889, "learning_rate": 4.6807785888077865e-06, "loss": 0.7339, "step": 5394 }, { "epoch": 0.15751364923651864, "grad_norm": 0.7440465035626803, "learning_rate": 4.680616382806164e-06, "loss": 0.5576, "step": 5395 }, { "epoch": 0.157542845464366, "grad_norm": 0.8552105283361732, "learning_rate": 4.680454176804542e-06, "loss": 0.7305, "step": 5396 }, { "epoch": 0.15757204169221337, "grad_norm": 0.7493925108643325, "learning_rate": 4.68029197080292e-06, "loss": 0.7031, "step": 5397 }, { "epoch": 0.15760123792006073, "grad_norm": 0.7793534629718195, "learning_rate": 4.680129764801298e-06, "loss": 0.672, "step": 5398 }, { "epoch": 0.1576304341479081, "grad_norm": 0.8468379714013878, "learning_rate": 4.6799675587996765e-06, "loss": 0.7412, "step": 5399 }, { "epoch": 0.15765963037575545, "grad_norm": 0.7452839395958739, "learning_rate": 4.6798053527980545e-06, "loss": 0.6971, "step": 5400 }, { "epoch": 0.1576888266036028, "grad_norm": 0.8633561708534729, "learning_rate": 4.679643146796432e-06, "loss": 0.8142, "step": 5401 }, { "epoch": 0.15771802283145017, "grad_norm": 0.7581045619274974, "learning_rate": 4.67948094079481e-06, "loss": 0.6533, "step": 5402 }, { "epoch": 0.15774721905929753, "grad_norm": 0.7628802258427096, "learning_rate": 4.679318734793188e-06, "loss": 0.6474, "step": 5403 }, { "epoch": 0.1577764152871449, "grad_norm": 0.7458793816297281, "learning_rate": 4.679156528791566e-06, "loss": 0.6112, "step": 5404 }, { "epoch": 0.15780561151499226, "grad_norm": 0.7356927253827749, "learning_rate": 4.678994322789944e-06, "loss": 0.6588, "step": 5405 }, { "epoch": 0.15783480774283962, "grad_norm": 0.8001447473511856, "learning_rate": 4.678832116788322e-06, "loss": 0.7774, "step": 5406 }, { "epoch": 0.15786400397068698, "grad_norm": 0.8937439281543786, "learning_rate": 4.6786699107867e-06, "loss": 0.77, "step": 5407 }, { "epoch": 0.15789320019853434, "grad_norm": 0.7854756248314247, "learning_rate": 4.678507704785078e-06, "loss": 0.7292, "step": 5408 }, { "epoch": 0.1579223964263817, "grad_norm": 0.8003218711132412, "learning_rate": 4.678345498783456e-06, "loss": 0.7782, "step": 5409 }, { "epoch": 0.15795159265422906, "grad_norm": 0.700028820092892, "learning_rate": 4.678183292781833e-06, "loss": 0.5655, "step": 5410 }, { "epoch": 0.15798078888207642, "grad_norm": 0.7479019013240567, "learning_rate": 4.678021086780211e-06, "loss": 0.6733, "step": 5411 }, { "epoch": 0.15800998510992378, "grad_norm": 0.743221187936617, "learning_rate": 4.677858880778589e-06, "loss": 0.6754, "step": 5412 }, { "epoch": 0.15803918133777117, "grad_norm": 0.8461774046984492, "learning_rate": 4.677696674776967e-06, "loss": 0.7183, "step": 5413 }, { "epoch": 0.15806837756561853, "grad_norm": 0.7490476450260831, "learning_rate": 4.677534468775345e-06, "loss": 0.6367, "step": 5414 }, { "epoch": 0.1580975737934659, "grad_norm": 0.7983042794403507, "learning_rate": 4.677372262773723e-06, "loss": 0.7399, "step": 5415 }, { "epoch": 0.15812677002131326, "grad_norm": 0.7693278318857205, "learning_rate": 4.677210056772101e-06, "loss": 0.695, "step": 5416 }, { "epoch": 0.15815596624916062, "grad_norm": 0.7388344750753402, "learning_rate": 4.677047850770479e-06, "loss": 0.6501, "step": 5417 }, { "epoch": 0.15818516247700798, "grad_norm": 0.7288759460653256, "learning_rate": 4.676885644768857e-06, "loss": 0.6735, "step": 5418 }, { "epoch": 0.15821435870485534, "grad_norm": 0.8422190594026848, "learning_rate": 4.676723438767235e-06, "loss": 0.7617, "step": 5419 }, { "epoch": 0.1582435549327027, "grad_norm": 0.7339211725046089, "learning_rate": 4.676561232765613e-06, "loss": 0.6826, "step": 5420 }, { "epoch": 0.15827275116055006, "grad_norm": 0.7418963075622851, "learning_rate": 4.676399026763991e-06, "loss": 0.694, "step": 5421 }, { "epoch": 0.15830194738839742, "grad_norm": 0.9149920408650276, "learning_rate": 4.676236820762369e-06, "loss": 0.7611, "step": 5422 }, { "epoch": 0.15833114361624478, "grad_norm": 0.7926221312954767, "learning_rate": 4.676074614760747e-06, "loss": 0.7823, "step": 5423 }, { "epoch": 0.15836033984409215, "grad_norm": 0.8080887937810226, "learning_rate": 4.675912408759124e-06, "loss": 0.7541, "step": 5424 }, { "epoch": 0.1583895360719395, "grad_norm": 0.7485905533866354, "learning_rate": 4.675750202757502e-06, "loss": 0.6829, "step": 5425 }, { "epoch": 0.15841873229978687, "grad_norm": 0.7529115889472431, "learning_rate": 4.67558799675588e-06, "loss": 0.6667, "step": 5426 }, { "epoch": 0.15844792852763423, "grad_norm": 0.7470311436237679, "learning_rate": 4.675425790754258e-06, "loss": 0.7097, "step": 5427 }, { "epoch": 0.1584771247554816, "grad_norm": 0.7443855874509291, "learning_rate": 4.675263584752636e-06, "loss": 0.6249, "step": 5428 }, { "epoch": 0.15850632098332895, "grad_norm": 0.9501892836880493, "learning_rate": 4.675101378751014e-06, "loss": 0.6582, "step": 5429 }, { "epoch": 0.1585355172111763, "grad_norm": 0.7346302861770821, "learning_rate": 4.674939172749392e-06, "loss": 0.6537, "step": 5430 }, { "epoch": 0.15856471343902367, "grad_norm": 0.7858770142104569, "learning_rate": 4.67477696674777e-06, "loss": 0.658, "step": 5431 }, { "epoch": 0.15859390966687104, "grad_norm": 0.7702700432601783, "learning_rate": 4.674614760746148e-06, "loss": 0.6846, "step": 5432 }, { "epoch": 0.1586231058947184, "grad_norm": 0.8129001429819075, "learning_rate": 4.674452554744525e-06, "loss": 0.7879, "step": 5433 }, { "epoch": 0.15865230212256576, "grad_norm": 0.7247081451909747, "learning_rate": 4.674290348742903e-06, "loss": 0.6284, "step": 5434 }, { "epoch": 0.15868149835041312, "grad_norm": 0.7528899842943702, "learning_rate": 4.674128142741281e-06, "loss": 0.7402, "step": 5435 }, { "epoch": 0.15871069457826048, "grad_norm": 0.7138092379983555, "learning_rate": 4.673965936739659e-06, "loss": 0.6318, "step": 5436 }, { "epoch": 0.15873989080610784, "grad_norm": 0.7479456865258995, "learning_rate": 4.673803730738038e-06, "loss": 0.731, "step": 5437 }, { "epoch": 0.1587690870339552, "grad_norm": 0.8282495577457285, "learning_rate": 4.673641524736416e-06, "loss": 0.6758, "step": 5438 }, { "epoch": 0.15879828326180256, "grad_norm": 0.7567185565343498, "learning_rate": 4.673479318734793e-06, "loss": 0.7534, "step": 5439 }, { "epoch": 0.15882747948964993, "grad_norm": 0.7885171266662618, "learning_rate": 4.6733171127331714e-06, "loss": 0.6972, "step": 5440 }, { "epoch": 0.1588566757174973, "grad_norm": 0.7640825338599324, "learning_rate": 4.6731549067315494e-06, "loss": 0.6842, "step": 5441 }, { "epoch": 0.15888587194534465, "grad_norm": 0.7166481961823682, "learning_rate": 4.6729927007299274e-06, "loss": 0.6033, "step": 5442 }, { "epoch": 0.15891506817319204, "grad_norm": 0.7197116973159613, "learning_rate": 4.6728304947283054e-06, "loss": 0.664, "step": 5443 }, { "epoch": 0.1589442644010394, "grad_norm": 0.7674614590913859, "learning_rate": 4.6726682887266835e-06, "loss": 0.6925, "step": 5444 }, { "epoch": 0.15897346062888676, "grad_norm": 0.7479436194669755, "learning_rate": 4.6725060827250615e-06, "loss": 0.7153, "step": 5445 }, { "epoch": 0.15900265685673412, "grad_norm": 0.8950259002727813, "learning_rate": 4.6723438767234395e-06, "loss": 0.6463, "step": 5446 }, { "epoch": 0.15903185308458148, "grad_norm": 0.7541040675639169, "learning_rate": 4.6721816707218175e-06, "loss": 0.6758, "step": 5447 }, { "epoch": 0.15906104931242884, "grad_norm": 0.8153570174096987, "learning_rate": 4.672019464720195e-06, "loss": 0.8214, "step": 5448 }, { "epoch": 0.1590902455402762, "grad_norm": 0.8145444939286013, "learning_rate": 4.671857258718573e-06, "loss": 0.7916, "step": 5449 }, { "epoch": 0.15911944176812357, "grad_norm": 0.7416424339509792, "learning_rate": 4.671695052716951e-06, "loss": 0.647, "step": 5450 }, { "epoch": 0.15914863799597093, "grad_norm": 0.7109612982051993, "learning_rate": 4.671532846715329e-06, "loss": 0.6692, "step": 5451 }, { "epoch": 0.1591778342238183, "grad_norm": 0.7598064937631339, "learning_rate": 4.671370640713707e-06, "loss": 0.6949, "step": 5452 }, { "epoch": 0.15920703045166565, "grad_norm": 0.696349151199726, "learning_rate": 4.671208434712085e-06, "loss": 0.5837, "step": 5453 }, { "epoch": 0.159236226679513, "grad_norm": 0.737930191899615, "learning_rate": 4.671046228710463e-06, "loss": 0.6762, "step": 5454 }, { "epoch": 0.15926542290736037, "grad_norm": 0.8261640213212021, "learning_rate": 4.670884022708841e-06, "loss": 0.7548, "step": 5455 }, { "epoch": 0.15929461913520773, "grad_norm": 0.7294142569707067, "learning_rate": 4.670721816707219e-06, "loss": 0.6429, "step": 5456 }, { "epoch": 0.1593238153630551, "grad_norm": 0.8084738830396249, "learning_rate": 4.670559610705597e-06, "loss": 0.7394, "step": 5457 }, { "epoch": 0.15935301159090245, "grad_norm": 0.7686992752610847, "learning_rate": 4.670397404703975e-06, "loss": 0.6926, "step": 5458 }, { "epoch": 0.15938220781874982, "grad_norm": 0.7161874859517913, "learning_rate": 4.670235198702353e-06, "loss": 0.6824, "step": 5459 }, { "epoch": 0.15941140404659718, "grad_norm": 0.8030215541236025, "learning_rate": 4.670072992700731e-06, "loss": 0.8314, "step": 5460 }, { "epoch": 0.15944060027444454, "grad_norm": 0.7939968581468191, "learning_rate": 4.669910786699109e-06, "loss": 0.7684, "step": 5461 }, { "epoch": 0.1594697965022919, "grad_norm": 0.7730644639654387, "learning_rate": 4.669748580697486e-06, "loss": 0.7043, "step": 5462 }, { "epoch": 0.15949899273013926, "grad_norm": 0.723507048218302, "learning_rate": 4.669586374695864e-06, "loss": 0.6576, "step": 5463 }, { "epoch": 0.15952818895798662, "grad_norm": 0.7827203717651274, "learning_rate": 4.669424168694242e-06, "loss": 0.7341, "step": 5464 }, { "epoch": 0.15955738518583398, "grad_norm": 0.8719717990313841, "learning_rate": 4.66926196269262e-06, "loss": 0.7463, "step": 5465 }, { "epoch": 0.15958658141368134, "grad_norm": 0.7514175692066012, "learning_rate": 4.669099756690998e-06, "loss": 0.6566, "step": 5466 }, { "epoch": 0.1596157776415287, "grad_norm": 0.7658235608102543, "learning_rate": 4.668937550689376e-06, "loss": 0.7144, "step": 5467 }, { "epoch": 0.15964497386937607, "grad_norm": 0.7645396570332175, "learning_rate": 4.668775344687754e-06, "loss": 0.6744, "step": 5468 }, { "epoch": 0.15967417009722343, "grad_norm": 0.8522214470987319, "learning_rate": 4.668613138686132e-06, "loss": 0.8132, "step": 5469 }, { "epoch": 0.1597033663250708, "grad_norm": 0.7643746796744577, "learning_rate": 4.66845093268451e-06, "loss": 0.7143, "step": 5470 }, { "epoch": 0.15973256255291815, "grad_norm": 0.8250345141393055, "learning_rate": 4.668288726682887e-06, "loss": 0.8302, "step": 5471 }, { "epoch": 0.1597617587807655, "grad_norm": 0.8060173790475205, "learning_rate": 4.668126520681265e-06, "loss": 0.7045, "step": 5472 }, { "epoch": 0.1597909550086129, "grad_norm": 0.7106312901045801, "learning_rate": 4.667964314679643e-06, "loss": 0.6483, "step": 5473 }, { "epoch": 0.15982015123646026, "grad_norm": 0.7605318794646484, "learning_rate": 4.667802108678021e-06, "loss": 0.6329, "step": 5474 }, { "epoch": 0.15984934746430762, "grad_norm": 0.8008111927613087, "learning_rate": 4.6676399026764e-06, "loss": 0.7831, "step": 5475 }, { "epoch": 0.15987854369215498, "grad_norm": 0.7734018159133224, "learning_rate": 4.667477696674778e-06, "loss": 0.708, "step": 5476 }, { "epoch": 0.15990773992000235, "grad_norm": 0.8016487525222473, "learning_rate": 4.667315490673155e-06, "loss": 0.7428, "step": 5477 }, { "epoch": 0.1599369361478497, "grad_norm": 0.872230757955062, "learning_rate": 4.667153284671533e-06, "loss": 0.6944, "step": 5478 }, { "epoch": 0.15996613237569707, "grad_norm": 0.7733816159065061, "learning_rate": 4.666991078669911e-06, "loss": 0.7289, "step": 5479 }, { "epoch": 0.15999532860354443, "grad_norm": 0.7435430009343651, "learning_rate": 4.666828872668289e-06, "loss": 0.6676, "step": 5480 }, { "epoch": 0.1600245248313918, "grad_norm": 0.742028041679998, "learning_rate": 4.666666666666667e-06, "loss": 0.7125, "step": 5481 }, { "epoch": 0.16005372105923915, "grad_norm": 0.7937292627236452, "learning_rate": 4.666504460665045e-06, "loss": 0.7385, "step": 5482 }, { "epoch": 0.1600829172870865, "grad_norm": 0.7608371346319188, "learning_rate": 4.666342254663423e-06, "loss": 0.6876, "step": 5483 }, { "epoch": 0.16011211351493387, "grad_norm": 0.8716090246909467, "learning_rate": 4.666180048661801e-06, "loss": 0.8172, "step": 5484 }, { "epoch": 0.16014130974278123, "grad_norm": 0.7119815541476489, "learning_rate": 4.666017842660178e-06, "loss": 0.6102, "step": 5485 }, { "epoch": 0.1601705059706286, "grad_norm": 0.976341436550479, "learning_rate": 4.665855636658556e-06, "loss": 0.693, "step": 5486 }, { "epoch": 0.16019970219847596, "grad_norm": 0.7235716220321343, "learning_rate": 4.665693430656934e-06, "loss": 0.6031, "step": 5487 }, { "epoch": 0.16022889842632332, "grad_norm": 0.7396009270336925, "learning_rate": 4.665531224655312e-06, "loss": 0.67, "step": 5488 }, { "epoch": 0.16025809465417068, "grad_norm": 0.8124645673244169, "learning_rate": 4.66536901865369e-06, "loss": 0.7498, "step": 5489 }, { "epoch": 0.16028729088201804, "grad_norm": 0.7648416535739242, "learning_rate": 4.665206812652068e-06, "loss": 0.7241, "step": 5490 }, { "epoch": 0.1603164871098654, "grad_norm": 0.7590978838082538, "learning_rate": 4.665044606650446e-06, "loss": 0.7323, "step": 5491 }, { "epoch": 0.16034568333771276, "grad_norm": 0.9011424948897071, "learning_rate": 4.664882400648824e-06, "loss": 0.8578, "step": 5492 }, { "epoch": 0.16037487956556012, "grad_norm": 0.8244057196057708, "learning_rate": 4.664720194647202e-06, "loss": 0.716, "step": 5493 }, { "epoch": 0.16040407579340749, "grad_norm": 0.7803005727964248, "learning_rate": 4.66455798864558e-06, "loss": 0.7458, "step": 5494 }, { "epoch": 0.16043327202125485, "grad_norm": 1.370055794490399, "learning_rate": 4.664395782643958e-06, "loss": 0.7434, "step": 5495 }, { "epoch": 0.1604624682491022, "grad_norm": 0.7653398138844688, "learning_rate": 4.664233576642336e-06, "loss": 0.6215, "step": 5496 }, { "epoch": 0.16049166447694957, "grad_norm": 0.7902826290258561, "learning_rate": 4.664071370640714e-06, "loss": 0.7998, "step": 5497 }, { "epoch": 0.16052086070479693, "grad_norm": 0.7558243478372285, "learning_rate": 4.6639091646390924e-06, "loss": 0.7256, "step": 5498 }, { "epoch": 0.1605500569326443, "grad_norm": 0.7714385815932562, "learning_rate": 4.6637469586374704e-06, "loss": 0.6041, "step": 5499 }, { "epoch": 0.16057925316049165, "grad_norm": 0.7980913345618632, "learning_rate": 4.663584752635848e-06, "loss": 0.7082, "step": 5500 }, { "epoch": 0.16060844938833901, "grad_norm": 0.791899856304642, "learning_rate": 4.663422546634226e-06, "loss": 0.7505, "step": 5501 }, { "epoch": 0.16063764561618638, "grad_norm": 0.7685556528970621, "learning_rate": 4.663260340632604e-06, "loss": 0.7411, "step": 5502 }, { "epoch": 0.16066684184403376, "grad_norm": 0.92094238565102, "learning_rate": 4.663098134630982e-06, "loss": 0.7374, "step": 5503 }, { "epoch": 0.16069603807188113, "grad_norm": 0.6576332149452998, "learning_rate": 4.66293592862936e-06, "loss": 0.5474, "step": 5504 }, { "epoch": 0.1607252342997285, "grad_norm": 0.712028227295157, "learning_rate": 4.662773722627738e-06, "loss": 0.6179, "step": 5505 }, { "epoch": 0.16075443052757585, "grad_norm": 0.7334048947362144, "learning_rate": 4.662611516626116e-06, "loss": 0.6736, "step": 5506 }, { "epoch": 0.1607836267554232, "grad_norm": 0.7748465528330407, "learning_rate": 4.662449310624494e-06, "loss": 0.7056, "step": 5507 }, { "epoch": 0.16081282298327057, "grad_norm": 0.7397046013643075, "learning_rate": 4.662287104622872e-06, "loss": 0.6508, "step": 5508 }, { "epoch": 0.16084201921111793, "grad_norm": 0.7451973779775191, "learning_rate": 4.662124898621249e-06, "loss": 0.6864, "step": 5509 }, { "epoch": 0.1608712154389653, "grad_norm": 0.7934114592421234, "learning_rate": 4.661962692619627e-06, "loss": 0.6913, "step": 5510 }, { "epoch": 0.16090041166681265, "grad_norm": 0.8633060729013167, "learning_rate": 4.661800486618005e-06, "loss": 0.8422, "step": 5511 }, { "epoch": 0.16092960789466002, "grad_norm": 0.6999072089174094, "learning_rate": 4.661638280616384e-06, "loss": 0.6231, "step": 5512 }, { "epoch": 0.16095880412250738, "grad_norm": 0.7967882888912564, "learning_rate": 4.661476074614762e-06, "loss": 0.7309, "step": 5513 }, { "epoch": 0.16098800035035474, "grad_norm": 0.7551642004604726, "learning_rate": 4.66131386861314e-06, "loss": 0.6384, "step": 5514 }, { "epoch": 0.1610171965782021, "grad_norm": 0.7250748700000541, "learning_rate": 4.661151662611517e-06, "loss": 0.6677, "step": 5515 }, { "epoch": 0.16104639280604946, "grad_norm": 0.7554121620129566, "learning_rate": 4.660989456609895e-06, "loss": 0.7683, "step": 5516 }, { "epoch": 0.16107558903389682, "grad_norm": 0.6844449069695803, "learning_rate": 4.660827250608273e-06, "loss": 0.6166, "step": 5517 }, { "epoch": 0.16110478526174418, "grad_norm": 0.7479969877690356, "learning_rate": 4.660665044606651e-06, "loss": 0.7143, "step": 5518 }, { "epoch": 0.16113398148959154, "grad_norm": 0.7015933002162749, "learning_rate": 4.660502838605029e-06, "loss": 0.6856, "step": 5519 }, { "epoch": 0.1611631777174389, "grad_norm": 0.8091817196872084, "learning_rate": 4.660340632603407e-06, "loss": 0.7267, "step": 5520 }, { "epoch": 0.16119237394528627, "grad_norm": 0.7973838928100124, "learning_rate": 4.660178426601785e-06, "loss": 0.744, "step": 5521 }, { "epoch": 0.16122157017313363, "grad_norm": 1.248685772201201, "learning_rate": 4.660016220600163e-06, "loss": 0.8372, "step": 5522 }, { "epoch": 0.161250766400981, "grad_norm": 0.7894270961176195, "learning_rate": 4.65985401459854e-06, "loss": 0.7456, "step": 5523 }, { "epoch": 0.16127996262882835, "grad_norm": 0.8064802757933995, "learning_rate": 4.659691808596918e-06, "loss": 0.7503, "step": 5524 }, { "epoch": 0.1613091588566757, "grad_norm": 0.7661905617880552, "learning_rate": 4.659529602595296e-06, "loss": 0.7395, "step": 5525 }, { "epoch": 0.16133835508452307, "grad_norm": 0.9449297689247647, "learning_rate": 4.659367396593674e-06, "loss": 0.7042, "step": 5526 }, { "epoch": 0.16136755131237043, "grad_norm": 0.7347456892854122, "learning_rate": 4.659205190592052e-06, "loss": 0.6631, "step": 5527 }, { "epoch": 0.1613967475402178, "grad_norm": 0.7148279845987094, "learning_rate": 4.65904298459043e-06, "loss": 0.6595, "step": 5528 }, { "epoch": 0.16142594376806516, "grad_norm": 0.7779399374606194, "learning_rate": 4.658880778588808e-06, "loss": 0.6988, "step": 5529 }, { "epoch": 0.16145513999591252, "grad_norm": 0.7816869602479098, "learning_rate": 4.658718572587186e-06, "loss": 0.722, "step": 5530 }, { "epoch": 0.16148433622375988, "grad_norm": 0.7714979963478588, "learning_rate": 4.658556366585564e-06, "loss": 0.6752, "step": 5531 }, { "epoch": 0.16151353245160724, "grad_norm": 0.7048869476731495, "learning_rate": 4.658394160583942e-06, "loss": 0.6776, "step": 5532 }, { "epoch": 0.16154272867945463, "grad_norm": 0.7538691094313231, "learning_rate": 4.65823195458232e-06, "loss": 0.726, "step": 5533 }, { "epoch": 0.161571924907302, "grad_norm": 0.822307295122036, "learning_rate": 4.658069748580698e-06, "loss": 0.7229, "step": 5534 }, { "epoch": 0.16160112113514935, "grad_norm": 0.7229146389738741, "learning_rate": 4.657907542579076e-06, "loss": 0.6339, "step": 5535 }, { "epoch": 0.1616303173629967, "grad_norm": 0.8987066596371203, "learning_rate": 4.657745336577454e-06, "loss": 0.679, "step": 5536 }, { "epoch": 0.16165951359084407, "grad_norm": 0.785369549253533, "learning_rate": 4.657583130575832e-06, "loss": 0.7571, "step": 5537 }, { "epoch": 0.16168870981869143, "grad_norm": 0.7530382862507219, "learning_rate": 4.657420924574209e-06, "loss": 0.6584, "step": 5538 }, { "epoch": 0.1617179060465388, "grad_norm": 0.7532340144093592, "learning_rate": 4.657258718572587e-06, "loss": 0.6658, "step": 5539 }, { "epoch": 0.16174710227438616, "grad_norm": 0.7329430405549413, "learning_rate": 4.657096512570965e-06, "loss": 0.6992, "step": 5540 }, { "epoch": 0.16177629850223352, "grad_norm": 0.7879753503752684, "learning_rate": 4.656934306569343e-06, "loss": 0.7293, "step": 5541 }, { "epoch": 0.16180549473008088, "grad_norm": 0.7873881734070599, "learning_rate": 4.656772100567721e-06, "loss": 0.7371, "step": 5542 }, { "epoch": 0.16183469095792824, "grad_norm": 0.7802770639940272, "learning_rate": 4.656609894566099e-06, "loss": 0.7511, "step": 5543 }, { "epoch": 0.1618638871857756, "grad_norm": 0.8525735386135249, "learning_rate": 4.656447688564477e-06, "loss": 0.8042, "step": 5544 }, { "epoch": 0.16189308341362296, "grad_norm": 0.7274684162132277, "learning_rate": 4.656285482562855e-06, "loss": 0.6491, "step": 5545 }, { "epoch": 0.16192227964147032, "grad_norm": 0.6836923543166881, "learning_rate": 4.656123276561233e-06, "loss": 0.5877, "step": 5546 }, { "epoch": 0.16195147586931768, "grad_norm": 0.8019498956092928, "learning_rate": 4.6559610705596105e-06, "loss": 0.7402, "step": 5547 }, { "epoch": 0.16198067209716505, "grad_norm": 0.7283994032850771, "learning_rate": 4.6557988645579885e-06, "loss": 0.6794, "step": 5548 }, { "epoch": 0.1620098683250124, "grad_norm": 0.8900463685449428, "learning_rate": 4.6556366585563665e-06, "loss": 0.633, "step": 5549 }, { "epoch": 0.16203906455285977, "grad_norm": 0.7436982106891367, "learning_rate": 4.655474452554745e-06, "loss": 0.6555, "step": 5550 }, { "epoch": 0.16206826078070713, "grad_norm": 0.7298773034655395, "learning_rate": 4.655312246553123e-06, "loss": 0.6332, "step": 5551 }, { "epoch": 0.1620974570085545, "grad_norm": 0.7760389174505548, "learning_rate": 4.655150040551501e-06, "loss": 0.6539, "step": 5552 }, { "epoch": 0.16212665323640185, "grad_norm": 0.8007222594925005, "learning_rate": 4.6549878345498786e-06, "loss": 0.7335, "step": 5553 }, { "epoch": 0.1621558494642492, "grad_norm": 0.7229414388430883, "learning_rate": 4.6548256285482566e-06, "loss": 0.655, "step": 5554 }, { "epoch": 0.16218504569209657, "grad_norm": 0.6767364503851028, "learning_rate": 4.654663422546635e-06, "loss": 0.6272, "step": 5555 }, { "epoch": 0.16221424191994394, "grad_norm": 0.7141269992736287, "learning_rate": 4.654501216545013e-06, "loss": 0.6014, "step": 5556 }, { "epoch": 0.1622434381477913, "grad_norm": 0.8171376287891366, "learning_rate": 4.654339010543391e-06, "loss": 0.6926, "step": 5557 }, { "epoch": 0.16227263437563866, "grad_norm": 0.7383666425858676, "learning_rate": 4.654176804541769e-06, "loss": 0.682, "step": 5558 }, { "epoch": 0.16230183060348602, "grad_norm": 0.7127564740383895, "learning_rate": 4.654014598540147e-06, "loss": 0.6401, "step": 5559 }, { "epoch": 0.16233102683133338, "grad_norm": 0.7583997254822441, "learning_rate": 4.653852392538525e-06, "loss": 0.7212, "step": 5560 }, { "epoch": 0.16236022305918074, "grad_norm": 0.7236812619557363, "learning_rate": 4.653690186536902e-06, "loss": 0.6754, "step": 5561 }, { "epoch": 0.1623894192870281, "grad_norm": 0.7430463585623219, "learning_rate": 4.65352798053528e-06, "loss": 0.6695, "step": 5562 }, { "epoch": 0.1624186155148755, "grad_norm": 0.7849220254276909, "learning_rate": 4.653365774533658e-06, "loss": 0.682, "step": 5563 }, { "epoch": 0.16244781174272285, "grad_norm": 0.7013275282726713, "learning_rate": 4.653203568532036e-06, "loss": 0.6258, "step": 5564 }, { "epoch": 0.16247700797057021, "grad_norm": 0.7547386472251443, "learning_rate": 4.653041362530414e-06, "loss": 0.7406, "step": 5565 }, { "epoch": 0.16250620419841758, "grad_norm": 0.7891471745483012, "learning_rate": 4.652879156528792e-06, "loss": 0.7349, "step": 5566 }, { "epoch": 0.16253540042626494, "grad_norm": 0.9934582900046545, "learning_rate": 4.65271695052717e-06, "loss": 0.6672, "step": 5567 }, { "epoch": 0.1625645966541123, "grad_norm": 0.8113640196732612, "learning_rate": 4.652554744525548e-06, "loss": 0.7393, "step": 5568 }, { "epoch": 0.16259379288195966, "grad_norm": 0.8469524719293355, "learning_rate": 4.652392538523926e-06, "loss": 0.7418, "step": 5569 }, { "epoch": 0.16262298910980702, "grad_norm": 0.7535703734223618, "learning_rate": 4.652230332522304e-06, "loss": 0.6968, "step": 5570 }, { "epoch": 0.16265218533765438, "grad_norm": 0.7475617083511392, "learning_rate": 4.652068126520682e-06, "loss": 0.6899, "step": 5571 }, { "epoch": 0.16268138156550174, "grad_norm": 0.8346197606394071, "learning_rate": 4.65190592051906e-06, "loss": 0.6821, "step": 5572 }, { "epoch": 0.1627105777933491, "grad_norm": 0.7883593926811369, "learning_rate": 4.651743714517438e-06, "loss": 0.702, "step": 5573 }, { "epoch": 0.16273977402119646, "grad_norm": 0.818375484872594, "learning_rate": 4.651581508515816e-06, "loss": 0.7447, "step": 5574 }, { "epoch": 0.16276897024904383, "grad_norm": 0.7509957625832024, "learning_rate": 4.651419302514194e-06, "loss": 0.6973, "step": 5575 }, { "epoch": 0.1627981664768912, "grad_norm": 0.8843166807286716, "learning_rate": 4.651257096512571e-06, "loss": 0.7073, "step": 5576 }, { "epoch": 0.16282736270473855, "grad_norm": 0.790446997796276, "learning_rate": 4.651094890510949e-06, "loss": 0.7209, "step": 5577 }, { "epoch": 0.1628565589325859, "grad_norm": 0.8260155694754447, "learning_rate": 4.650932684509327e-06, "loss": 0.771, "step": 5578 }, { "epoch": 0.16288575516043327, "grad_norm": 0.7360769194367913, "learning_rate": 4.650770478507705e-06, "loss": 0.7056, "step": 5579 }, { "epoch": 0.16291495138828063, "grad_norm": 0.7012804840575227, "learning_rate": 4.650608272506083e-06, "loss": 0.6174, "step": 5580 }, { "epoch": 0.162944147616128, "grad_norm": 0.758331080072708, "learning_rate": 4.650446066504461e-06, "loss": 0.7366, "step": 5581 }, { "epoch": 0.16297334384397535, "grad_norm": 0.7968342071316151, "learning_rate": 4.650283860502839e-06, "loss": 0.7031, "step": 5582 }, { "epoch": 0.16300254007182272, "grad_norm": 0.7568341404759399, "learning_rate": 4.650121654501217e-06, "loss": 0.7242, "step": 5583 }, { "epoch": 0.16303173629967008, "grad_norm": 0.8280932120719074, "learning_rate": 4.649959448499595e-06, "loss": 0.7607, "step": 5584 }, { "epoch": 0.16306093252751744, "grad_norm": 0.7721066737448342, "learning_rate": 4.649797242497972e-06, "loss": 0.6786, "step": 5585 }, { "epoch": 0.1630901287553648, "grad_norm": 0.8111528725457899, "learning_rate": 4.64963503649635e-06, "loss": 0.7437, "step": 5586 }, { "epoch": 0.16311932498321216, "grad_norm": 0.68942788782518, "learning_rate": 4.649472830494728e-06, "loss": 0.5984, "step": 5587 }, { "epoch": 0.16314852121105952, "grad_norm": 0.7906394281260876, "learning_rate": 4.649310624493107e-06, "loss": 0.7265, "step": 5588 }, { "epoch": 0.16317771743890688, "grad_norm": 0.6656490594656574, "learning_rate": 4.649148418491485e-06, "loss": 0.6335, "step": 5589 }, { "epoch": 0.16320691366675424, "grad_norm": 0.7870727545702081, "learning_rate": 4.648986212489863e-06, "loss": 0.7177, "step": 5590 }, { "epoch": 0.1632361098946016, "grad_norm": 0.7208127185087391, "learning_rate": 4.64882400648824e-06, "loss": 0.5978, "step": 5591 }, { "epoch": 0.16326530612244897, "grad_norm": 0.7857717775520344, "learning_rate": 4.648661800486618e-06, "loss": 0.7215, "step": 5592 }, { "epoch": 0.16329450235029636, "grad_norm": 0.7140274377294797, "learning_rate": 4.648499594484996e-06, "loss": 0.6212, "step": 5593 }, { "epoch": 0.16332369857814372, "grad_norm": 0.7142236950393943, "learning_rate": 4.648337388483374e-06, "loss": 0.6688, "step": 5594 }, { "epoch": 0.16335289480599108, "grad_norm": 0.7640620534385176, "learning_rate": 4.648175182481752e-06, "loss": 0.7005, "step": 5595 }, { "epoch": 0.16338209103383844, "grad_norm": 0.7488951856796167, "learning_rate": 4.64801297648013e-06, "loss": 0.6299, "step": 5596 }, { "epoch": 0.1634112872616858, "grad_norm": 0.7930609389466863, "learning_rate": 4.647850770478508e-06, "loss": 0.6582, "step": 5597 }, { "epoch": 0.16344048348953316, "grad_norm": 0.7191226143320583, "learning_rate": 4.647688564476886e-06, "loss": 0.5763, "step": 5598 }, { "epoch": 0.16346967971738052, "grad_norm": 0.7488965906907983, "learning_rate": 4.6475263584752635e-06, "loss": 0.684, "step": 5599 }, { "epoch": 0.16349887594522788, "grad_norm": 0.7312696505201113, "learning_rate": 4.6473641524736415e-06, "loss": 0.6989, "step": 5600 }, { "epoch": 0.16352807217307525, "grad_norm": 0.7323047042680764, "learning_rate": 4.6472019464720195e-06, "loss": 0.6826, "step": 5601 }, { "epoch": 0.1635572684009226, "grad_norm": 0.7626166504334352, "learning_rate": 4.6470397404703975e-06, "loss": 0.76, "step": 5602 }, { "epoch": 0.16358646462876997, "grad_norm": 0.7590802458683971, "learning_rate": 4.6468775344687755e-06, "loss": 0.7131, "step": 5603 }, { "epoch": 0.16361566085661733, "grad_norm": 0.8253369648484739, "learning_rate": 4.6467153284671535e-06, "loss": 0.6382, "step": 5604 }, { "epoch": 0.1636448570844647, "grad_norm": 0.7292272048491235, "learning_rate": 4.6465531224655315e-06, "loss": 0.6317, "step": 5605 }, { "epoch": 0.16367405331231205, "grad_norm": 0.7521582031944107, "learning_rate": 4.6463909164639095e-06, "loss": 0.7373, "step": 5606 }, { "epoch": 0.1637032495401594, "grad_norm": 0.8538637735845103, "learning_rate": 4.6462287104622875e-06, "loss": 0.7115, "step": 5607 }, { "epoch": 0.16373244576800677, "grad_norm": 0.7030623413456417, "learning_rate": 4.6460665044606656e-06, "loss": 0.6423, "step": 5608 }, { "epoch": 0.16376164199585413, "grad_norm": 0.754285247042415, "learning_rate": 4.6459042984590436e-06, "loss": 0.7501, "step": 5609 }, { "epoch": 0.1637908382237015, "grad_norm": 0.697678165634523, "learning_rate": 4.6457420924574216e-06, "loss": 0.6071, "step": 5610 }, { "epoch": 0.16382003445154886, "grad_norm": 0.9118925333162716, "learning_rate": 4.6455798864557996e-06, "loss": 0.8133, "step": 5611 }, { "epoch": 0.16384923067939622, "grad_norm": 1.1335089643311396, "learning_rate": 4.645417680454178e-06, "loss": 0.6838, "step": 5612 }, { "epoch": 0.16387842690724358, "grad_norm": 0.7882286831498603, "learning_rate": 4.645255474452556e-06, "loss": 0.7054, "step": 5613 }, { "epoch": 0.16390762313509094, "grad_norm": 0.7081099481833873, "learning_rate": 4.645093268450933e-06, "loss": 0.6615, "step": 5614 }, { "epoch": 0.1639368193629383, "grad_norm": 0.7644240546976151, "learning_rate": 4.644931062449311e-06, "loss": 0.7171, "step": 5615 }, { "epoch": 0.16396601559078566, "grad_norm": 0.7880082622925946, "learning_rate": 4.644768856447689e-06, "loss": 0.7893, "step": 5616 }, { "epoch": 0.16399521181863302, "grad_norm": 0.7048547614334507, "learning_rate": 4.644606650446067e-06, "loss": 0.6464, "step": 5617 }, { "epoch": 0.16402440804648039, "grad_norm": 0.7596853504867653, "learning_rate": 4.644444444444445e-06, "loss": 0.6996, "step": 5618 }, { "epoch": 0.16405360427432775, "grad_norm": 0.7335051280648737, "learning_rate": 4.644282238442823e-06, "loss": 0.6868, "step": 5619 }, { "epoch": 0.1640828005021751, "grad_norm": 0.9315102569944833, "learning_rate": 4.644120032441201e-06, "loss": 0.6938, "step": 5620 }, { "epoch": 0.16411199673002247, "grad_norm": 0.7768382465520545, "learning_rate": 4.643957826439579e-06, "loss": 0.7449, "step": 5621 }, { "epoch": 0.16414119295786983, "grad_norm": 0.7453754702578981, "learning_rate": 4.643795620437957e-06, "loss": 0.6401, "step": 5622 }, { "epoch": 0.1641703891857172, "grad_norm": 0.7360569093024836, "learning_rate": 4.643633414436334e-06, "loss": 0.6704, "step": 5623 }, { "epoch": 0.16419958541356458, "grad_norm": 0.8322316075306454, "learning_rate": 4.643471208434712e-06, "loss": 0.7133, "step": 5624 }, { "epoch": 0.16422878164141194, "grad_norm": 0.8082284634566076, "learning_rate": 4.64330900243309e-06, "loss": 0.6904, "step": 5625 }, { "epoch": 0.1642579778692593, "grad_norm": 0.7505687535766239, "learning_rate": 4.643146796431469e-06, "loss": 0.767, "step": 5626 }, { "epoch": 0.16428717409710666, "grad_norm": 0.6989427666094096, "learning_rate": 4.642984590429847e-06, "loss": 0.6483, "step": 5627 }, { "epoch": 0.16431637032495403, "grad_norm": 0.7481062831901303, "learning_rate": 4.642822384428224e-06, "loss": 0.6851, "step": 5628 }, { "epoch": 0.1643455665528014, "grad_norm": 0.8058092461226579, "learning_rate": 4.642660178426602e-06, "loss": 0.7913, "step": 5629 }, { "epoch": 0.16437476278064875, "grad_norm": 0.8223444244102442, "learning_rate": 4.64249797242498e-06, "loss": 0.7243, "step": 5630 }, { "epoch": 0.1644039590084961, "grad_norm": 0.7224571962616894, "learning_rate": 4.642335766423358e-06, "loss": 0.6351, "step": 5631 }, { "epoch": 0.16443315523634347, "grad_norm": 0.7624128450000905, "learning_rate": 4.642173560421736e-06, "loss": 0.6456, "step": 5632 }, { "epoch": 0.16446235146419083, "grad_norm": 0.7380695590685346, "learning_rate": 4.642011354420114e-06, "loss": 0.672, "step": 5633 }, { "epoch": 0.1644915476920382, "grad_norm": 0.6942751145903915, "learning_rate": 4.641849148418492e-06, "loss": 0.6248, "step": 5634 }, { "epoch": 0.16452074391988555, "grad_norm": 0.7619083162634237, "learning_rate": 4.64168694241687e-06, "loss": 0.7155, "step": 5635 }, { "epoch": 0.16454994014773291, "grad_norm": 0.7776394515302267, "learning_rate": 4.641524736415248e-06, "loss": 0.7452, "step": 5636 }, { "epoch": 0.16457913637558028, "grad_norm": 0.8562233928306582, "learning_rate": 4.641362530413625e-06, "loss": 0.7571, "step": 5637 }, { "epoch": 0.16460833260342764, "grad_norm": 0.8062138854388715, "learning_rate": 4.641200324412003e-06, "loss": 0.7438, "step": 5638 }, { "epoch": 0.164637528831275, "grad_norm": 0.7024647507320955, "learning_rate": 4.641038118410381e-06, "loss": 0.6139, "step": 5639 }, { "epoch": 0.16466672505912236, "grad_norm": 0.8306700548912979, "learning_rate": 4.640875912408759e-06, "loss": 0.7109, "step": 5640 }, { "epoch": 0.16469592128696972, "grad_norm": 0.7529704783829516, "learning_rate": 4.640713706407137e-06, "loss": 0.7071, "step": 5641 }, { "epoch": 0.16472511751481708, "grad_norm": 0.7571266152711987, "learning_rate": 4.640551500405515e-06, "loss": 0.6343, "step": 5642 }, { "epoch": 0.16475431374266444, "grad_norm": 0.8015062775785052, "learning_rate": 4.640389294403893e-06, "loss": 0.741, "step": 5643 }, { "epoch": 0.1647835099705118, "grad_norm": 0.8402878046549067, "learning_rate": 4.640227088402271e-06, "loss": 0.7099, "step": 5644 }, { "epoch": 0.16481270619835917, "grad_norm": 0.7318765214425264, "learning_rate": 4.640064882400649e-06, "loss": 0.6502, "step": 5645 }, { "epoch": 0.16484190242620653, "grad_norm": 0.7192213549182086, "learning_rate": 4.639902676399027e-06, "loss": 0.6184, "step": 5646 }, { "epoch": 0.1648710986540539, "grad_norm": 0.7184031663794541, "learning_rate": 4.639740470397405e-06, "loss": 0.643, "step": 5647 }, { "epoch": 0.16490029488190125, "grad_norm": 0.7433295163040085, "learning_rate": 4.639578264395783e-06, "loss": 0.5873, "step": 5648 }, { "epoch": 0.1649294911097486, "grad_norm": 0.7567563291953285, "learning_rate": 4.639416058394161e-06, "loss": 0.6176, "step": 5649 }, { "epoch": 0.16495868733759597, "grad_norm": 0.7119424042681148, "learning_rate": 4.639253852392539e-06, "loss": 0.6641, "step": 5650 }, { "epoch": 0.16498788356544333, "grad_norm": 0.8594223056773591, "learning_rate": 4.639091646390917e-06, "loss": 0.7151, "step": 5651 }, { "epoch": 0.1650170797932907, "grad_norm": 0.7742996406064234, "learning_rate": 4.6389294403892945e-06, "loss": 0.7162, "step": 5652 }, { "epoch": 0.16504627602113806, "grad_norm": 0.7482374466182498, "learning_rate": 4.6387672343876725e-06, "loss": 0.5525, "step": 5653 }, { "epoch": 0.16507547224898544, "grad_norm": 0.7293517539256837, "learning_rate": 4.6386050283860505e-06, "loss": 0.694, "step": 5654 }, { "epoch": 0.1651046684768328, "grad_norm": 0.6908914328590967, "learning_rate": 4.6384428223844285e-06, "loss": 0.6172, "step": 5655 }, { "epoch": 0.16513386470468017, "grad_norm": 0.7630531978060305, "learning_rate": 4.6382806163828065e-06, "loss": 0.6722, "step": 5656 }, { "epoch": 0.16516306093252753, "grad_norm": 0.8262087120192916, "learning_rate": 4.6381184103811845e-06, "loss": 0.8067, "step": 5657 }, { "epoch": 0.1651922571603749, "grad_norm": 0.7563951877563719, "learning_rate": 4.6379562043795625e-06, "loss": 0.6429, "step": 5658 }, { "epoch": 0.16522145338822225, "grad_norm": 0.7837266575330432, "learning_rate": 4.6377939983779405e-06, "loss": 0.7187, "step": 5659 }, { "epoch": 0.1652506496160696, "grad_norm": 0.7056919785413749, "learning_rate": 4.6376317923763185e-06, "loss": 0.6337, "step": 5660 }, { "epoch": 0.16527984584391697, "grad_norm": 0.7141749996738221, "learning_rate": 4.637469586374696e-06, "loss": 0.6592, "step": 5661 }, { "epoch": 0.16530904207176433, "grad_norm": 0.7694578316271887, "learning_rate": 4.637307380373074e-06, "loss": 0.6633, "step": 5662 }, { "epoch": 0.1653382382996117, "grad_norm": 0.7903628994983197, "learning_rate": 4.6371451743714525e-06, "loss": 0.7808, "step": 5663 }, { "epoch": 0.16536743452745906, "grad_norm": 0.9966063003227381, "learning_rate": 4.6369829683698305e-06, "loss": 0.8725, "step": 5664 }, { "epoch": 0.16539663075530642, "grad_norm": 0.7652497807858893, "learning_rate": 4.6368207623682086e-06, "loss": 0.6577, "step": 5665 }, { "epoch": 0.16542582698315378, "grad_norm": 0.7742869890831724, "learning_rate": 4.636658556366586e-06, "loss": 0.7208, "step": 5666 }, { "epoch": 0.16545502321100114, "grad_norm": 0.7572503287039524, "learning_rate": 4.636496350364964e-06, "loss": 0.7154, "step": 5667 }, { "epoch": 0.1654842194388485, "grad_norm": 0.7407502384365557, "learning_rate": 4.636334144363342e-06, "loss": 0.6463, "step": 5668 }, { "epoch": 0.16551341566669586, "grad_norm": 0.8017828313195783, "learning_rate": 4.63617193836172e-06, "loss": 0.7219, "step": 5669 }, { "epoch": 0.16554261189454322, "grad_norm": 0.7248382181087298, "learning_rate": 4.636009732360098e-06, "loss": 0.6563, "step": 5670 }, { "epoch": 0.16557180812239058, "grad_norm": 0.7354358091479243, "learning_rate": 4.635847526358476e-06, "loss": 0.6843, "step": 5671 }, { "epoch": 0.16560100435023795, "grad_norm": 0.7270287988161817, "learning_rate": 4.635685320356854e-06, "loss": 0.7116, "step": 5672 }, { "epoch": 0.1656302005780853, "grad_norm": 0.8146266179960178, "learning_rate": 4.635523114355232e-06, "loss": 0.7498, "step": 5673 }, { "epoch": 0.16565939680593267, "grad_norm": 0.7332293271126908, "learning_rate": 4.63536090835361e-06, "loss": 0.6534, "step": 5674 }, { "epoch": 0.16568859303378003, "grad_norm": 0.74012763828282, "learning_rate": 4.635198702351987e-06, "loss": 0.6719, "step": 5675 }, { "epoch": 0.1657177892616274, "grad_norm": 0.7754675901725759, "learning_rate": 4.635036496350365e-06, "loss": 0.717, "step": 5676 }, { "epoch": 0.16574698548947475, "grad_norm": 0.6992786505828418, "learning_rate": 4.634874290348743e-06, "loss": 0.6056, "step": 5677 }, { "epoch": 0.1657761817173221, "grad_norm": 0.7764705403080703, "learning_rate": 4.634712084347121e-06, "loss": 0.6929, "step": 5678 }, { "epoch": 0.16580537794516947, "grad_norm": 0.8381588195011314, "learning_rate": 4.634549878345499e-06, "loss": 0.6467, "step": 5679 }, { "epoch": 0.16583457417301684, "grad_norm": 0.8145629252439511, "learning_rate": 4.634387672343877e-06, "loss": 0.7097, "step": 5680 }, { "epoch": 0.1658637704008642, "grad_norm": 0.7664243274606094, "learning_rate": 4.634225466342255e-06, "loss": 0.6925, "step": 5681 }, { "epoch": 0.16589296662871156, "grad_norm": 1.1252366068479285, "learning_rate": 4.634063260340633e-06, "loss": 0.6515, "step": 5682 }, { "epoch": 0.16592216285655892, "grad_norm": 0.8320097738497164, "learning_rate": 4.633901054339011e-06, "loss": 0.7298, "step": 5683 }, { "epoch": 0.1659513590844063, "grad_norm": 0.7642219768924903, "learning_rate": 4.633738848337389e-06, "loss": 0.6856, "step": 5684 }, { "epoch": 0.16598055531225367, "grad_norm": 0.7714423540964597, "learning_rate": 4.633576642335767e-06, "loss": 0.7061, "step": 5685 }, { "epoch": 0.16600975154010103, "grad_norm": 0.7323465700611984, "learning_rate": 4.633414436334145e-06, "loss": 0.6758, "step": 5686 }, { "epoch": 0.1660389477679484, "grad_norm": 0.7951791171447046, "learning_rate": 4.633252230332523e-06, "loss": 0.7641, "step": 5687 }, { "epoch": 0.16606814399579575, "grad_norm": 1.0266673482973812, "learning_rate": 4.633090024330901e-06, "loss": 0.7116, "step": 5688 }, { "epoch": 0.16609734022364311, "grad_norm": 0.7695733112586305, "learning_rate": 4.632927818329279e-06, "loss": 0.6664, "step": 5689 }, { "epoch": 0.16612653645149048, "grad_norm": 0.7094881467476133, "learning_rate": 4.632765612327656e-06, "loss": 0.6592, "step": 5690 }, { "epoch": 0.16615573267933784, "grad_norm": 0.704866639584203, "learning_rate": 4.632603406326034e-06, "loss": 0.6082, "step": 5691 }, { "epoch": 0.1661849289071852, "grad_norm": 0.6697642511630935, "learning_rate": 4.632441200324412e-06, "loss": 0.6047, "step": 5692 }, { "epoch": 0.16621412513503256, "grad_norm": 0.7886866628551025, "learning_rate": 4.63227899432279e-06, "loss": 0.7191, "step": 5693 }, { "epoch": 0.16624332136287992, "grad_norm": 0.7891328819551384, "learning_rate": 4.632116788321168e-06, "loss": 0.6585, "step": 5694 }, { "epoch": 0.16627251759072728, "grad_norm": 0.7855649980593761, "learning_rate": 4.631954582319546e-06, "loss": 0.6916, "step": 5695 }, { "epoch": 0.16630171381857464, "grad_norm": 0.9570626777244773, "learning_rate": 4.631792376317924e-06, "loss": 0.7318, "step": 5696 }, { "epoch": 0.166330910046422, "grad_norm": 0.7278380342681595, "learning_rate": 4.631630170316302e-06, "loss": 0.6767, "step": 5697 }, { "epoch": 0.16636010627426936, "grad_norm": 0.7266382965654883, "learning_rate": 4.63146796431468e-06, "loss": 0.6213, "step": 5698 }, { "epoch": 0.16638930250211673, "grad_norm": 0.7725682306240566, "learning_rate": 4.631305758313057e-06, "loss": 0.7076, "step": 5699 }, { "epoch": 0.1664184987299641, "grad_norm": 0.7985212569929901, "learning_rate": 4.631143552311435e-06, "loss": 0.6765, "step": 5700 }, { "epoch": 0.16644769495781145, "grad_norm": 0.7872217186676917, "learning_rate": 4.630981346309814e-06, "loss": 0.6991, "step": 5701 }, { "epoch": 0.1664768911856588, "grad_norm": 0.6919264715337778, "learning_rate": 4.630819140308192e-06, "loss": 0.6425, "step": 5702 }, { "epoch": 0.16650608741350617, "grad_norm": 0.8093502730671119, "learning_rate": 4.63065693430657e-06, "loss": 0.7001, "step": 5703 }, { "epoch": 0.16653528364135353, "grad_norm": 0.6942321725808891, "learning_rate": 4.6304947283049474e-06, "loss": 0.625, "step": 5704 }, { "epoch": 0.1665644798692009, "grad_norm": 0.862209766354955, "learning_rate": 4.6303325223033254e-06, "loss": 0.8181, "step": 5705 }, { "epoch": 0.16659367609704825, "grad_norm": 0.8126892569092994, "learning_rate": 4.6301703163017034e-06, "loss": 0.7307, "step": 5706 }, { "epoch": 0.16662287232489562, "grad_norm": 0.7001761723484557, "learning_rate": 4.6300081103000815e-06, "loss": 0.6099, "step": 5707 }, { "epoch": 0.16665206855274298, "grad_norm": 0.876068095166527, "learning_rate": 4.6298459042984595e-06, "loss": 0.7778, "step": 5708 }, { "epoch": 0.16668126478059034, "grad_norm": 0.7739463197669209, "learning_rate": 4.6296836982968375e-06, "loss": 0.6838, "step": 5709 }, { "epoch": 0.1667104610084377, "grad_norm": 0.7400502568106805, "learning_rate": 4.6295214922952155e-06, "loss": 0.6098, "step": 5710 }, { "epoch": 0.16673965723628506, "grad_norm": 0.7134740633785991, "learning_rate": 4.6293592862935935e-06, "loss": 0.5975, "step": 5711 }, { "epoch": 0.16676885346413242, "grad_norm": 0.763725942375016, "learning_rate": 4.6291970802919715e-06, "loss": 0.7383, "step": 5712 }, { "epoch": 0.16679804969197978, "grad_norm": 0.689733887008244, "learning_rate": 4.629034874290349e-06, "loss": 0.6393, "step": 5713 }, { "epoch": 0.16682724591982717, "grad_norm": 0.7898254816970396, "learning_rate": 4.628872668288727e-06, "loss": 0.7746, "step": 5714 }, { "epoch": 0.16685644214767453, "grad_norm": 0.6960960976609452, "learning_rate": 4.628710462287105e-06, "loss": 0.5783, "step": 5715 }, { "epoch": 0.1668856383755219, "grad_norm": 0.7255499099477493, "learning_rate": 4.628548256285483e-06, "loss": 0.6497, "step": 5716 }, { "epoch": 0.16691483460336926, "grad_norm": 0.735380353898381, "learning_rate": 4.628386050283861e-06, "loss": 0.6543, "step": 5717 }, { "epoch": 0.16694403083121662, "grad_norm": 0.721491094621277, "learning_rate": 4.628223844282239e-06, "loss": 0.6462, "step": 5718 }, { "epoch": 0.16697322705906398, "grad_norm": 0.7484673440949902, "learning_rate": 4.628061638280617e-06, "loss": 0.6492, "step": 5719 }, { "epoch": 0.16700242328691134, "grad_norm": 0.7707542083851256, "learning_rate": 4.627899432278995e-06, "loss": 0.7473, "step": 5720 }, { "epoch": 0.1670316195147587, "grad_norm": 0.7900673079690647, "learning_rate": 4.627737226277373e-06, "loss": 0.7421, "step": 5721 }, { "epoch": 0.16706081574260606, "grad_norm": 0.7015765445580224, "learning_rate": 4.627575020275751e-06, "loss": 0.6282, "step": 5722 }, { "epoch": 0.16709001197045342, "grad_norm": 0.7204128448760542, "learning_rate": 4.627412814274129e-06, "loss": 0.6452, "step": 5723 }, { "epoch": 0.16711920819830078, "grad_norm": 0.7425356990547344, "learning_rate": 4.627250608272507e-06, "loss": 0.6468, "step": 5724 }, { "epoch": 0.16714840442614814, "grad_norm": 0.7056327489357611, "learning_rate": 4.627088402270885e-06, "loss": 0.5999, "step": 5725 }, { "epoch": 0.1671776006539955, "grad_norm": 0.7494987549503448, "learning_rate": 4.626926196269263e-06, "loss": 0.7284, "step": 5726 }, { "epoch": 0.16720679688184287, "grad_norm": 0.7967289800237891, "learning_rate": 4.626763990267641e-06, "loss": 0.7383, "step": 5727 }, { "epoch": 0.16723599310969023, "grad_norm": 0.7180967178081107, "learning_rate": 4.626601784266018e-06, "loss": 0.6802, "step": 5728 }, { "epoch": 0.1672651893375376, "grad_norm": 0.7691129059982933, "learning_rate": 4.626439578264396e-06, "loss": 0.7197, "step": 5729 }, { "epoch": 0.16729438556538495, "grad_norm": 0.7669880125298539, "learning_rate": 4.626277372262774e-06, "loss": 0.6856, "step": 5730 }, { "epoch": 0.1673235817932323, "grad_norm": 0.8740234474039412, "learning_rate": 4.626115166261152e-06, "loss": 0.8094, "step": 5731 }, { "epoch": 0.16735277802107967, "grad_norm": 0.7554847237188311, "learning_rate": 4.62595296025953e-06, "loss": 0.7287, "step": 5732 }, { "epoch": 0.16738197424892703, "grad_norm": 0.8088200743545892, "learning_rate": 4.625790754257908e-06, "loss": 0.7449, "step": 5733 }, { "epoch": 0.1674111704767744, "grad_norm": 0.7442509328976371, "learning_rate": 4.625628548256286e-06, "loss": 0.6809, "step": 5734 }, { "epoch": 0.16744036670462176, "grad_norm": 0.7023069914283967, "learning_rate": 4.625466342254664e-06, "loss": 0.6133, "step": 5735 }, { "epoch": 0.16746956293246912, "grad_norm": 1.3508108633766798, "learning_rate": 4.625304136253042e-06, "loss": 0.7615, "step": 5736 }, { "epoch": 0.16749875916031648, "grad_norm": 0.703033327405428, "learning_rate": 4.625141930251419e-06, "loss": 0.6368, "step": 5737 }, { "epoch": 0.16752795538816384, "grad_norm": 0.8458090996723038, "learning_rate": 4.624979724249797e-06, "loss": 0.7635, "step": 5738 }, { "epoch": 0.1675571516160112, "grad_norm": 0.8412456995838635, "learning_rate": 4.624817518248176e-06, "loss": 0.7908, "step": 5739 }, { "epoch": 0.16758634784385856, "grad_norm": 0.7958005819063949, "learning_rate": 4.624655312246554e-06, "loss": 0.7236, "step": 5740 }, { "epoch": 0.16761554407170592, "grad_norm": 0.7543980647787614, "learning_rate": 4.624493106244932e-06, "loss": 0.6843, "step": 5741 }, { "epoch": 0.16764474029955329, "grad_norm": 0.7422622122359287, "learning_rate": 4.624330900243309e-06, "loss": 0.7138, "step": 5742 }, { "epoch": 0.16767393652740065, "grad_norm": 0.6492006924245076, "learning_rate": 4.624168694241687e-06, "loss": 0.5747, "step": 5743 }, { "epoch": 0.16770313275524804, "grad_norm": 0.7183422100477942, "learning_rate": 4.624006488240065e-06, "loss": 0.6366, "step": 5744 }, { "epoch": 0.1677323289830954, "grad_norm": 0.77258915289939, "learning_rate": 4.623844282238443e-06, "loss": 0.7237, "step": 5745 }, { "epoch": 0.16776152521094276, "grad_norm": 0.7516353169422637, "learning_rate": 4.623682076236821e-06, "loss": 0.6901, "step": 5746 }, { "epoch": 0.16779072143879012, "grad_norm": 0.8759220686808993, "learning_rate": 4.623519870235199e-06, "loss": 0.684, "step": 5747 }, { "epoch": 0.16781991766663748, "grad_norm": 0.8267173161787181, "learning_rate": 4.623357664233577e-06, "loss": 0.7024, "step": 5748 }, { "epoch": 0.16784911389448484, "grad_norm": 0.7160477360343253, "learning_rate": 4.623195458231955e-06, "loss": 0.6203, "step": 5749 }, { "epoch": 0.1678783101223322, "grad_norm": 0.7419390094474789, "learning_rate": 4.623033252230333e-06, "loss": 0.727, "step": 5750 }, { "epoch": 0.16790750635017956, "grad_norm": 0.7102133634857082, "learning_rate": 4.62287104622871e-06, "loss": 0.6411, "step": 5751 }, { "epoch": 0.16793670257802693, "grad_norm": 0.7125545514255727, "learning_rate": 4.622708840227088e-06, "loss": 0.6258, "step": 5752 }, { "epoch": 0.1679658988058743, "grad_norm": 0.6765865865334918, "learning_rate": 4.622546634225466e-06, "loss": 0.5662, "step": 5753 }, { "epoch": 0.16799509503372165, "grad_norm": 0.7227786717703285, "learning_rate": 4.622384428223844e-06, "loss": 0.6316, "step": 5754 }, { "epoch": 0.168024291261569, "grad_norm": 0.8870992708693561, "learning_rate": 4.622222222222222e-06, "loss": 0.743, "step": 5755 }, { "epoch": 0.16805348748941637, "grad_norm": 0.7360418210819887, "learning_rate": 4.6220600162206e-06, "loss": 0.6572, "step": 5756 }, { "epoch": 0.16808268371726373, "grad_norm": 0.940139761623224, "learning_rate": 4.621897810218978e-06, "loss": 0.744, "step": 5757 }, { "epoch": 0.1681118799451111, "grad_norm": 0.7355556941673066, "learning_rate": 4.621735604217356e-06, "loss": 0.6731, "step": 5758 }, { "epoch": 0.16814107617295845, "grad_norm": 0.7246859100356968, "learning_rate": 4.621573398215734e-06, "loss": 0.6684, "step": 5759 }, { "epoch": 0.16817027240080581, "grad_norm": 0.7369353720614258, "learning_rate": 4.621411192214112e-06, "loss": 0.6374, "step": 5760 }, { "epoch": 0.16819946862865318, "grad_norm": 0.8573860818599963, "learning_rate": 4.6212489862124904e-06, "loss": 0.8481, "step": 5761 }, { "epoch": 0.16822866485650054, "grad_norm": 0.7715461960624739, "learning_rate": 4.6210867802108684e-06, "loss": 0.6995, "step": 5762 }, { "epoch": 0.1682578610843479, "grad_norm": 0.7342262905490969, "learning_rate": 4.6209245742092464e-06, "loss": 0.6928, "step": 5763 }, { "epoch": 0.16828705731219526, "grad_norm": 0.755122708287347, "learning_rate": 4.6207623682076245e-06, "loss": 0.7213, "step": 5764 }, { "epoch": 0.16831625354004262, "grad_norm": 0.7701220125044439, "learning_rate": 4.6206001622060025e-06, "loss": 0.6975, "step": 5765 }, { "epoch": 0.16834544976788998, "grad_norm": 0.7529146163431906, "learning_rate": 4.62043795620438e-06, "loss": 0.6852, "step": 5766 }, { "epoch": 0.16837464599573734, "grad_norm": 1.6255980249200328, "learning_rate": 4.620275750202758e-06, "loss": 0.7857, "step": 5767 }, { "epoch": 0.1684038422235847, "grad_norm": 0.7557489704265354, "learning_rate": 4.620113544201136e-06, "loss": 0.6271, "step": 5768 }, { "epoch": 0.16843303845143207, "grad_norm": 0.7792534961162083, "learning_rate": 4.619951338199514e-06, "loss": 0.7135, "step": 5769 }, { "epoch": 0.16846223467927943, "grad_norm": 0.7543234718903961, "learning_rate": 4.619789132197892e-06, "loss": 0.7084, "step": 5770 }, { "epoch": 0.1684914309071268, "grad_norm": 0.7372181966659668, "learning_rate": 4.61962692619627e-06, "loss": 0.6681, "step": 5771 }, { "epoch": 0.16852062713497415, "grad_norm": 0.7129992517018808, "learning_rate": 4.619464720194648e-06, "loss": 0.6117, "step": 5772 }, { "epoch": 0.1685498233628215, "grad_norm": 0.7216767421286461, "learning_rate": 4.619302514193026e-06, "loss": 0.6613, "step": 5773 }, { "epoch": 0.1685790195906689, "grad_norm": 0.697822400662939, "learning_rate": 4.619140308191403e-06, "loss": 0.5687, "step": 5774 }, { "epoch": 0.16860821581851626, "grad_norm": 0.7184836582685749, "learning_rate": 4.618978102189781e-06, "loss": 0.6347, "step": 5775 }, { "epoch": 0.16863741204636362, "grad_norm": 0.7886211768182697, "learning_rate": 4.618815896188159e-06, "loss": 0.6875, "step": 5776 }, { "epoch": 0.16866660827421098, "grad_norm": 0.7017681466625935, "learning_rate": 4.618653690186538e-06, "loss": 0.5936, "step": 5777 }, { "epoch": 0.16869580450205834, "grad_norm": 0.7459794903892687, "learning_rate": 4.618491484184916e-06, "loss": 0.6557, "step": 5778 }, { "epoch": 0.1687250007299057, "grad_norm": 0.9352934520054909, "learning_rate": 4.618329278183294e-06, "loss": 0.7358, "step": 5779 }, { "epoch": 0.16875419695775307, "grad_norm": 0.7207209366190428, "learning_rate": 4.618167072181671e-06, "loss": 0.6909, "step": 5780 }, { "epoch": 0.16878339318560043, "grad_norm": 0.7022050231094186, "learning_rate": 4.618004866180049e-06, "loss": 0.6099, "step": 5781 }, { "epoch": 0.1688125894134478, "grad_norm": 1.2393085121386036, "learning_rate": 4.617842660178427e-06, "loss": 0.722, "step": 5782 }, { "epoch": 0.16884178564129515, "grad_norm": 0.786767142172956, "learning_rate": 4.617680454176805e-06, "loss": 0.7215, "step": 5783 }, { "epoch": 0.1688709818691425, "grad_norm": 0.771895182944365, "learning_rate": 4.617518248175183e-06, "loss": 0.6504, "step": 5784 }, { "epoch": 0.16890017809698987, "grad_norm": 0.7495679814620857, "learning_rate": 4.617356042173561e-06, "loss": 0.6248, "step": 5785 }, { "epoch": 0.16892937432483723, "grad_norm": 0.7696566510039907, "learning_rate": 4.617193836171939e-06, "loss": 0.6668, "step": 5786 }, { "epoch": 0.1689585705526846, "grad_norm": 0.7608474774051994, "learning_rate": 4.617031630170317e-06, "loss": 0.7335, "step": 5787 }, { "epoch": 0.16898776678053196, "grad_norm": 0.702369202619859, "learning_rate": 4.616869424168695e-06, "loss": 0.6068, "step": 5788 }, { "epoch": 0.16901696300837932, "grad_norm": 0.8120853580113199, "learning_rate": 4.616707218167072e-06, "loss": 0.7209, "step": 5789 }, { "epoch": 0.16904615923622668, "grad_norm": 0.7524797355374706, "learning_rate": 4.61654501216545e-06, "loss": 0.6839, "step": 5790 }, { "epoch": 0.16907535546407404, "grad_norm": 0.7364352904023663, "learning_rate": 4.616382806163828e-06, "loss": 0.7138, "step": 5791 }, { "epoch": 0.1691045516919214, "grad_norm": 0.7626938552804573, "learning_rate": 4.616220600162206e-06, "loss": 0.68, "step": 5792 }, { "epoch": 0.16913374791976876, "grad_norm": 0.748427854368026, "learning_rate": 4.616058394160584e-06, "loss": 0.6743, "step": 5793 }, { "epoch": 0.16916294414761612, "grad_norm": 0.7365378333498579, "learning_rate": 4.615896188158962e-06, "loss": 0.6024, "step": 5794 }, { "epoch": 0.16919214037546348, "grad_norm": 0.7644038274401366, "learning_rate": 4.61573398215734e-06, "loss": 0.673, "step": 5795 }, { "epoch": 0.16922133660331085, "grad_norm": 0.8229467393014794, "learning_rate": 4.615571776155718e-06, "loss": 0.7472, "step": 5796 }, { "epoch": 0.1692505328311582, "grad_norm": 0.7928213469938541, "learning_rate": 4.615409570154096e-06, "loss": 0.7767, "step": 5797 }, { "epoch": 0.16927972905900557, "grad_norm": 0.7829546045030592, "learning_rate": 4.615247364152474e-06, "loss": 0.7297, "step": 5798 }, { "epoch": 0.16930892528685293, "grad_norm": 0.7568299525996234, "learning_rate": 4.615085158150852e-06, "loss": 0.7497, "step": 5799 }, { "epoch": 0.1693381215147003, "grad_norm": 1.0785386361851077, "learning_rate": 4.61492295214923e-06, "loss": 0.6844, "step": 5800 }, { "epoch": 0.16936731774254765, "grad_norm": 0.8472115865766725, "learning_rate": 4.614760746147608e-06, "loss": 0.8298, "step": 5801 }, { "epoch": 0.169396513970395, "grad_norm": 0.8059030273571631, "learning_rate": 4.614598540145986e-06, "loss": 0.7453, "step": 5802 }, { "epoch": 0.16942571019824237, "grad_norm": 0.721384573144945, "learning_rate": 4.614436334144364e-06, "loss": 0.6598, "step": 5803 }, { "epoch": 0.16945490642608976, "grad_norm": 0.7887449264187312, "learning_rate": 4.614274128142741e-06, "loss": 0.7702, "step": 5804 }, { "epoch": 0.16948410265393712, "grad_norm": 0.7792867821691326, "learning_rate": 4.614111922141119e-06, "loss": 0.6739, "step": 5805 }, { "epoch": 0.16951329888178449, "grad_norm": 0.7330909048983574, "learning_rate": 4.613949716139497e-06, "loss": 0.6934, "step": 5806 }, { "epoch": 0.16954249510963185, "grad_norm": 0.8537900962133286, "learning_rate": 4.613787510137875e-06, "loss": 0.8244, "step": 5807 }, { "epoch": 0.1695716913374792, "grad_norm": 0.8864974452157476, "learning_rate": 4.613625304136253e-06, "loss": 0.7782, "step": 5808 }, { "epoch": 0.16960088756532657, "grad_norm": 0.826042803434645, "learning_rate": 4.613463098134631e-06, "loss": 0.7425, "step": 5809 }, { "epoch": 0.16963008379317393, "grad_norm": 0.7162839490712839, "learning_rate": 4.613300892133009e-06, "loss": 0.6095, "step": 5810 }, { "epoch": 0.1696592800210213, "grad_norm": 0.7701339591522278, "learning_rate": 4.613138686131387e-06, "loss": 0.6693, "step": 5811 }, { "epoch": 0.16968847624886865, "grad_norm": 0.713755014157906, "learning_rate": 4.6129764801297645e-06, "loss": 0.6513, "step": 5812 }, { "epoch": 0.169717672476716, "grad_norm": 0.781684667595427, "learning_rate": 4.6128142741281425e-06, "loss": 0.7244, "step": 5813 }, { "epoch": 0.16974686870456338, "grad_norm": 0.8737046529871497, "learning_rate": 4.612652068126521e-06, "loss": 0.7162, "step": 5814 }, { "epoch": 0.16977606493241074, "grad_norm": 0.7360734085937085, "learning_rate": 4.612489862124899e-06, "loss": 0.6726, "step": 5815 }, { "epoch": 0.1698052611602581, "grad_norm": 0.7330105820249063, "learning_rate": 4.612327656123277e-06, "loss": 0.614, "step": 5816 }, { "epoch": 0.16983445738810546, "grad_norm": 0.7722749579849963, "learning_rate": 4.612165450121655e-06, "loss": 0.6633, "step": 5817 }, { "epoch": 0.16986365361595282, "grad_norm": 1.3000027977999165, "learning_rate": 4.612003244120033e-06, "loss": 0.7652, "step": 5818 }, { "epoch": 0.16989284984380018, "grad_norm": 0.7888129084653911, "learning_rate": 4.611841038118411e-06, "loss": 0.6951, "step": 5819 }, { "epoch": 0.16992204607164754, "grad_norm": 0.7697155812506156, "learning_rate": 4.611678832116789e-06, "loss": 0.6954, "step": 5820 }, { "epoch": 0.1699512422994949, "grad_norm": 0.868449023240542, "learning_rate": 4.611516626115167e-06, "loss": 0.8057, "step": 5821 }, { "epoch": 0.16998043852734226, "grad_norm": 0.7591528553427968, "learning_rate": 4.611354420113545e-06, "loss": 0.707, "step": 5822 }, { "epoch": 0.17000963475518963, "grad_norm": 0.6997533263554613, "learning_rate": 4.611192214111923e-06, "loss": 0.6382, "step": 5823 }, { "epoch": 0.170038830983037, "grad_norm": 0.8031384356717846, "learning_rate": 4.611030008110301e-06, "loss": 0.7997, "step": 5824 }, { "epoch": 0.17006802721088435, "grad_norm": 0.7981658611799208, "learning_rate": 4.610867802108679e-06, "loss": 0.7097, "step": 5825 }, { "epoch": 0.1700972234387317, "grad_norm": 0.8303726028805758, "learning_rate": 4.610705596107057e-06, "loss": 0.7557, "step": 5826 }, { "epoch": 0.17012641966657907, "grad_norm": 0.7549675565589816, "learning_rate": 4.610543390105434e-06, "loss": 0.6398, "step": 5827 }, { "epoch": 0.17015561589442643, "grad_norm": 0.750614123619045, "learning_rate": 4.610381184103812e-06, "loss": 0.6896, "step": 5828 }, { "epoch": 0.1701848121222738, "grad_norm": 0.7167616055753747, "learning_rate": 4.61021897810219e-06, "loss": 0.6398, "step": 5829 }, { "epoch": 0.17021400835012115, "grad_norm": 0.7695210146339218, "learning_rate": 4.610056772100568e-06, "loss": 0.7227, "step": 5830 }, { "epoch": 0.17024320457796852, "grad_norm": 0.9591130830985973, "learning_rate": 4.609894566098946e-06, "loss": 0.7517, "step": 5831 }, { "epoch": 0.17027240080581588, "grad_norm": 0.7269684486013172, "learning_rate": 4.609732360097324e-06, "loss": 0.6967, "step": 5832 }, { "epoch": 0.17030159703366324, "grad_norm": 0.7377431489560606, "learning_rate": 4.609570154095702e-06, "loss": 0.6817, "step": 5833 }, { "epoch": 0.1703307932615106, "grad_norm": 0.6811585651118347, "learning_rate": 4.60940794809408e-06, "loss": 0.5594, "step": 5834 }, { "epoch": 0.170359989489358, "grad_norm": 0.7002874889074004, "learning_rate": 4.609245742092458e-06, "loss": 0.6218, "step": 5835 }, { "epoch": 0.17038918571720535, "grad_norm": 0.7185442441235762, "learning_rate": 4.609083536090836e-06, "loss": 0.6387, "step": 5836 }, { "epoch": 0.1704183819450527, "grad_norm": 0.6997601434485125, "learning_rate": 4.608921330089214e-06, "loss": 0.6363, "step": 5837 }, { "epoch": 0.17044757817290007, "grad_norm": 0.8381391641167089, "learning_rate": 4.608759124087592e-06, "loss": 0.6351, "step": 5838 }, { "epoch": 0.17047677440074743, "grad_norm": 0.7657478059788104, "learning_rate": 4.60859691808597e-06, "loss": 0.7238, "step": 5839 }, { "epoch": 0.1705059706285948, "grad_norm": 0.7385006595013969, "learning_rate": 4.608434712084348e-06, "loss": 0.6281, "step": 5840 }, { "epoch": 0.17053516685644216, "grad_norm": 0.8798337484029867, "learning_rate": 4.608272506082726e-06, "loss": 0.7017, "step": 5841 }, { "epoch": 0.17056436308428952, "grad_norm": 0.7686885121672627, "learning_rate": 4.608110300081103e-06, "loss": 0.6879, "step": 5842 }, { "epoch": 0.17059355931213688, "grad_norm": 0.7897334634142749, "learning_rate": 4.607948094079481e-06, "loss": 0.7305, "step": 5843 }, { "epoch": 0.17062275553998424, "grad_norm": 0.7972521796741368, "learning_rate": 4.607785888077859e-06, "loss": 0.6922, "step": 5844 }, { "epoch": 0.1706519517678316, "grad_norm": 0.8192896449668718, "learning_rate": 4.607623682076237e-06, "loss": 0.7413, "step": 5845 }, { "epoch": 0.17068114799567896, "grad_norm": 0.7862394413242333, "learning_rate": 4.607461476074615e-06, "loss": 0.6418, "step": 5846 }, { "epoch": 0.17071034422352632, "grad_norm": 0.8405014088205738, "learning_rate": 4.607299270072993e-06, "loss": 0.7622, "step": 5847 }, { "epoch": 0.17073954045137368, "grad_norm": 0.8111390471356761, "learning_rate": 4.607137064071371e-06, "loss": 0.6626, "step": 5848 }, { "epoch": 0.17076873667922104, "grad_norm": 0.9120245306534646, "learning_rate": 4.606974858069749e-06, "loss": 0.7464, "step": 5849 }, { "epoch": 0.1707979329070684, "grad_norm": 0.7946169811164805, "learning_rate": 4.606812652068126e-06, "loss": 0.776, "step": 5850 }, { "epoch": 0.17082712913491577, "grad_norm": 0.7362756983684887, "learning_rate": 4.606650446066504e-06, "loss": 0.6573, "step": 5851 }, { "epoch": 0.17085632536276313, "grad_norm": 0.7354483175691394, "learning_rate": 4.606488240064883e-06, "loss": 0.6944, "step": 5852 }, { "epoch": 0.1708855215906105, "grad_norm": 0.8185144389158941, "learning_rate": 4.606326034063261e-06, "loss": 0.7408, "step": 5853 }, { "epoch": 0.17091471781845785, "grad_norm": 0.7124376173765402, "learning_rate": 4.606163828061639e-06, "loss": 0.5885, "step": 5854 }, { "epoch": 0.1709439140463052, "grad_norm": 0.7527392609292304, "learning_rate": 4.606001622060017e-06, "loss": 0.6026, "step": 5855 }, { "epoch": 0.17097311027415257, "grad_norm": 0.7262354206891307, "learning_rate": 4.605839416058394e-06, "loss": 0.6, "step": 5856 }, { "epoch": 0.17100230650199993, "grad_norm": 0.6938297580764682, "learning_rate": 4.605677210056772e-06, "loss": 0.6438, "step": 5857 }, { "epoch": 0.1710315027298473, "grad_norm": 0.7858317224119667, "learning_rate": 4.60551500405515e-06, "loss": 0.6866, "step": 5858 }, { "epoch": 0.17106069895769466, "grad_norm": 0.7460646149890624, "learning_rate": 4.605352798053528e-06, "loss": 0.7127, "step": 5859 }, { "epoch": 0.17108989518554202, "grad_norm": 0.844934893686795, "learning_rate": 4.605190592051906e-06, "loss": 0.7082, "step": 5860 }, { "epoch": 0.17111909141338938, "grad_norm": 0.8140110473436354, "learning_rate": 4.605028386050284e-06, "loss": 0.803, "step": 5861 }, { "epoch": 0.17114828764123674, "grad_norm": 0.7358354862775935, "learning_rate": 4.604866180048662e-06, "loss": 0.6403, "step": 5862 }, { "epoch": 0.1711774838690841, "grad_norm": 0.841862720438692, "learning_rate": 4.60470397404704e-06, "loss": 0.6825, "step": 5863 }, { "epoch": 0.17120668009693146, "grad_norm": 1.0821731879457077, "learning_rate": 4.604541768045418e-06, "loss": 0.7651, "step": 5864 }, { "epoch": 0.17123587632477885, "grad_norm": 0.7236959438743503, "learning_rate": 4.6043795620437955e-06, "loss": 0.6978, "step": 5865 }, { "epoch": 0.1712650725526262, "grad_norm": 0.7183447202805657, "learning_rate": 4.6042173560421735e-06, "loss": 0.6451, "step": 5866 }, { "epoch": 0.17129426878047357, "grad_norm": 1.0396971055559456, "learning_rate": 4.6040551500405515e-06, "loss": 0.6447, "step": 5867 }, { "epoch": 0.17132346500832094, "grad_norm": 0.7134181382297885, "learning_rate": 4.6038929440389295e-06, "loss": 0.6385, "step": 5868 }, { "epoch": 0.1713526612361683, "grad_norm": 0.7895820354663146, "learning_rate": 4.6037307380373075e-06, "loss": 0.7127, "step": 5869 }, { "epoch": 0.17138185746401566, "grad_norm": 0.7877160613368267, "learning_rate": 4.6035685320356855e-06, "loss": 0.6689, "step": 5870 }, { "epoch": 0.17141105369186302, "grad_norm": 0.8833362511932744, "learning_rate": 4.6034063260340636e-06, "loss": 0.6785, "step": 5871 }, { "epoch": 0.17144024991971038, "grad_norm": 0.7517641086609386, "learning_rate": 4.6032441200324416e-06, "loss": 0.7121, "step": 5872 }, { "epoch": 0.17146944614755774, "grad_norm": 0.807425861917134, "learning_rate": 4.6030819140308196e-06, "loss": 0.7218, "step": 5873 }, { "epoch": 0.1714986423754051, "grad_norm": 0.7542326418556701, "learning_rate": 4.6029197080291976e-06, "loss": 0.6975, "step": 5874 }, { "epoch": 0.17152783860325246, "grad_norm": 0.747732265300923, "learning_rate": 4.602757502027576e-06, "loss": 0.697, "step": 5875 }, { "epoch": 0.17155703483109982, "grad_norm": 0.7925291767342828, "learning_rate": 4.602595296025954e-06, "loss": 0.7671, "step": 5876 }, { "epoch": 0.1715862310589472, "grad_norm": 0.8447461317252909, "learning_rate": 4.602433090024332e-06, "loss": 0.7236, "step": 5877 }, { "epoch": 0.17161542728679455, "grad_norm": 0.7975043912487685, "learning_rate": 4.60227088402271e-06, "loss": 0.7794, "step": 5878 }, { "epoch": 0.1716446235146419, "grad_norm": 0.7227116760958442, "learning_rate": 4.602108678021087e-06, "loss": 0.6539, "step": 5879 }, { "epoch": 0.17167381974248927, "grad_norm": 0.7933084861496085, "learning_rate": 4.601946472019465e-06, "loss": 0.8229, "step": 5880 }, { "epoch": 0.17170301597033663, "grad_norm": 0.7741311384375427, "learning_rate": 4.601784266017843e-06, "loss": 0.6996, "step": 5881 }, { "epoch": 0.171732212198184, "grad_norm": 0.7477129547806669, "learning_rate": 4.601622060016221e-06, "loss": 0.6851, "step": 5882 }, { "epoch": 0.17176140842603135, "grad_norm": 0.7440624374504967, "learning_rate": 4.601459854014599e-06, "loss": 0.6695, "step": 5883 }, { "epoch": 0.17179060465387871, "grad_norm": 0.7511720619286808, "learning_rate": 4.601297648012977e-06, "loss": 0.7029, "step": 5884 }, { "epoch": 0.17181980088172608, "grad_norm": 0.7099464239330955, "learning_rate": 4.601135442011355e-06, "loss": 0.6431, "step": 5885 }, { "epoch": 0.17184899710957344, "grad_norm": 0.7103613365234451, "learning_rate": 4.600973236009733e-06, "loss": 0.6586, "step": 5886 }, { "epoch": 0.1718781933374208, "grad_norm": 0.8073894639120524, "learning_rate": 4.600811030008111e-06, "loss": 0.6858, "step": 5887 }, { "epoch": 0.17190738956526816, "grad_norm": 0.7527110562207712, "learning_rate": 4.600648824006488e-06, "loss": 0.706, "step": 5888 }, { "epoch": 0.17193658579311552, "grad_norm": 0.7376801358750456, "learning_rate": 4.600486618004866e-06, "loss": 0.6527, "step": 5889 }, { "epoch": 0.17196578202096288, "grad_norm": 0.7456737185268049, "learning_rate": 4.600324412003245e-06, "loss": 0.632, "step": 5890 }, { "epoch": 0.17199497824881024, "grad_norm": 0.7419263007456139, "learning_rate": 4.600162206001623e-06, "loss": 0.6776, "step": 5891 }, { "epoch": 0.1720241744766576, "grad_norm": 0.7420024798742086, "learning_rate": 4.600000000000001e-06, "loss": 0.6767, "step": 5892 }, { "epoch": 0.17205337070450497, "grad_norm": 0.7457026407049316, "learning_rate": 4.599837793998379e-06, "loss": 0.6802, "step": 5893 }, { "epoch": 0.17208256693235233, "grad_norm": 0.7572436253971851, "learning_rate": 4.599675587996756e-06, "loss": 0.674, "step": 5894 }, { "epoch": 0.17211176316019972, "grad_norm": 0.8704039855984619, "learning_rate": 4.599513381995134e-06, "loss": 0.6573, "step": 5895 }, { "epoch": 0.17214095938804708, "grad_norm": 0.7485749633812074, "learning_rate": 4.599351175993512e-06, "loss": 0.6604, "step": 5896 }, { "epoch": 0.17217015561589444, "grad_norm": 0.7879621828450385, "learning_rate": 4.59918896999189e-06, "loss": 0.7373, "step": 5897 }, { "epoch": 0.1721993518437418, "grad_norm": 0.7427979073787674, "learning_rate": 4.599026763990268e-06, "loss": 0.6627, "step": 5898 }, { "epoch": 0.17222854807158916, "grad_norm": 0.7072533542932424, "learning_rate": 4.598864557988646e-06, "loss": 0.6264, "step": 5899 }, { "epoch": 0.17225774429943652, "grad_norm": 0.8043778052624945, "learning_rate": 4.598702351987024e-06, "loss": 0.7849, "step": 5900 }, { "epoch": 0.17228694052728388, "grad_norm": 0.7694690520500974, "learning_rate": 4.598540145985402e-06, "loss": 0.6409, "step": 5901 }, { "epoch": 0.17231613675513124, "grad_norm": 0.7448266771185513, "learning_rate": 4.59837793998378e-06, "loss": 0.6844, "step": 5902 }, { "epoch": 0.1723453329829786, "grad_norm": 0.7175426858486527, "learning_rate": 4.598215733982157e-06, "loss": 0.6273, "step": 5903 }, { "epoch": 0.17237452921082597, "grad_norm": 0.7300165660244327, "learning_rate": 4.598053527980535e-06, "loss": 0.6656, "step": 5904 }, { "epoch": 0.17240372543867333, "grad_norm": 0.7309390209046425, "learning_rate": 4.597891321978913e-06, "loss": 0.6277, "step": 5905 }, { "epoch": 0.1724329216665207, "grad_norm": 0.7428289911553382, "learning_rate": 4.597729115977291e-06, "loss": 0.6616, "step": 5906 }, { "epoch": 0.17246211789436805, "grad_norm": 0.7744874675755679, "learning_rate": 4.597566909975669e-06, "loss": 0.7412, "step": 5907 }, { "epoch": 0.1724913141222154, "grad_norm": 0.787611468670435, "learning_rate": 4.597404703974047e-06, "loss": 0.6647, "step": 5908 }, { "epoch": 0.17252051035006277, "grad_norm": 0.7311908928393058, "learning_rate": 4.597242497972425e-06, "loss": 0.64, "step": 5909 }, { "epoch": 0.17254970657791013, "grad_norm": 0.681804568517144, "learning_rate": 4.597080291970803e-06, "loss": 0.6047, "step": 5910 }, { "epoch": 0.1725789028057575, "grad_norm": 0.7373033598365438, "learning_rate": 4.596918085969181e-06, "loss": 0.7116, "step": 5911 }, { "epoch": 0.17260809903360486, "grad_norm": 0.7764191572729628, "learning_rate": 4.596755879967559e-06, "loss": 0.6669, "step": 5912 }, { "epoch": 0.17263729526145222, "grad_norm": 0.6996768716141527, "learning_rate": 4.596593673965937e-06, "loss": 0.6139, "step": 5913 }, { "epoch": 0.17266649148929958, "grad_norm": 0.7052798585984602, "learning_rate": 4.596431467964315e-06, "loss": 0.5913, "step": 5914 }, { "epoch": 0.17269568771714694, "grad_norm": 0.7414726711592108, "learning_rate": 4.596269261962693e-06, "loss": 0.6433, "step": 5915 }, { "epoch": 0.1727248839449943, "grad_norm": 0.7973099280502546, "learning_rate": 4.596107055961071e-06, "loss": 0.677, "step": 5916 }, { "epoch": 0.17275408017284166, "grad_norm": 0.8134188213313771, "learning_rate": 4.5959448499594485e-06, "loss": 0.7939, "step": 5917 }, { "epoch": 0.17278327640068902, "grad_norm": 0.7705450154597631, "learning_rate": 4.5957826439578265e-06, "loss": 0.729, "step": 5918 }, { "epoch": 0.17281247262853638, "grad_norm": 0.7483161250397535, "learning_rate": 4.5956204379562045e-06, "loss": 0.668, "step": 5919 }, { "epoch": 0.17284166885638375, "grad_norm": 0.7907506642485181, "learning_rate": 4.5954582319545825e-06, "loss": 0.6777, "step": 5920 }, { "epoch": 0.1728708650842311, "grad_norm": 0.9322128961673755, "learning_rate": 4.5952960259529605e-06, "loss": 0.7339, "step": 5921 }, { "epoch": 0.17290006131207847, "grad_norm": 0.794733132361582, "learning_rate": 4.5951338199513385e-06, "loss": 0.7007, "step": 5922 }, { "epoch": 0.17292925753992583, "grad_norm": 0.7262533162762741, "learning_rate": 4.5949716139497165e-06, "loss": 0.6431, "step": 5923 }, { "epoch": 0.1729584537677732, "grad_norm": 0.8592590836048977, "learning_rate": 4.5948094079480945e-06, "loss": 0.6588, "step": 5924 }, { "epoch": 0.17298764999562058, "grad_norm": 0.7413105649942093, "learning_rate": 4.5946472019464725e-06, "loss": 0.682, "step": 5925 }, { "epoch": 0.17301684622346794, "grad_norm": 0.8402950297344207, "learning_rate": 4.59448499594485e-06, "loss": 0.8234, "step": 5926 }, { "epoch": 0.1730460424513153, "grad_norm": 0.7931468783450708, "learning_rate": 4.594322789943228e-06, "loss": 0.7213, "step": 5927 }, { "epoch": 0.17307523867916266, "grad_norm": 0.7430464769361252, "learning_rate": 4.5941605839416066e-06, "loss": 0.7192, "step": 5928 }, { "epoch": 0.17310443490701002, "grad_norm": 0.7129549962264912, "learning_rate": 4.5939983779399846e-06, "loss": 0.671, "step": 5929 }, { "epoch": 0.17313363113485739, "grad_norm": 0.7913480366763492, "learning_rate": 4.5938361719383626e-06, "loss": 0.7268, "step": 5930 }, { "epoch": 0.17316282736270475, "grad_norm": 0.7438859301610427, "learning_rate": 4.5936739659367406e-06, "loss": 0.7128, "step": 5931 }, { "epoch": 0.1731920235905521, "grad_norm": 0.7327289028739483, "learning_rate": 4.593511759935118e-06, "loss": 0.6592, "step": 5932 }, { "epoch": 0.17322121981839947, "grad_norm": 0.8254994036185703, "learning_rate": 4.593349553933496e-06, "loss": 0.7946, "step": 5933 }, { "epoch": 0.17325041604624683, "grad_norm": 0.7508191119024374, "learning_rate": 4.593187347931874e-06, "loss": 0.6717, "step": 5934 }, { "epoch": 0.1732796122740942, "grad_norm": 1.677624057706463, "learning_rate": 4.593025141930252e-06, "loss": 0.6919, "step": 5935 }, { "epoch": 0.17330880850194155, "grad_norm": 0.7435821720660074, "learning_rate": 4.59286293592863e-06, "loss": 0.6923, "step": 5936 }, { "epoch": 0.1733380047297889, "grad_norm": 0.7616511143105464, "learning_rate": 4.592700729927008e-06, "loss": 0.7092, "step": 5937 }, { "epoch": 0.17336720095763627, "grad_norm": 0.7273247413862844, "learning_rate": 4.592538523925386e-06, "loss": 0.6182, "step": 5938 }, { "epoch": 0.17339639718548364, "grad_norm": 0.7285258769684805, "learning_rate": 4.592376317923764e-06, "loss": 0.6709, "step": 5939 }, { "epoch": 0.173425593413331, "grad_norm": 0.7782789215892736, "learning_rate": 4.592214111922142e-06, "loss": 0.6994, "step": 5940 }, { "epoch": 0.17345478964117836, "grad_norm": 0.7624954320668034, "learning_rate": 4.592051905920519e-06, "loss": 0.7549, "step": 5941 }, { "epoch": 0.17348398586902572, "grad_norm": 0.8367707486376906, "learning_rate": 4.591889699918897e-06, "loss": 0.7108, "step": 5942 }, { "epoch": 0.17351318209687308, "grad_norm": 0.7503736311051806, "learning_rate": 4.591727493917275e-06, "loss": 0.6661, "step": 5943 }, { "epoch": 0.17354237832472044, "grad_norm": 0.7537846710040506, "learning_rate": 4.591565287915653e-06, "loss": 0.6699, "step": 5944 }, { "epoch": 0.1735715745525678, "grad_norm": 0.7514944106325905, "learning_rate": 4.591403081914031e-06, "loss": 0.6921, "step": 5945 }, { "epoch": 0.17360077078041516, "grad_norm": 0.7386367575123567, "learning_rate": 4.59124087591241e-06, "loss": 0.72, "step": 5946 }, { "epoch": 0.17362996700826253, "grad_norm": 0.7684657173512449, "learning_rate": 4.591078669910787e-06, "loss": 0.6956, "step": 5947 }, { "epoch": 0.1736591632361099, "grad_norm": 0.7454822120967906, "learning_rate": 4.590916463909165e-06, "loss": 0.6491, "step": 5948 }, { "epoch": 0.17368835946395725, "grad_norm": 0.738633083878345, "learning_rate": 4.590754257907543e-06, "loss": 0.7141, "step": 5949 }, { "epoch": 0.1737175556918046, "grad_norm": 0.8413505896968602, "learning_rate": 4.590592051905921e-06, "loss": 0.7838, "step": 5950 }, { "epoch": 0.17374675191965197, "grad_norm": 0.7390789913581968, "learning_rate": 4.590429845904299e-06, "loss": 0.666, "step": 5951 }, { "epoch": 0.17377594814749933, "grad_norm": 0.6822546394070643, "learning_rate": 4.590267639902677e-06, "loss": 0.594, "step": 5952 }, { "epoch": 0.1738051443753467, "grad_norm": 0.8148417564152652, "learning_rate": 4.590105433901055e-06, "loss": 0.8594, "step": 5953 }, { "epoch": 0.17383434060319405, "grad_norm": 0.7348995275883756, "learning_rate": 4.589943227899433e-06, "loss": 0.6888, "step": 5954 }, { "epoch": 0.17386353683104144, "grad_norm": 0.7804266747188335, "learning_rate": 4.58978102189781e-06, "loss": 0.6514, "step": 5955 }, { "epoch": 0.1738927330588888, "grad_norm": 0.9666305590932033, "learning_rate": 4.589618815896188e-06, "loss": 0.7225, "step": 5956 }, { "epoch": 0.17392192928673617, "grad_norm": 0.7806621587141104, "learning_rate": 4.589456609894566e-06, "loss": 0.6387, "step": 5957 }, { "epoch": 0.17395112551458353, "grad_norm": 0.9382514423881032, "learning_rate": 4.589294403892944e-06, "loss": 0.7741, "step": 5958 }, { "epoch": 0.1739803217424309, "grad_norm": 0.7285299900693412, "learning_rate": 4.589132197891322e-06, "loss": 0.6809, "step": 5959 }, { "epoch": 0.17400951797027825, "grad_norm": 0.7968519070462972, "learning_rate": 4.5889699918897e-06, "loss": 0.759, "step": 5960 }, { "epoch": 0.1740387141981256, "grad_norm": 0.7754284416449614, "learning_rate": 4.588807785888078e-06, "loss": 0.6915, "step": 5961 }, { "epoch": 0.17406791042597297, "grad_norm": 0.7569205806267536, "learning_rate": 4.588645579886456e-06, "loss": 0.7371, "step": 5962 }, { "epoch": 0.17409710665382033, "grad_norm": 0.7158402705003415, "learning_rate": 4.588483373884834e-06, "loss": 0.638, "step": 5963 }, { "epoch": 0.1741263028816677, "grad_norm": 0.7397741582215017, "learning_rate": 4.588321167883211e-06, "loss": 0.7207, "step": 5964 }, { "epoch": 0.17415549910951506, "grad_norm": 0.6775661466317863, "learning_rate": 4.58815896188159e-06, "loss": 0.5671, "step": 5965 }, { "epoch": 0.17418469533736242, "grad_norm": 0.800168351246143, "learning_rate": 4.587996755879968e-06, "loss": 0.7786, "step": 5966 }, { "epoch": 0.17421389156520978, "grad_norm": 0.8497032086842338, "learning_rate": 4.587834549878346e-06, "loss": 0.7354, "step": 5967 }, { "epoch": 0.17424308779305714, "grad_norm": 0.7172597748233898, "learning_rate": 4.587672343876724e-06, "loss": 0.6254, "step": 5968 }, { "epoch": 0.1742722840209045, "grad_norm": 0.6905524433010386, "learning_rate": 4.587510137875102e-06, "loss": 0.5942, "step": 5969 }, { "epoch": 0.17430148024875186, "grad_norm": 0.698131706616741, "learning_rate": 4.5873479318734795e-06, "loss": 0.6136, "step": 5970 }, { "epoch": 0.17433067647659922, "grad_norm": 0.8807387637871671, "learning_rate": 4.5871857258718575e-06, "loss": 0.7333, "step": 5971 }, { "epoch": 0.17435987270444658, "grad_norm": 0.741483387391125, "learning_rate": 4.5870235198702355e-06, "loss": 0.6744, "step": 5972 }, { "epoch": 0.17438906893229394, "grad_norm": 0.7498182672483489, "learning_rate": 4.5868613138686135e-06, "loss": 0.6615, "step": 5973 }, { "epoch": 0.1744182651601413, "grad_norm": 0.7707143445655364, "learning_rate": 4.5866991078669915e-06, "loss": 0.7278, "step": 5974 }, { "epoch": 0.17444746138798867, "grad_norm": 0.7377272106389621, "learning_rate": 4.5865369018653695e-06, "loss": 0.6365, "step": 5975 }, { "epoch": 0.17447665761583603, "grad_norm": 0.8100509397599969, "learning_rate": 4.5863746958637475e-06, "loss": 0.7883, "step": 5976 }, { "epoch": 0.1745058538436834, "grad_norm": 0.7137360708254286, "learning_rate": 4.5862124898621255e-06, "loss": 0.6055, "step": 5977 }, { "epoch": 0.17453505007153075, "grad_norm": 0.6813087564890243, "learning_rate": 4.5860502838605035e-06, "loss": 0.5524, "step": 5978 }, { "epoch": 0.1745642462993781, "grad_norm": 0.7653963209856368, "learning_rate": 4.585888077858881e-06, "loss": 0.632, "step": 5979 }, { "epoch": 0.17459344252722547, "grad_norm": 0.7527918905942044, "learning_rate": 4.585725871857259e-06, "loss": 0.6152, "step": 5980 }, { "epoch": 0.17462263875507283, "grad_norm": 0.7815660975398133, "learning_rate": 4.585563665855637e-06, "loss": 0.6269, "step": 5981 }, { "epoch": 0.1746518349829202, "grad_norm": 0.741285676413808, "learning_rate": 4.585401459854015e-06, "loss": 0.656, "step": 5982 }, { "epoch": 0.17468103121076756, "grad_norm": 0.7073355734001118, "learning_rate": 4.585239253852393e-06, "loss": 0.6139, "step": 5983 }, { "epoch": 0.17471022743861492, "grad_norm": 0.7386560929743753, "learning_rate": 4.5850770478507715e-06, "loss": 0.6286, "step": 5984 }, { "epoch": 0.1747394236664623, "grad_norm": 0.8356082334283027, "learning_rate": 4.584914841849149e-06, "loss": 0.7401, "step": 5985 }, { "epoch": 0.17476861989430967, "grad_norm": 0.7682267885769962, "learning_rate": 4.584752635847527e-06, "loss": 0.667, "step": 5986 }, { "epoch": 0.17479781612215703, "grad_norm": 0.8898631179724223, "learning_rate": 4.584590429845905e-06, "loss": 0.7183, "step": 5987 }, { "epoch": 0.1748270123500044, "grad_norm": 0.7744528095062404, "learning_rate": 4.584428223844283e-06, "loss": 0.6898, "step": 5988 }, { "epoch": 0.17485620857785175, "grad_norm": 0.7576819246608437, "learning_rate": 4.584266017842661e-06, "loss": 0.695, "step": 5989 }, { "epoch": 0.1748854048056991, "grad_norm": 0.8132481644953361, "learning_rate": 4.584103811841039e-06, "loss": 0.7584, "step": 5990 }, { "epoch": 0.17491460103354647, "grad_norm": 0.7902809763195611, "learning_rate": 4.583941605839417e-06, "loss": 0.7418, "step": 5991 }, { "epoch": 0.17494379726139384, "grad_norm": 0.7771609368328396, "learning_rate": 4.583779399837795e-06, "loss": 0.7551, "step": 5992 }, { "epoch": 0.1749729934892412, "grad_norm": 0.7254286331112396, "learning_rate": 4.583617193836172e-06, "loss": 0.6493, "step": 5993 }, { "epoch": 0.17500218971708856, "grad_norm": 0.7753793917008474, "learning_rate": 4.58345498783455e-06, "loss": 0.6517, "step": 5994 }, { "epoch": 0.17503138594493592, "grad_norm": 0.7304893139027095, "learning_rate": 4.583292781832928e-06, "loss": 0.6319, "step": 5995 }, { "epoch": 0.17506058217278328, "grad_norm": 0.7313388188261214, "learning_rate": 4.583130575831306e-06, "loss": 0.6939, "step": 5996 }, { "epoch": 0.17508977840063064, "grad_norm": 0.743772934258285, "learning_rate": 4.582968369829684e-06, "loss": 0.7156, "step": 5997 }, { "epoch": 0.175118974628478, "grad_norm": 0.8180903014262967, "learning_rate": 4.582806163828062e-06, "loss": 0.7401, "step": 5998 }, { "epoch": 0.17514817085632536, "grad_norm": 0.7155769826195233, "learning_rate": 4.58264395782644e-06, "loss": 0.6306, "step": 5999 }, { "epoch": 0.17517736708417272, "grad_norm": 0.7814820691764485, "learning_rate": 4.582481751824818e-06, "loss": 0.7613, "step": 6000 }, { "epoch": 0.17520656331202009, "grad_norm": 0.7434156330459968, "learning_rate": 4.582319545823196e-06, "loss": 0.6161, "step": 6001 }, { "epoch": 0.17523575953986745, "grad_norm": 0.758268897300749, "learning_rate": 4.582157339821573e-06, "loss": 0.7008, "step": 6002 }, { "epoch": 0.1752649557677148, "grad_norm": 0.6882029113474109, "learning_rate": 4.581995133819952e-06, "loss": 0.5969, "step": 6003 }, { "epoch": 0.17529415199556217, "grad_norm": 0.7538185839976502, "learning_rate": 4.58183292781833e-06, "loss": 0.6297, "step": 6004 }, { "epoch": 0.17532334822340953, "grad_norm": 0.8050420018301133, "learning_rate": 4.581670721816708e-06, "loss": 0.6925, "step": 6005 }, { "epoch": 0.1753525444512569, "grad_norm": 0.7242599273485788, "learning_rate": 4.581508515815086e-06, "loss": 0.6794, "step": 6006 }, { "epoch": 0.17538174067910425, "grad_norm": 0.7270281095517517, "learning_rate": 4.581346309813464e-06, "loss": 0.5414, "step": 6007 }, { "epoch": 0.17541093690695161, "grad_norm": 0.7034088249436227, "learning_rate": 4.581184103811841e-06, "loss": 0.5904, "step": 6008 }, { "epoch": 0.17544013313479898, "grad_norm": 0.9241873739809515, "learning_rate": 4.581021897810219e-06, "loss": 0.8001, "step": 6009 }, { "epoch": 0.17546932936264634, "grad_norm": 0.7647565847331314, "learning_rate": 4.580859691808597e-06, "loss": 0.6617, "step": 6010 }, { "epoch": 0.1754985255904937, "grad_norm": 0.7474580836769963, "learning_rate": 4.580697485806975e-06, "loss": 0.7196, "step": 6011 }, { "epoch": 0.17552772181834106, "grad_norm": 0.8073556979212767, "learning_rate": 4.580535279805353e-06, "loss": 0.7062, "step": 6012 }, { "epoch": 0.17555691804618842, "grad_norm": 0.7667604299882468, "learning_rate": 4.580373073803731e-06, "loss": 0.7041, "step": 6013 }, { "epoch": 0.17558611427403578, "grad_norm": 0.7535828507841656, "learning_rate": 4.580210867802109e-06, "loss": 0.6946, "step": 6014 }, { "epoch": 0.17561531050188314, "grad_norm": 0.7361055933462539, "learning_rate": 4.580048661800487e-06, "loss": 0.686, "step": 6015 }, { "epoch": 0.17564450672973053, "grad_norm": 0.7385086401025296, "learning_rate": 4.579886455798865e-06, "loss": 0.6745, "step": 6016 }, { "epoch": 0.1756737029575779, "grad_norm": 0.743065751756865, "learning_rate": 4.579724249797242e-06, "loss": 0.634, "step": 6017 }, { "epoch": 0.17570289918542525, "grad_norm": 0.7725901619222899, "learning_rate": 4.57956204379562e-06, "loss": 0.7616, "step": 6018 }, { "epoch": 0.17573209541327262, "grad_norm": 0.7709163128531548, "learning_rate": 4.579399837793998e-06, "loss": 0.7597, "step": 6019 }, { "epoch": 0.17576129164111998, "grad_norm": 0.6610994924562519, "learning_rate": 4.579237631792376e-06, "loss": 0.5374, "step": 6020 }, { "epoch": 0.17579048786896734, "grad_norm": 0.7730702749453962, "learning_rate": 4.579075425790754e-06, "loss": 0.7341, "step": 6021 }, { "epoch": 0.1758196840968147, "grad_norm": 0.7602102992601915, "learning_rate": 4.578913219789132e-06, "loss": 0.7088, "step": 6022 }, { "epoch": 0.17584888032466206, "grad_norm": 0.7117790762713894, "learning_rate": 4.5787510137875104e-06, "loss": 0.636, "step": 6023 }, { "epoch": 0.17587807655250942, "grad_norm": 0.6988019492322654, "learning_rate": 4.5785888077858884e-06, "loss": 0.6294, "step": 6024 }, { "epoch": 0.17590727278035678, "grad_norm": 0.7360988503629546, "learning_rate": 4.5784266017842664e-06, "loss": 0.6638, "step": 6025 }, { "epoch": 0.17593646900820414, "grad_norm": 0.8121865249257019, "learning_rate": 4.5782643957826444e-06, "loss": 0.7895, "step": 6026 }, { "epoch": 0.1759656652360515, "grad_norm": 1.1188534555900576, "learning_rate": 4.5781021897810225e-06, "loss": 0.8259, "step": 6027 }, { "epoch": 0.17599486146389887, "grad_norm": 0.7420991407584527, "learning_rate": 4.5779399837794005e-06, "loss": 0.6593, "step": 6028 }, { "epoch": 0.17602405769174623, "grad_norm": 0.7200501567274176, "learning_rate": 4.5777777777777785e-06, "loss": 0.6706, "step": 6029 }, { "epoch": 0.1760532539195936, "grad_norm": 0.7322096985811827, "learning_rate": 4.5776155717761565e-06, "loss": 0.5914, "step": 6030 }, { "epoch": 0.17608245014744095, "grad_norm": 0.7517783302935706, "learning_rate": 4.577453365774534e-06, "loss": 0.6669, "step": 6031 }, { "epoch": 0.1761116463752883, "grad_norm": 0.7828252108361811, "learning_rate": 4.577291159772912e-06, "loss": 0.6872, "step": 6032 }, { "epoch": 0.17614084260313567, "grad_norm": 0.7391417291544566, "learning_rate": 4.57712895377129e-06, "loss": 0.6968, "step": 6033 }, { "epoch": 0.17617003883098303, "grad_norm": 0.72918794308049, "learning_rate": 4.576966747769668e-06, "loss": 0.673, "step": 6034 }, { "epoch": 0.1761992350588304, "grad_norm": 0.7986382161465237, "learning_rate": 4.576804541768046e-06, "loss": 0.7995, "step": 6035 }, { "epoch": 0.17622843128667776, "grad_norm": 0.7271069934352051, "learning_rate": 4.576642335766424e-06, "loss": 0.6807, "step": 6036 }, { "epoch": 0.17625762751452512, "grad_norm": 0.7601654804584133, "learning_rate": 4.576480129764802e-06, "loss": 0.6925, "step": 6037 }, { "epoch": 0.17628682374237248, "grad_norm": 0.8705547335583211, "learning_rate": 4.57631792376318e-06, "loss": 0.8554, "step": 6038 }, { "epoch": 0.17631601997021984, "grad_norm": 0.713581343330893, "learning_rate": 4.576155717761558e-06, "loss": 0.6328, "step": 6039 }, { "epoch": 0.1763452161980672, "grad_norm": 0.7353170127083378, "learning_rate": 4.575993511759935e-06, "loss": 0.6734, "step": 6040 }, { "epoch": 0.17637441242591456, "grad_norm": 0.8224506262457126, "learning_rate": 4.575831305758314e-06, "loss": 0.7286, "step": 6041 }, { "epoch": 0.17640360865376192, "grad_norm": 0.7939207224462098, "learning_rate": 4.575669099756692e-06, "loss": 0.8159, "step": 6042 }, { "epoch": 0.17643280488160928, "grad_norm": 0.7516011452222129, "learning_rate": 4.57550689375507e-06, "loss": 0.6763, "step": 6043 }, { "epoch": 0.17646200110945665, "grad_norm": 0.8824043889710029, "learning_rate": 4.575344687753448e-06, "loss": 0.8201, "step": 6044 }, { "epoch": 0.176491197337304, "grad_norm": 0.8046760427349906, "learning_rate": 4.575182481751826e-06, "loss": 0.7774, "step": 6045 }, { "epoch": 0.1765203935651514, "grad_norm": 1.4719127093779738, "learning_rate": 4.575020275750203e-06, "loss": 0.7373, "step": 6046 }, { "epoch": 0.17654958979299876, "grad_norm": 0.9752227903450363, "learning_rate": 4.574858069748581e-06, "loss": 0.7906, "step": 6047 }, { "epoch": 0.17657878602084612, "grad_norm": 0.7944233613117325, "learning_rate": 4.574695863746959e-06, "loss": 0.7387, "step": 6048 }, { "epoch": 0.17660798224869348, "grad_norm": 0.8295568294246318, "learning_rate": 4.574533657745337e-06, "loss": 0.746, "step": 6049 }, { "epoch": 0.17663717847654084, "grad_norm": 0.7399709574730731, "learning_rate": 4.574371451743715e-06, "loss": 0.6893, "step": 6050 }, { "epoch": 0.1766663747043882, "grad_norm": 0.7388294943799742, "learning_rate": 4.574209245742093e-06, "loss": 0.6815, "step": 6051 }, { "epoch": 0.17669557093223556, "grad_norm": 0.7013381194270792, "learning_rate": 4.574047039740471e-06, "loss": 0.6135, "step": 6052 }, { "epoch": 0.17672476716008292, "grad_norm": 0.7814247647399984, "learning_rate": 4.573884833738849e-06, "loss": 0.6944, "step": 6053 }, { "epoch": 0.17675396338793029, "grad_norm": 0.8278969074947135, "learning_rate": 4.573722627737227e-06, "loss": 0.779, "step": 6054 }, { "epoch": 0.17678315961577765, "grad_norm": 0.7063057016634551, "learning_rate": 4.573560421735604e-06, "loss": 0.608, "step": 6055 }, { "epoch": 0.176812355843625, "grad_norm": 0.7279096153857664, "learning_rate": 4.573398215733982e-06, "loss": 0.7114, "step": 6056 }, { "epoch": 0.17684155207147237, "grad_norm": 0.7296713988749881, "learning_rate": 4.57323600973236e-06, "loss": 0.7094, "step": 6057 }, { "epoch": 0.17687074829931973, "grad_norm": 0.7318477846946017, "learning_rate": 4.573073803730738e-06, "loss": 0.6679, "step": 6058 }, { "epoch": 0.1768999445271671, "grad_norm": 0.7535029799355353, "learning_rate": 4.572911597729116e-06, "loss": 0.6771, "step": 6059 }, { "epoch": 0.17692914075501445, "grad_norm": 0.8177314831018635, "learning_rate": 4.572749391727494e-06, "loss": 0.7564, "step": 6060 }, { "epoch": 0.1769583369828618, "grad_norm": 0.7925888678574065, "learning_rate": 4.572587185725872e-06, "loss": 0.7657, "step": 6061 }, { "epoch": 0.17698753321070917, "grad_norm": 0.6968285163382053, "learning_rate": 4.57242497972425e-06, "loss": 0.5587, "step": 6062 }, { "epoch": 0.17701672943855654, "grad_norm": 0.7986878902448019, "learning_rate": 4.572262773722628e-06, "loss": 0.7379, "step": 6063 }, { "epoch": 0.1770459256664039, "grad_norm": 0.8577125323527077, "learning_rate": 4.572100567721006e-06, "loss": 0.7532, "step": 6064 }, { "epoch": 0.17707512189425126, "grad_norm": 0.7223212046814398, "learning_rate": 4.571938361719384e-06, "loss": 0.6766, "step": 6065 }, { "epoch": 0.17710431812209862, "grad_norm": 0.7671931519406376, "learning_rate": 4.571776155717762e-06, "loss": 0.6333, "step": 6066 }, { "epoch": 0.17713351434994598, "grad_norm": 0.7407995762927985, "learning_rate": 4.57161394971614e-06, "loss": 0.6852, "step": 6067 }, { "epoch": 0.17716271057779334, "grad_norm": 0.7567845320424739, "learning_rate": 4.571451743714518e-06, "loss": 0.6601, "step": 6068 }, { "epoch": 0.1771919068056407, "grad_norm": 0.764150368645924, "learning_rate": 4.571289537712895e-06, "loss": 0.7052, "step": 6069 }, { "epoch": 0.17722110303348806, "grad_norm": 0.7207763312185428, "learning_rate": 4.571127331711273e-06, "loss": 0.6228, "step": 6070 }, { "epoch": 0.17725029926133543, "grad_norm": 0.7115841963427771, "learning_rate": 4.570965125709651e-06, "loss": 0.7122, "step": 6071 }, { "epoch": 0.1772794954891828, "grad_norm": 0.7283216295500784, "learning_rate": 4.570802919708029e-06, "loss": 0.6224, "step": 6072 }, { "epoch": 0.17730869171703015, "grad_norm": 0.7881393138717061, "learning_rate": 4.570640713706407e-06, "loss": 0.749, "step": 6073 }, { "epoch": 0.1773378879448775, "grad_norm": 0.888641091268248, "learning_rate": 4.570478507704785e-06, "loss": 0.8332, "step": 6074 }, { "epoch": 0.17736708417272487, "grad_norm": 0.681941750415076, "learning_rate": 4.570316301703163e-06, "loss": 0.5835, "step": 6075 }, { "epoch": 0.17739628040057226, "grad_norm": 0.7439846914879381, "learning_rate": 4.570154095701541e-06, "loss": 0.6737, "step": 6076 }, { "epoch": 0.17742547662841962, "grad_norm": 0.6994031472647828, "learning_rate": 4.569991889699919e-06, "loss": 0.5962, "step": 6077 }, { "epoch": 0.17745467285626698, "grad_norm": 0.7751325826528899, "learning_rate": 4.5698296836982966e-06, "loss": 0.7332, "step": 6078 }, { "epoch": 0.17748386908411434, "grad_norm": 0.9171826882685548, "learning_rate": 4.569667477696675e-06, "loss": 0.7367, "step": 6079 }, { "epoch": 0.1775130653119617, "grad_norm": 0.73464929291889, "learning_rate": 4.569505271695053e-06, "loss": 0.6236, "step": 6080 }, { "epoch": 0.17754226153980907, "grad_norm": 0.7665266529700044, "learning_rate": 4.5693430656934314e-06, "loss": 0.6737, "step": 6081 }, { "epoch": 0.17757145776765643, "grad_norm": 0.7439866347856584, "learning_rate": 4.5691808596918094e-06, "loss": 0.6347, "step": 6082 }, { "epoch": 0.1776006539955038, "grad_norm": 0.8116954259812836, "learning_rate": 4.5690186536901874e-06, "loss": 0.7974, "step": 6083 }, { "epoch": 0.17762985022335115, "grad_norm": 0.7712531848612169, "learning_rate": 4.568856447688565e-06, "loss": 0.7111, "step": 6084 }, { "epoch": 0.1776590464511985, "grad_norm": 0.7234776596363665, "learning_rate": 4.568694241686943e-06, "loss": 0.6522, "step": 6085 }, { "epoch": 0.17768824267904587, "grad_norm": 0.7301034586789908, "learning_rate": 4.568532035685321e-06, "loss": 0.6544, "step": 6086 }, { "epoch": 0.17771743890689323, "grad_norm": 0.7610556223716946, "learning_rate": 4.568369829683699e-06, "loss": 0.6758, "step": 6087 }, { "epoch": 0.1777466351347406, "grad_norm": 0.7507250970215165, "learning_rate": 4.568207623682077e-06, "loss": 0.6671, "step": 6088 }, { "epoch": 0.17777583136258795, "grad_norm": 0.7811070770925658, "learning_rate": 4.568045417680455e-06, "loss": 0.7127, "step": 6089 }, { "epoch": 0.17780502759043532, "grad_norm": 0.7816675523376202, "learning_rate": 4.567883211678833e-06, "loss": 0.6437, "step": 6090 }, { "epoch": 0.17783422381828268, "grad_norm": 0.7118569257555516, "learning_rate": 4.567721005677211e-06, "loss": 0.6533, "step": 6091 }, { "epoch": 0.17786342004613004, "grad_norm": 0.7149839643024714, "learning_rate": 4.567558799675589e-06, "loss": 0.6331, "step": 6092 }, { "epoch": 0.1778926162739774, "grad_norm": 0.7563674048486989, "learning_rate": 4.567396593673966e-06, "loss": 0.7105, "step": 6093 }, { "epoch": 0.17792181250182476, "grad_norm": 0.782492104656731, "learning_rate": 4.567234387672344e-06, "loss": 0.7508, "step": 6094 }, { "epoch": 0.17795100872967212, "grad_norm": 0.8021356465565374, "learning_rate": 4.567072181670722e-06, "loss": 0.7101, "step": 6095 }, { "epoch": 0.17798020495751948, "grad_norm": 0.7358976107462079, "learning_rate": 4.5669099756691e-06, "loss": 0.6125, "step": 6096 }, { "epoch": 0.17800940118536684, "grad_norm": 0.7396033045778121, "learning_rate": 4.566747769667479e-06, "loss": 0.6709, "step": 6097 }, { "epoch": 0.1780385974132142, "grad_norm": 0.782572019285329, "learning_rate": 4.566585563665856e-06, "loss": 0.7513, "step": 6098 }, { "epoch": 0.17806779364106157, "grad_norm": 0.6914296615642169, "learning_rate": 4.566423357664234e-06, "loss": 0.611, "step": 6099 }, { "epoch": 0.17809698986890893, "grad_norm": 0.7328451201580853, "learning_rate": 4.566261151662612e-06, "loss": 0.6701, "step": 6100 }, { "epoch": 0.1781261860967563, "grad_norm": 0.779932576867571, "learning_rate": 4.56609894566099e-06, "loss": 0.7114, "step": 6101 }, { "epoch": 0.17815538232460365, "grad_norm": 0.7511307868543526, "learning_rate": 4.565936739659368e-06, "loss": 0.7006, "step": 6102 }, { "epoch": 0.178184578552451, "grad_norm": 0.7472275233970688, "learning_rate": 4.565774533657746e-06, "loss": 0.6968, "step": 6103 }, { "epoch": 0.17821377478029837, "grad_norm": 0.7288690767850439, "learning_rate": 4.565612327656124e-06, "loss": 0.6381, "step": 6104 }, { "epoch": 0.17824297100814573, "grad_norm": 0.8160841554518382, "learning_rate": 4.565450121654502e-06, "loss": 0.8084, "step": 6105 }, { "epoch": 0.17827216723599312, "grad_norm": 0.7569853882215496, "learning_rate": 4.56528791565288e-06, "loss": 0.6765, "step": 6106 }, { "epoch": 0.17830136346384048, "grad_norm": 0.7797150252678844, "learning_rate": 4.565125709651257e-06, "loss": 0.6711, "step": 6107 }, { "epoch": 0.17833055969168785, "grad_norm": 0.7788738837284549, "learning_rate": 4.564963503649635e-06, "loss": 0.718, "step": 6108 }, { "epoch": 0.1783597559195352, "grad_norm": 0.7556768698053979, "learning_rate": 4.564801297648013e-06, "loss": 0.643, "step": 6109 }, { "epoch": 0.17838895214738257, "grad_norm": 0.8052554166957847, "learning_rate": 4.564639091646391e-06, "loss": 0.7711, "step": 6110 }, { "epoch": 0.17841814837522993, "grad_norm": 0.6987350072737721, "learning_rate": 4.564476885644769e-06, "loss": 0.5851, "step": 6111 }, { "epoch": 0.1784473446030773, "grad_norm": 0.7905409190151237, "learning_rate": 4.564314679643147e-06, "loss": 0.6665, "step": 6112 }, { "epoch": 0.17847654083092465, "grad_norm": 0.7473255193446456, "learning_rate": 4.564152473641525e-06, "loss": 0.6511, "step": 6113 }, { "epoch": 0.178505737058772, "grad_norm": 0.7796693891419342, "learning_rate": 4.563990267639903e-06, "loss": 0.7242, "step": 6114 }, { "epoch": 0.17853493328661937, "grad_norm": 0.7604477486727964, "learning_rate": 4.563828061638281e-06, "loss": 0.7294, "step": 6115 }, { "epoch": 0.17856412951446674, "grad_norm": 0.7453484535083971, "learning_rate": 4.563665855636659e-06, "loss": 0.6669, "step": 6116 }, { "epoch": 0.1785933257423141, "grad_norm": 0.7113684247982874, "learning_rate": 4.563503649635037e-06, "loss": 0.6638, "step": 6117 }, { "epoch": 0.17862252197016146, "grad_norm": 0.7244849243838516, "learning_rate": 4.563341443633415e-06, "loss": 0.6728, "step": 6118 }, { "epoch": 0.17865171819800882, "grad_norm": 0.7643842601325944, "learning_rate": 4.563179237631793e-06, "loss": 0.751, "step": 6119 }, { "epoch": 0.17868091442585618, "grad_norm": 0.7683032655578402, "learning_rate": 4.563017031630171e-06, "loss": 0.6392, "step": 6120 }, { "epoch": 0.17871011065370354, "grad_norm": 0.9284069857181966, "learning_rate": 4.562854825628549e-06, "loss": 0.7404, "step": 6121 }, { "epoch": 0.1787393068815509, "grad_norm": 0.8339773453129551, "learning_rate": 4.562692619626926e-06, "loss": 0.6653, "step": 6122 }, { "epoch": 0.17876850310939826, "grad_norm": 0.695880943531144, "learning_rate": 4.562530413625304e-06, "loss": 0.6112, "step": 6123 }, { "epoch": 0.17879769933724562, "grad_norm": 0.7491579292832224, "learning_rate": 4.562368207623682e-06, "loss": 0.7241, "step": 6124 }, { "epoch": 0.17882689556509299, "grad_norm": 0.7925077410077497, "learning_rate": 4.56220600162206e-06, "loss": 0.7681, "step": 6125 }, { "epoch": 0.17885609179294035, "grad_norm": 0.7320675518935402, "learning_rate": 4.562043795620438e-06, "loss": 0.6913, "step": 6126 }, { "epoch": 0.1788852880207877, "grad_norm": 1.0780532529449245, "learning_rate": 4.561881589618816e-06, "loss": 0.6866, "step": 6127 }, { "epoch": 0.17891448424863507, "grad_norm": 0.7525596322094867, "learning_rate": 4.561719383617194e-06, "loss": 0.7129, "step": 6128 }, { "epoch": 0.17894368047648243, "grad_norm": 0.9039339945995414, "learning_rate": 4.561557177615572e-06, "loss": 0.8349, "step": 6129 }, { "epoch": 0.1789728767043298, "grad_norm": 0.6242841536333709, "learning_rate": 4.5613949716139495e-06, "loss": 0.5266, "step": 6130 }, { "epoch": 0.17900207293217715, "grad_norm": 0.8252813554710955, "learning_rate": 4.5612327656123275e-06, "loss": 0.7406, "step": 6131 }, { "epoch": 0.17903126916002451, "grad_norm": 0.7163856557501378, "learning_rate": 4.5610705596107055e-06, "loss": 0.6973, "step": 6132 }, { "epoch": 0.17906046538787188, "grad_norm": 0.7337561371243507, "learning_rate": 4.5609083536090835e-06, "loss": 0.7222, "step": 6133 }, { "epoch": 0.17908966161571924, "grad_norm": 0.8053332144457791, "learning_rate": 4.5607461476074616e-06, "loss": 0.6334, "step": 6134 }, { "epoch": 0.1791188578435666, "grad_norm": 0.7481813014858958, "learning_rate": 4.56058394160584e-06, "loss": 0.6815, "step": 6135 }, { "epoch": 0.179148054071414, "grad_norm": 0.7326815729710371, "learning_rate": 4.5604217356042176e-06, "loss": 0.6508, "step": 6136 }, { "epoch": 0.17917725029926135, "grad_norm": 0.7304589852787933, "learning_rate": 4.5602595296025956e-06, "loss": 0.6814, "step": 6137 }, { "epoch": 0.1792064465271087, "grad_norm": 0.8231271119048124, "learning_rate": 4.560097323600974e-06, "loss": 0.6857, "step": 6138 }, { "epoch": 0.17923564275495607, "grad_norm": 0.7348335517124113, "learning_rate": 4.559935117599352e-06, "loss": 0.6554, "step": 6139 }, { "epoch": 0.17926483898280343, "grad_norm": 0.7601088336974119, "learning_rate": 4.55977291159773e-06, "loss": 0.74, "step": 6140 }, { "epoch": 0.1792940352106508, "grad_norm": 0.7448491291288324, "learning_rate": 4.559610705596108e-06, "loss": 0.6329, "step": 6141 }, { "epoch": 0.17932323143849815, "grad_norm": 0.7608768191240192, "learning_rate": 4.559448499594486e-06, "loss": 0.686, "step": 6142 }, { "epoch": 0.17935242766634552, "grad_norm": 0.7324005600103153, "learning_rate": 4.559286293592864e-06, "loss": 0.7096, "step": 6143 }, { "epoch": 0.17938162389419288, "grad_norm": 0.8323187391925084, "learning_rate": 4.559124087591242e-06, "loss": 0.8144, "step": 6144 }, { "epoch": 0.17941082012204024, "grad_norm": 0.7212401015446636, "learning_rate": 4.558961881589619e-06, "loss": 0.6815, "step": 6145 }, { "epoch": 0.1794400163498876, "grad_norm": 0.734870380268109, "learning_rate": 4.558799675587997e-06, "loss": 0.6574, "step": 6146 }, { "epoch": 0.17946921257773496, "grad_norm": 0.6952754950148821, "learning_rate": 4.558637469586375e-06, "loss": 0.5551, "step": 6147 }, { "epoch": 0.17949840880558232, "grad_norm": 0.7498215806851611, "learning_rate": 4.558475263584753e-06, "loss": 0.6057, "step": 6148 }, { "epoch": 0.17952760503342968, "grad_norm": 0.7747025542190292, "learning_rate": 4.558313057583131e-06, "loss": 0.7049, "step": 6149 }, { "epoch": 0.17955680126127704, "grad_norm": 0.7995357849288216, "learning_rate": 4.558150851581509e-06, "loss": 0.7275, "step": 6150 }, { "epoch": 0.1795859974891244, "grad_norm": 0.760993496756382, "learning_rate": 4.557988645579887e-06, "loss": 0.6908, "step": 6151 }, { "epoch": 0.17961519371697177, "grad_norm": 0.8124804487577814, "learning_rate": 4.557826439578265e-06, "loss": 0.7898, "step": 6152 }, { "epoch": 0.17964438994481913, "grad_norm": 0.7458185600050588, "learning_rate": 4.557664233576643e-06, "loss": 0.6736, "step": 6153 }, { "epoch": 0.1796735861726665, "grad_norm": 0.7683434760840754, "learning_rate": 4.557502027575021e-06, "loss": 0.7159, "step": 6154 }, { "epoch": 0.17970278240051385, "grad_norm": 0.7908103412300742, "learning_rate": 4.557339821573399e-06, "loss": 0.7254, "step": 6155 }, { "epoch": 0.1797319786283612, "grad_norm": 0.6993417735453472, "learning_rate": 4.557177615571777e-06, "loss": 0.6404, "step": 6156 }, { "epoch": 0.17976117485620857, "grad_norm": 0.8908432046086381, "learning_rate": 4.557015409570155e-06, "loss": 0.7045, "step": 6157 }, { "epoch": 0.17979037108405593, "grad_norm": 0.6919053530617614, "learning_rate": 4.556853203568533e-06, "loss": 0.6713, "step": 6158 }, { "epoch": 0.1798195673119033, "grad_norm": 0.7709240431126054, "learning_rate": 4.556690997566911e-06, "loss": 0.7678, "step": 6159 }, { "epoch": 0.17984876353975066, "grad_norm": 0.9077792663543868, "learning_rate": 4.556528791565288e-06, "loss": 0.7217, "step": 6160 }, { "epoch": 0.17987795976759802, "grad_norm": 0.785945651314759, "learning_rate": 4.556366585563666e-06, "loss": 0.7336, "step": 6161 }, { "epoch": 0.17990715599544538, "grad_norm": 0.8153468948797772, "learning_rate": 4.556204379562044e-06, "loss": 0.7298, "step": 6162 }, { "epoch": 0.17993635222329274, "grad_norm": 0.785323806296315, "learning_rate": 4.556042173560422e-06, "loss": 0.767, "step": 6163 }, { "epoch": 0.1799655484511401, "grad_norm": 0.8242407224395302, "learning_rate": 4.5558799675588e-06, "loss": 0.7718, "step": 6164 }, { "epoch": 0.17999474467898746, "grad_norm": 0.8228732689384142, "learning_rate": 4.555717761557178e-06, "loss": 0.8164, "step": 6165 }, { "epoch": 0.18002394090683485, "grad_norm": 0.7662958902324309, "learning_rate": 4.555555555555556e-06, "loss": 0.701, "step": 6166 }, { "epoch": 0.1800531371346822, "grad_norm": 0.9008354922069763, "learning_rate": 4.555393349553934e-06, "loss": 0.7539, "step": 6167 }, { "epoch": 0.18008233336252957, "grad_norm": 0.7494580689492929, "learning_rate": 4.555231143552311e-06, "loss": 0.6602, "step": 6168 }, { "epoch": 0.18011152959037693, "grad_norm": 0.8092831491673615, "learning_rate": 4.555068937550689e-06, "loss": 0.7776, "step": 6169 }, { "epoch": 0.1801407258182243, "grad_norm": 0.8304103781398077, "learning_rate": 4.554906731549067e-06, "loss": 0.74, "step": 6170 }, { "epoch": 0.18016992204607166, "grad_norm": 0.7585584664022531, "learning_rate": 4.554744525547445e-06, "loss": 0.704, "step": 6171 }, { "epoch": 0.18019911827391902, "grad_norm": 0.8947137946712782, "learning_rate": 4.554582319545823e-06, "loss": 0.7387, "step": 6172 }, { "epoch": 0.18022831450176638, "grad_norm": 0.777789450964439, "learning_rate": 4.554420113544202e-06, "loss": 0.674, "step": 6173 }, { "epoch": 0.18025751072961374, "grad_norm": 0.6717408562455636, "learning_rate": 4.554257907542579e-06, "loss": 0.5394, "step": 6174 }, { "epoch": 0.1802867069574611, "grad_norm": 0.7954972317207503, "learning_rate": 4.554095701540957e-06, "loss": 0.7595, "step": 6175 }, { "epoch": 0.18031590318530846, "grad_norm": 0.6848729524460068, "learning_rate": 4.553933495539335e-06, "loss": 0.6177, "step": 6176 }, { "epoch": 0.18034509941315582, "grad_norm": 0.7655717962487844, "learning_rate": 4.553771289537713e-06, "loss": 0.6478, "step": 6177 }, { "epoch": 0.18037429564100319, "grad_norm": 0.7283340593863785, "learning_rate": 4.553609083536091e-06, "loss": 0.6808, "step": 6178 }, { "epoch": 0.18040349186885055, "grad_norm": 0.6820058234694164, "learning_rate": 4.553446877534469e-06, "loss": 0.5961, "step": 6179 }, { "epoch": 0.1804326880966979, "grad_norm": 0.7212826829652792, "learning_rate": 4.553284671532847e-06, "loss": 0.5886, "step": 6180 }, { "epoch": 0.18046188432454527, "grad_norm": 0.7253339396517134, "learning_rate": 4.553122465531225e-06, "loss": 0.6618, "step": 6181 }, { "epoch": 0.18049108055239263, "grad_norm": 0.7209535142294097, "learning_rate": 4.552960259529603e-06, "loss": 0.6171, "step": 6182 }, { "epoch": 0.18052027678024, "grad_norm": 0.730348268491296, "learning_rate": 4.5527980535279805e-06, "loss": 0.6471, "step": 6183 }, { "epoch": 0.18054947300808735, "grad_norm": 0.797459338495959, "learning_rate": 4.5526358475263585e-06, "loss": 0.6009, "step": 6184 }, { "epoch": 0.1805786692359347, "grad_norm": 0.711848391851877, "learning_rate": 4.5524736415247365e-06, "loss": 0.6118, "step": 6185 }, { "epoch": 0.18060786546378207, "grad_norm": 0.8925700626080476, "learning_rate": 4.5523114355231145e-06, "loss": 0.8131, "step": 6186 }, { "epoch": 0.18063706169162944, "grad_norm": 0.7093882842783604, "learning_rate": 4.5521492295214925e-06, "loss": 0.6705, "step": 6187 }, { "epoch": 0.1806662579194768, "grad_norm": 0.7441220955311592, "learning_rate": 4.5519870235198705e-06, "loss": 0.7272, "step": 6188 }, { "epoch": 0.18069545414732416, "grad_norm": 0.7599704848027758, "learning_rate": 4.5518248175182485e-06, "loss": 0.7051, "step": 6189 }, { "epoch": 0.18072465037517152, "grad_norm": 0.8080130212914958, "learning_rate": 4.5516626115166265e-06, "loss": 0.8517, "step": 6190 }, { "epoch": 0.18075384660301888, "grad_norm": 0.8147192440030195, "learning_rate": 4.5515004055150046e-06, "loss": 0.7088, "step": 6191 }, { "epoch": 0.18078304283086624, "grad_norm": 0.8998761786493493, "learning_rate": 4.5513381995133826e-06, "loss": 0.6942, "step": 6192 }, { "epoch": 0.1808122390587136, "grad_norm": 0.7412496154914643, "learning_rate": 4.5511759935117606e-06, "loss": 0.6308, "step": 6193 }, { "epoch": 0.18084143528656096, "grad_norm": 0.7293487228375776, "learning_rate": 4.5510137875101386e-06, "loss": 0.6842, "step": 6194 }, { "epoch": 0.18087063151440833, "grad_norm": 0.8654097763913844, "learning_rate": 4.550851581508517e-06, "loss": 0.7645, "step": 6195 }, { "epoch": 0.18089982774225571, "grad_norm": 0.7130589981595827, "learning_rate": 4.550689375506895e-06, "loss": 0.6433, "step": 6196 }, { "epoch": 0.18092902397010308, "grad_norm": 1.2264169758818289, "learning_rate": 4.550527169505273e-06, "loss": 0.8299, "step": 6197 }, { "epoch": 0.18095822019795044, "grad_norm": 0.8375518750780034, "learning_rate": 4.55036496350365e-06, "loss": 0.7373, "step": 6198 }, { "epoch": 0.1809874164257978, "grad_norm": 1.0410476686762713, "learning_rate": 4.550202757502028e-06, "loss": 0.8108, "step": 6199 }, { "epoch": 0.18101661265364516, "grad_norm": 0.79809748413494, "learning_rate": 4.550040551500406e-06, "loss": 0.7279, "step": 6200 }, { "epoch": 0.18104580888149252, "grad_norm": 0.9994438187796444, "learning_rate": 4.549878345498784e-06, "loss": 0.6972, "step": 6201 }, { "epoch": 0.18107500510933988, "grad_norm": 0.7374676926878367, "learning_rate": 4.549716139497162e-06, "loss": 0.6913, "step": 6202 }, { "epoch": 0.18110420133718724, "grad_norm": 0.8812671792708446, "learning_rate": 4.54955393349554e-06, "loss": 0.6891, "step": 6203 }, { "epoch": 0.1811333975650346, "grad_norm": 0.76275548709431, "learning_rate": 4.549391727493918e-06, "loss": 0.6976, "step": 6204 }, { "epoch": 0.18116259379288197, "grad_norm": 0.723581470293222, "learning_rate": 4.549229521492296e-06, "loss": 0.6159, "step": 6205 }, { "epoch": 0.18119179002072933, "grad_norm": 0.7527784546976666, "learning_rate": 4.549067315490673e-06, "loss": 0.6918, "step": 6206 }, { "epoch": 0.1812209862485767, "grad_norm": 0.7889052530543514, "learning_rate": 4.548905109489051e-06, "loss": 0.7271, "step": 6207 }, { "epoch": 0.18125018247642405, "grad_norm": 0.6818937850435929, "learning_rate": 4.548742903487429e-06, "loss": 0.5831, "step": 6208 }, { "epoch": 0.1812793787042714, "grad_norm": 0.6900669011658342, "learning_rate": 4.548580697485807e-06, "loss": 0.6165, "step": 6209 }, { "epoch": 0.18130857493211877, "grad_norm": 0.8075789958427251, "learning_rate": 4.548418491484185e-06, "loss": 0.7981, "step": 6210 }, { "epoch": 0.18133777115996613, "grad_norm": 0.7691299839388962, "learning_rate": 4.548256285482564e-06, "loss": 0.6785, "step": 6211 }, { "epoch": 0.1813669673878135, "grad_norm": 0.747994599599063, "learning_rate": 4.548094079480941e-06, "loss": 0.6911, "step": 6212 }, { "epoch": 0.18139616361566085, "grad_norm": 0.706574580500827, "learning_rate": 4.547931873479319e-06, "loss": 0.608, "step": 6213 }, { "epoch": 0.18142535984350822, "grad_norm": 0.8846004932328777, "learning_rate": 4.547769667477697e-06, "loss": 0.6737, "step": 6214 }, { "epoch": 0.18145455607135558, "grad_norm": 0.8549651019483523, "learning_rate": 4.547607461476075e-06, "loss": 0.7623, "step": 6215 }, { "epoch": 0.18148375229920294, "grad_norm": 0.6626934511812078, "learning_rate": 4.547445255474453e-06, "loss": 0.5395, "step": 6216 }, { "epoch": 0.1815129485270503, "grad_norm": 0.7262242969677885, "learning_rate": 4.547283049472831e-06, "loss": 0.6613, "step": 6217 }, { "epoch": 0.18154214475489766, "grad_norm": 0.7906004728005207, "learning_rate": 4.547120843471209e-06, "loss": 0.8013, "step": 6218 }, { "epoch": 0.18157134098274502, "grad_norm": 0.7885492158375432, "learning_rate": 4.546958637469587e-06, "loss": 0.6807, "step": 6219 }, { "epoch": 0.18160053721059238, "grad_norm": 0.7929092692948911, "learning_rate": 4.546796431467965e-06, "loss": 0.7931, "step": 6220 }, { "epoch": 0.18162973343843974, "grad_norm": 0.751395910709638, "learning_rate": 4.546634225466342e-06, "loss": 0.653, "step": 6221 }, { "epoch": 0.1816589296662871, "grad_norm": 0.812909472919796, "learning_rate": 4.54647201946472e-06, "loss": 0.7666, "step": 6222 }, { "epoch": 0.18168812589413447, "grad_norm": 0.7340053766908395, "learning_rate": 4.546309813463098e-06, "loss": 0.6151, "step": 6223 }, { "epoch": 0.18171732212198183, "grad_norm": 0.7992788637342433, "learning_rate": 4.546147607461476e-06, "loss": 0.7104, "step": 6224 }, { "epoch": 0.1817465183498292, "grad_norm": 0.7823496613197891, "learning_rate": 4.545985401459854e-06, "loss": 0.6998, "step": 6225 }, { "epoch": 0.18177571457767655, "grad_norm": 0.7387923260868622, "learning_rate": 4.545823195458232e-06, "loss": 0.6202, "step": 6226 }, { "epoch": 0.18180491080552394, "grad_norm": 1.105784884518087, "learning_rate": 4.54566098945661e-06, "loss": 0.7706, "step": 6227 }, { "epoch": 0.1818341070333713, "grad_norm": 0.7128105103994723, "learning_rate": 4.545498783454988e-06, "loss": 0.6372, "step": 6228 }, { "epoch": 0.18186330326121866, "grad_norm": 0.8041174042911083, "learning_rate": 4.545336577453366e-06, "loss": 0.7353, "step": 6229 }, { "epoch": 0.18189249948906602, "grad_norm": 0.790921276962021, "learning_rate": 4.545174371451744e-06, "loss": 0.7068, "step": 6230 }, { "epoch": 0.18192169571691338, "grad_norm": 0.7098528796485553, "learning_rate": 4.545012165450122e-06, "loss": 0.5632, "step": 6231 }, { "epoch": 0.18195089194476075, "grad_norm": 0.8026903864444351, "learning_rate": 4.5448499594485e-06, "loss": 0.7562, "step": 6232 }, { "epoch": 0.1819800881726081, "grad_norm": 0.92819844081524, "learning_rate": 4.544687753446878e-06, "loss": 0.6416, "step": 6233 }, { "epoch": 0.18200928440045547, "grad_norm": 0.7960245624160305, "learning_rate": 4.544525547445256e-06, "loss": 0.7471, "step": 6234 }, { "epoch": 0.18203848062830283, "grad_norm": 0.7090824602060488, "learning_rate": 4.544363341443634e-06, "loss": 0.6236, "step": 6235 }, { "epoch": 0.1820676768561502, "grad_norm": 0.7832507063577082, "learning_rate": 4.5442011354420115e-06, "loss": 0.713, "step": 6236 }, { "epoch": 0.18209687308399755, "grad_norm": 0.7679426699142075, "learning_rate": 4.5440389294403895e-06, "loss": 0.6706, "step": 6237 }, { "epoch": 0.1821260693118449, "grad_norm": 0.7404351925472296, "learning_rate": 4.5438767234387675e-06, "loss": 0.6449, "step": 6238 }, { "epoch": 0.18215526553969227, "grad_norm": 0.8198705933630863, "learning_rate": 4.5437145174371455e-06, "loss": 0.7017, "step": 6239 }, { "epoch": 0.18218446176753963, "grad_norm": 0.7611280306263397, "learning_rate": 4.5435523114355235e-06, "loss": 0.6834, "step": 6240 }, { "epoch": 0.182213657995387, "grad_norm": 0.7788006124953629, "learning_rate": 4.5433901054339015e-06, "loss": 0.6487, "step": 6241 }, { "epoch": 0.18224285422323436, "grad_norm": 0.9283218111907353, "learning_rate": 4.5432278994322795e-06, "loss": 0.7601, "step": 6242 }, { "epoch": 0.18227205045108172, "grad_norm": 0.8741059870218818, "learning_rate": 4.5430656934306575e-06, "loss": 0.7008, "step": 6243 }, { "epoch": 0.18230124667892908, "grad_norm": 0.6980905477276603, "learning_rate": 4.542903487429035e-06, "loss": 0.5947, "step": 6244 }, { "epoch": 0.18233044290677644, "grad_norm": 0.6918458869265381, "learning_rate": 4.542741281427413e-06, "loss": 0.6145, "step": 6245 }, { "epoch": 0.1823596391346238, "grad_norm": 0.7402817174202622, "learning_rate": 4.542579075425791e-06, "loss": 0.711, "step": 6246 }, { "epoch": 0.18238883536247116, "grad_norm": 0.7492143514993608, "learning_rate": 4.542416869424169e-06, "loss": 0.6772, "step": 6247 }, { "epoch": 0.18241803159031852, "grad_norm": 0.6909430856109504, "learning_rate": 4.542254663422547e-06, "loss": 0.6062, "step": 6248 }, { "epoch": 0.18244722781816589, "grad_norm": 0.7371856305424916, "learning_rate": 4.5420924574209256e-06, "loss": 0.6404, "step": 6249 }, { "epoch": 0.18247642404601325, "grad_norm": 0.7005666813522553, "learning_rate": 4.541930251419303e-06, "loss": 0.6563, "step": 6250 }, { "epoch": 0.1825056202738606, "grad_norm": 0.7182642103715248, "learning_rate": 4.541768045417681e-06, "loss": 0.6032, "step": 6251 }, { "epoch": 0.18253481650170797, "grad_norm": 0.8289024433003379, "learning_rate": 4.541605839416059e-06, "loss": 0.7286, "step": 6252 }, { "epoch": 0.18256401272955533, "grad_norm": 0.810745321820002, "learning_rate": 4.541443633414437e-06, "loss": 0.6938, "step": 6253 }, { "epoch": 0.1825932089574027, "grad_norm": 0.7688757611899948, "learning_rate": 4.541281427412815e-06, "loss": 0.7369, "step": 6254 }, { "epoch": 0.18262240518525005, "grad_norm": 1.1943961593379062, "learning_rate": 4.541119221411193e-06, "loss": 0.6823, "step": 6255 }, { "epoch": 0.18265160141309741, "grad_norm": 0.7596500343744208, "learning_rate": 4.540957015409571e-06, "loss": 0.684, "step": 6256 }, { "epoch": 0.1826807976409448, "grad_norm": 0.7410409263794359, "learning_rate": 4.540794809407949e-06, "loss": 0.6495, "step": 6257 }, { "epoch": 0.18270999386879216, "grad_norm": 0.7465526540108889, "learning_rate": 4.540632603406327e-06, "loss": 0.6535, "step": 6258 }, { "epoch": 0.18273919009663953, "grad_norm": 0.7525404205120202, "learning_rate": 4.540470397404704e-06, "loss": 0.6523, "step": 6259 }, { "epoch": 0.1827683863244869, "grad_norm": 0.6988185002840498, "learning_rate": 4.540308191403082e-06, "loss": 0.6819, "step": 6260 }, { "epoch": 0.18279758255233425, "grad_norm": 0.6861541546425372, "learning_rate": 4.54014598540146e-06, "loss": 0.5791, "step": 6261 }, { "epoch": 0.1828267787801816, "grad_norm": 0.7925621144177137, "learning_rate": 4.539983779399838e-06, "loss": 0.7234, "step": 6262 }, { "epoch": 0.18285597500802897, "grad_norm": 0.7958604870906111, "learning_rate": 4.539821573398216e-06, "loss": 0.7794, "step": 6263 }, { "epoch": 0.18288517123587633, "grad_norm": 0.7493083381939211, "learning_rate": 4.539659367396594e-06, "loss": 0.7106, "step": 6264 }, { "epoch": 0.1829143674637237, "grad_norm": 0.7552732107632588, "learning_rate": 4.539497161394972e-06, "loss": 0.7051, "step": 6265 }, { "epoch": 0.18294356369157105, "grad_norm": 0.7700991810107225, "learning_rate": 4.53933495539335e-06, "loss": 0.7592, "step": 6266 }, { "epoch": 0.18297275991941842, "grad_norm": 0.8521190243719066, "learning_rate": 4.539172749391728e-06, "loss": 0.8522, "step": 6267 }, { "epoch": 0.18300195614726578, "grad_norm": 0.8042161414266631, "learning_rate": 4.539010543390106e-06, "loss": 0.7125, "step": 6268 }, { "epoch": 0.18303115237511314, "grad_norm": 0.7872412078730304, "learning_rate": 4.538848337388484e-06, "loss": 0.6856, "step": 6269 }, { "epoch": 0.1830603486029605, "grad_norm": 0.8019722214413915, "learning_rate": 4.538686131386862e-06, "loss": 0.6675, "step": 6270 }, { "epoch": 0.18308954483080786, "grad_norm": 0.7648709770594665, "learning_rate": 4.53852392538524e-06, "loss": 0.7291, "step": 6271 }, { "epoch": 0.18311874105865522, "grad_norm": 0.9075176950036627, "learning_rate": 4.538361719383618e-06, "loss": 0.7593, "step": 6272 }, { "epoch": 0.18314793728650258, "grad_norm": 0.7182566188001368, "learning_rate": 4.538199513381995e-06, "loss": 0.6315, "step": 6273 }, { "epoch": 0.18317713351434994, "grad_norm": 0.7933644457785707, "learning_rate": 4.538037307380373e-06, "loss": 0.7594, "step": 6274 }, { "epoch": 0.1832063297421973, "grad_norm": 0.7891622412991002, "learning_rate": 4.537875101378751e-06, "loss": 0.7753, "step": 6275 }, { "epoch": 0.18323552597004467, "grad_norm": 0.8048986451462672, "learning_rate": 4.537712895377129e-06, "loss": 0.7657, "step": 6276 }, { "epoch": 0.18326472219789203, "grad_norm": 0.7615692319434615, "learning_rate": 4.537550689375507e-06, "loss": 0.6617, "step": 6277 }, { "epoch": 0.1832939184257394, "grad_norm": 0.8197348610458752, "learning_rate": 4.537388483373885e-06, "loss": 0.7391, "step": 6278 }, { "epoch": 0.18332311465358675, "grad_norm": 0.7460101659131532, "learning_rate": 4.537226277372263e-06, "loss": 0.6115, "step": 6279 }, { "epoch": 0.1833523108814341, "grad_norm": 0.8232883623704486, "learning_rate": 4.537064071370641e-06, "loss": 0.7464, "step": 6280 }, { "epoch": 0.18338150710928147, "grad_norm": 0.8430281217321433, "learning_rate": 4.536901865369019e-06, "loss": 0.7868, "step": 6281 }, { "epoch": 0.18341070333712883, "grad_norm": 31.816379814705986, "learning_rate": 4.536739659367396e-06, "loss": 1.1475, "step": 6282 }, { "epoch": 0.1834398995649762, "grad_norm": 0.7894789382219283, "learning_rate": 4.536577453365774e-06, "loss": 0.736, "step": 6283 }, { "epoch": 0.18346909579282356, "grad_norm": 0.8806037766834216, "learning_rate": 4.536415247364152e-06, "loss": 0.6412, "step": 6284 }, { "epoch": 0.18349829202067092, "grad_norm": 0.7071021024086687, "learning_rate": 4.53625304136253e-06, "loss": 0.5939, "step": 6285 }, { "epoch": 0.18352748824851828, "grad_norm": 0.8030371657634184, "learning_rate": 4.536090835360909e-06, "loss": 0.703, "step": 6286 }, { "epoch": 0.18355668447636567, "grad_norm": 0.9157933114987967, "learning_rate": 4.535928629359287e-06, "loss": 0.6913, "step": 6287 }, { "epoch": 0.18358588070421303, "grad_norm": 0.8207040320832072, "learning_rate": 4.5357664233576644e-06, "loss": 0.7688, "step": 6288 }, { "epoch": 0.1836150769320604, "grad_norm": 0.7411664371799008, "learning_rate": 4.5356042173560424e-06, "loss": 0.7024, "step": 6289 }, { "epoch": 0.18364427315990775, "grad_norm": 0.678312992240985, "learning_rate": 4.5354420113544205e-06, "loss": 0.5822, "step": 6290 }, { "epoch": 0.1836734693877551, "grad_norm": 0.9771590819897812, "learning_rate": 4.5352798053527985e-06, "loss": 0.7127, "step": 6291 }, { "epoch": 0.18370266561560247, "grad_norm": 0.7405714519386333, "learning_rate": 4.5351175993511765e-06, "loss": 0.7015, "step": 6292 }, { "epoch": 0.18373186184344983, "grad_norm": 0.7458361918901351, "learning_rate": 4.5349553933495545e-06, "loss": 0.6856, "step": 6293 }, { "epoch": 0.1837610580712972, "grad_norm": 0.7770887461523727, "learning_rate": 4.5347931873479325e-06, "loss": 0.7311, "step": 6294 }, { "epoch": 0.18379025429914456, "grad_norm": 0.7121416434281598, "learning_rate": 4.5346309813463105e-06, "loss": 0.6708, "step": 6295 }, { "epoch": 0.18381945052699192, "grad_norm": 0.8299822466285607, "learning_rate": 4.5344687753446885e-06, "loss": 0.6562, "step": 6296 }, { "epoch": 0.18384864675483928, "grad_norm": 0.7748422394010179, "learning_rate": 4.534306569343066e-06, "loss": 0.7498, "step": 6297 }, { "epoch": 0.18387784298268664, "grad_norm": 0.7233860178711303, "learning_rate": 4.534144363341444e-06, "loss": 0.6544, "step": 6298 }, { "epoch": 0.183907039210534, "grad_norm": 0.753003881026648, "learning_rate": 4.533982157339822e-06, "loss": 0.7362, "step": 6299 }, { "epoch": 0.18393623543838136, "grad_norm": 0.7880216403020772, "learning_rate": 4.5338199513382e-06, "loss": 0.7692, "step": 6300 }, { "epoch": 0.18396543166622872, "grad_norm": 0.7664353478570616, "learning_rate": 4.533657745336578e-06, "loss": 0.7062, "step": 6301 }, { "epoch": 0.18399462789407608, "grad_norm": 0.8284380241418579, "learning_rate": 4.533495539334956e-06, "loss": 0.7608, "step": 6302 }, { "epoch": 0.18402382412192345, "grad_norm": 0.7896224573118206, "learning_rate": 4.533333333333334e-06, "loss": 0.7015, "step": 6303 }, { "epoch": 0.1840530203497708, "grad_norm": 0.6937677526598974, "learning_rate": 4.533171127331712e-06, "loss": 0.6053, "step": 6304 }, { "epoch": 0.18408221657761817, "grad_norm": 0.6405434527942265, "learning_rate": 4.53300892133009e-06, "loss": 0.5342, "step": 6305 }, { "epoch": 0.18411141280546553, "grad_norm": 0.8352557866445207, "learning_rate": 4.532846715328468e-06, "loss": 0.7454, "step": 6306 }, { "epoch": 0.1841406090333129, "grad_norm": 0.7609451816582492, "learning_rate": 4.532684509326846e-06, "loss": 0.7212, "step": 6307 }, { "epoch": 0.18416980526116025, "grad_norm": 2.3091362803975652, "learning_rate": 4.532522303325224e-06, "loss": 0.7337, "step": 6308 }, { "epoch": 0.1841990014890076, "grad_norm": 0.790968322685979, "learning_rate": 4.532360097323602e-06, "loss": 0.656, "step": 6309 }, { "epoch": 0.18422819771685497, "grad_norm": 0.7900390437947121, "learning_rate": 4.53219789132198e-06, "loss": 0.7176, "step": 6310 }, { "epoch": 0.18425739394470234, "grad_norm": 0.7808313622881697, "learning_rate": 4.532035685320357e-06, "loss": 0.7509, "step": 6311 }, { "epoch": 0.1842865901725497, "grad_norm": 0.789924117714263, "learning_rate": 4.531873479318735e-06, "loss": 0.6952, "step": 6312 }, { "epoch": 0.18431578640039706, "grad_norm": 0.7592681944779168, "learning_rate": 4.531711273317113e-06, "loss": 0.6733, "step": 6313 }, { "epoch": 0.18434498262824442, "grad_norm": 0.8213568764347167, "learning_rate": 4.531549067315491e-06, "loss": 0.6836, "step": 6314 }, { "epoch": 0.18437417885609178, "grad_norm": 0.8088504716687717, "learning_rate": 4.531386861313869e-06, "loss": 0.6317, "step": 6315 }, { "epoch": 0.18440337508393914, "grad_norm": 0.7878029872215503, "learning_rate": 4.531224655312247e-06, "loss": 0.7499, "step": 6316 }, { "epoch": 0.18443257131178653, "grad_norm": 0.7881759283186849, "learning_rate": 4.531062449310625e-06, "loss": 0.6499, "step": 6317 }, { "epoch": 0.1844617675396339, "grad_norm": 0.7610413638822361, "learning_rate": 4.530900243309003e-06, "loss": 0.6905, "step": 6318 }, { "epoch": 0.18449096376748125, "grad_norm": 0.7661613355465644, "learning_rate": 4.530738037307381e-06, "loss": 0.6806, "step": 6319 }, { "epoch": 0.18452015999532861, "grad_norm": 0.7752677241225467, "learning_rate": 4.530575831305758e-06, "loss": 0.7398, "step": 6320 }, { "epoch": 0.18454935622317598, "grad_norm": 0.772333321001024, "learning_rate": 4.530413625304136e-06, "loss": 0.7123, "step": 6321 }, { "epoch": 0.18457855245102334, "grad_norm": 0.7973127180684353, "learning_rate": 4.530251419302514e-06, "loss": 0.6251, "step": 6322 }, { "epoch": 0.1846077486788707, "grad_norm": 0.8027993361181982, "learning_rate": 4.530089213300892e-06, "loss": 0.6565, "step": 6323 }, { "epoch": 0.18463694490671806, "grad_norm": 0.7709295827770617, "learning_rate": 4.529927007299271e-06, "loss": 0.7018, "step": 6324 }, { "epoch": 0.18466614113456542, "grad_norm": 0.7707260659953568, "learning_rate": 4.529764801297649e-06, "loss": 0.7229, "step": 6325 }, { "epoch": 0.18469533736241278, "grad_norm": 0.7462193717188693, "learning_rate": 4.529602595296026e-06, "loss": 0.6645, "step": 6326 }, { "epoch": 0.18472453359026014, "grad_norm": 0.7561686863205256, "learning_rate": 4.529440389294404e-06, "loss": 0.7443, "step": 6327 }, { "epoch": 0.1847537298181075, "grad_norm": 0.7315454962362941, "learning_rate": 4.529278183292782e-06, "loss": 0.6812, "step": 6328 }, { "epoch": 0.18478292604595487, "grad_norm": 0.8508526838918838, "learning_rate": 4.52911597729116e-06, "loss": 0.8069, "step": 6329 }, { "epoch": 0.18481212227380223, "grad_norm": 0.8225500142639998, "learning_rate": 4.528953771289538e-06, "loss": 0.7567, "step": 6330 }, { "epoch": 0.1848413185016496, "grad_norm": 0.7317308213883859, "learning_rate": 4.528791565287916e-06, "loss": 0.622, "step": 6331 }, { "epoch": 0.18487051472949695, "grad_norm": 0.7528144840744013, "learning_rate": 4.528629359286294e-06, "loss": 0.6546, "step": 6332 }, { "epoch": 0.1848997109573443, "grad_norm": 0.8099159264679001, "learning_rate": 4.528467153284672e-06, "loss": 0.6753, "step": 6333 }, { "epoch": 0.18492890718519167, "grad_norm": 0.7705910457281153, "learning_rate": 4.52830494728305e-06, "loss": 0.7305, "step": 6334 }, { "epoch": 0.18495810341303903, "grad_norm": 0.7825124692062042, "learning_rate": 4.528142741281427e-06, "loss": 0.7616, "step": 6335 }, { "epoch": 0.1849872996408864, "grad_norm": 0.7032191779223513, "learning_rate": 4.527980535279805e-06, "loss": 0.6587, "step": 6336 }, { "epoch": 0.18501649586873375, "grad_norm": 0.708453974864573, "learning_rate": 4.527818329278183e-06, "loss": 0.6531, "step": 6337 }, { "epoch": 0.18504569209658112, "grad_norm": 0.7559657669861266, "learning_rate": 4.527656123276561e-06, "loss": 0.672, "step": 6338 }, { "epoch": 0.18507488832442848, "grad_norm": 0.7740664342974898, "learning_rate": 4.527493917274939e-06, "loss": 0.6687, "step": 6339 }, { "epoch": 0.18510408455227584, "grad_norm": 0.7281442075394556, "learning_rate": 4.527331711273317e-06, "loss": 0.6404, "step": 6340 }, { "epoch": 0.1851332807801232, "grad_norm": 0.7587453941625807, "learning_rate": 4.527169505271695e-06, "loss": 0.6677, "step": 6341 }, { "epoch": 0.18516247700797056, "grad_norm": 0.8671075253839777, "learning_rate": 4.527007299270073e-06, "loss": 0.724, "step": 6342 }, { "epoch": 0.18519167323581792, "grad_norm": 0.8251661803253533, "learning_rate": 4.5268450932684514e-06, "loss": 0.7398, "step": 6343 }, { "epoch": 0.18522086946366528, "grad_norm": 0.8385810576201299, "learning_rate": 4.5266828872668294e-06, "loss": 0.7562, "step": 6344 }, { "epoch": 0.18525006569151264, "grad_norm": 0.7845058174170289, "learning_rate": 4.5265206812652074e-06, "loss": 0.6512, "step": 6345 }, { "epoch": 0.18527926191936, "grad_norm": 0.8115604652369538, "learning_rate": 4.5263584752635854e-06, "loss": 0.6758, "step": 6346 }, { "epoch": 0.1853084581472074, "grad_norm": 0.7412580664100148, "learning_rate": 4.5261962692619635e-06, "loss": 0.6486, "step": 6347 }, { "epoch": 0.18533765437505476, "grad_norm": 0.7341814742693223, "learning_rate": 4.5260340632603415e-06, "loss": 0.6904, "step": 6348 }, { "epoch": 0.18536685060290212, "grad_norm": 0.7403275065577415, "learning_rate": 4.525871857258719e-06, "loss": 0.6818, "step": 6349 }, { "epoch": 0.18539604683074948, "grad_norm": 0.7553257595034759, "learning_rate": 4.525709651257097e-06, "loss": 0.6878, "step": 6350 }, { "epoch": 0.18542524305859684, "grad_norm": 0.7694670679785174, "learning_rate": 4.525547445255475e-06, "loss": 0.7241, "step": 6351 }, { "epoch": 0.1854544392864442, "grad_norm": 0.7866413313095499, "learning_rate": 4.525385239253853e-06, "loss": 0.704, "step": 6352 }, { "epoch": 0.18548363551429156, "grad_norm": 0.7322854374545502, "learning_rate": 4.525223033252231e-06, "loss": 0.6791, "step": 6353 }, { "epoch": 0.18551283174213892, "grad_norm": 0.7505393210020836, "learning_rate": 4.525060827250609e-06, "loss": 0.6738, "step": 6354 }, { "epoch": 0.18554202796998628, "grad_norm": 0.953638688368246, "learning_rate": 4.524898621248987e-06, "loss": 0.6471, "step": 6355 }, { "epoch": 0.18557122419783365, "grad_norm": 0.9359189243237597, "learning_rate": 4.524736415247365e-06, "loss": 0.6711, "step": 6356 }, { "epoch": 0.185600420425681, "grad_norm": 0.6955975960765141, "learning_rate": 4.524574209245743e-06, "loss": 0.6017, "step": 6357 }, { "epoch": 0.18562961665352837, "grad_norm": 0.7756783166454496, "learning_rate": 4.52441200324412e-06, "loss": 0.6987, "step": 6358 }, { "epoch": 0.18565881288137573, "grad_norm": 0.809662451402123, "learning_rate": 4.524249797242498e-06, "loss": 0.7659, "step": 6359 }, { "epoch": 0.1856880091092231, "grad_norm": 0.7590868291220019, "learning_rate": 4.524087591240876e-06, "loss": 0.6575, "step": 6360 }, { "epoch": 0.18571720533707045, "grad_norm": 0.7523155102009128, "learning_rate": 4.523925385239254e-06, "loss": 0.6819, "step": 6361 }, { "epoch": 0.1857464015649178, "grad_norm": 0.7769963093562752, "learning_rate": 4.523763179237633e-06, "loss": 0.7078, "step": 6362 }, { "epoch": 0.18577559779276517, "grad_norm": 0.7049950866786373, "learning_rate": 4.523600973236011e-06, "loss": 0.5901, "step": 6363 }, { "epoch": 0.18580479402061253, "grad_norm": 0.7854070034851066, "learning_rate": 4.523438767234388e-06, "loss": 0.7486, "step": 6364 }, { "epoch": 0.1858339902484599, "grad_norm": 0.8969235000519317, "learning_rate": 4.523276561232766e-06, "loss": 0.7734, "step": 6365 }, { "epoch": 0.18586318647630726, "grad_norm": 0.8162952995697891, "learning_rate": 4.523114355231144e-06, "loss": 0.7738, "step": 6366 }, { "epoch": 0.18589238270415462, "grad_norm": 0.7728803111176294, "learning_rate": 4.522952149229522e-06, "loss": 0.73, "step": 6367 }, { "epoch": 0.18592157893200198, "grad_norm": 0.7838063386433998, "learning_rate": 4.5227899432279e-06, "loss": 0.7123, "step": 6368 }, { "epoch": 0.18595077515984934, "grad_norm": 0.7413490150482623, "learning_rate": 4.522627737226278e-06, "loss": 0.7355, "step": 6369 }, { "epoch": 0.1859799713876967, "grad_norm": 0.7077693748437188, "learning_rate": 4.522465531224656e-06, "loss": 0.5384, "step": 6370 }, { "epoch": 0.18600916761554406, "grad_norm": 0.7745883308807987, "learning_rate": 4.522303325223034e-06, "loss": 0.7194, "step": 6371 }, { "epoch": 0.18603836384339142, "grad_norm": 0.7177452049639784, "learning_rate": 4.522141119221412e-06, "loss": 0.6331, "step": 6372 }, { "epoch": 0.18606756007123879, "grad_norm": 0.6716994751960875, "learning_rate": 4.521978913219789e-06, "loss": 0.5988, "step": 6373 }, { "epoch": 0.18609675629908615, "grad_norm": 0.7646327778457077, "learning_rate": 4.521816707218167e-06, "loss": 0.7081, "step": 6374 }, { "epoch": 0.1861259525269335, "grad_norm": 0.7557339089978881, "learning_rate": 4.521654501216545e-06, "loss": 0.7388, "step": 6375 }, { "epoch": 0.18615514875478087, "grad_norm": 0.7525407578039324, "learning_rate": 4.521492295214923e-06, "loss": 0.636, "step": 6376 }, { "epoch": 0.18618434498262826, "grad_norm": 0.7250067451282396, "learning_rate": 4.521330089213301e-06, "loss": 0.6187, "step": 6377 }, { "epoch": 0.18621354121047562, "grad_norm": 0.7965880462508129, "learning_rate": 4.521167883211679e-06, "loss": 0.6826, "step": 6378 }, { "epoch": 0.18624273743832298, "grad_norm": 0.799296743934162, "learning_rate": 4.521005677210057e-06, "loss": 0.6741, "step": 6379 }, { "epoch": 0.18627193366617034, "grad_norm": 0.8032909786065462, "learning_rate": 4.520843471208435e-06, "loss": 0.7144, "step": 6380 }, { "epoch": 0.1863011298940177, "grad_norm": 0.8018694422435959, "learning_rate": 4.520681265206813e-06, "loss": 0.7664, "step": 6381 }, { "epoch": 0.18633032612186506, "grad_norm": 0.8009829767624396, "learning_rate": 4.520519059205191e-06, "loss": 0.7969, "step": 6382 }, { "epoch": 0.18635952234971243, "grad_norm": 0.9353679798109891, "learning_rate": 4.520356853203569e-06, "loss": 0.6537, "step": 6383 }, { "epoch": 0.1863887185775598, "grad_norm": 0.8404552557458391, "learning_rate": 4.520194647201947e-06, "loss": 0.7258, "step": 6384 }, { "epoch": 0.18641791480540715, "grad_norm": 0.7213606298036073, "learning_rate": 4.520032441200325e-06, "loss": 0.6922, "step": 6385 }, { "epoch": 0.1864471110332545, "grad_norm": 0.7330810192945539, "learning_rate": 4.519870235198703e-06, "loss": 0.6604, "step": 6386 }, { "epoch": 0.18647630726110187, "grad_norm": 0.7515859626254024, "learning_rate": 4.51970802919708e-06, "loss": 0.674, "step": 6387 }, { "epoch": 0.18650550348894923, "grad_norm": 0.7594886545712591, "learning_rate": 4.519545823195458e-06, "loss": 0.7387, "step": 6388 }, { "epoch": 0.1865346997167966, "grad_norm": 0.7972944899332228, "learning_rate": 4.519383617193836e-06, "loss": 0.7303, "step": 6389 }, { "epoch": 0.18656389594464395, "grad_norm": 0.7635022644889164, "learning_rate": 4.519221411192214e-06, "loss": 0.7217, "step": 6390 }, { "epoch": 0.18659309217249131, "grad_norm": 0.702218456124966, "learning_rate": 4.519059205190592e-06, "loss": 0.6809, "step": 6391 }, { "epoch": 0.18662228840033868, "grad_norm": 0.7060648916794273, "learning_rate": 4.51889699918897e-06, "loss": 0.6455, "step": 6392 }, { "epoch": 0.18665148462818604, "grad_norm": 0.6646100694650342, "learning_rate": 4.518734793187348e-06, "loss": 0.5521, "step": 6393 }, { "epoch": 0.1866806808560334, "grad_norm": 0.710647706033982, "learning_rate": 4.518572587185726e-06, "loss": 0.6699, "step": 6394 }, { "epoch": 0.18670987708388076, "grad_norm": 0.7842284084257797, "learning_rate": 4.518410381184104e-06, "loss": 0.6726, "step": 6395 }, { "epoch": 0.18673907331172812, "grad_norm": 0.7054242386041404, "learning_rate": 4.5182481751824815e-06, "loss": 0.6059, "step": 6396 }, { "epoch": 0.18676826953957548, "grad_norm": 0.7427763453555398, "learning_rate": 4.5180859691808596e-06, "loss": 0.7349, "step": 6397 }, { "epoch": 0.18679746576742284, "grad_norm": 0.821943335133172, "learning_rate": 4.5179237631792376e-06, "loss": 0.6855, "step": 6398 }, { "epoch": 0.1868266619952702, "grad_norm": 0.7505795206327758, "learning_rate": 4.5177615571776156e-06, "loss": 0.7385, "step": 6399 }, { "epoch": 0.18685585822311757, "grad_norm": 0.7456553724046762, "learning_rate": 4.517599351175994e-06, "loss": 0.6799, "step": 6400 }, { "epoch": 0.18688505445096493, "grad_norm": 0.7115233171009797, "learning_rate": 4.5174371451743724e-06, "loss": 0.6085, "step": 6401 }, { "epoch": 0.1869142506788123, "grad_norm": 0.7214910488548304, "learning_rate": 4.51727493917275e-06, "loss": 0.6649, "step": 6402 }, { "epoch": 0.18694344690665965, "grad_norm": 0.7224106307719698, "learning_rate": 4.517112733171128e-06, "loss": 0.6633, "step": 6403 }, { "epoch": 0.186972643134507, "grad_norm": 0.7434661252297654, "learning_rate": 4.516950527169506e-06, "loss": 0.6503, "step": 6404 }, { "epoch": 0.18700183936235437, "grad_norm": 0.7270186283937246, "learning_rate": 4.516788321167884e-06, "loss": 0.7024, "step": 6405 }, { "epoch": 0.18703103559020173, "grad_norm": 0.7680327274452093, "learning_rate": 4.516626115166262e-06, "loss": 0.656, "step": 6406 }, { "epoch": 0.18706023181804912, "grad_norm": 0.7429660503191454, "learning_rate": 4.51646390916464e-06, "loss": 0.6586, "step": 6407 }, { "epoch": 0.18708942804589648, "grad_norm": 0.8841961057395749, "learning_rate": 4.516301703163018e-06, "loss": 0.7502, "step": 6408 }, { "epoch": 0.18711862427374384, "grad_norm": 0.8934942492519115, "learning_rate": 4.516139497161396e-06, "loss": 0.7438, "step": 6409 }, { "epoch": 0.1871478205015912, "grad_norm": 0.7546218342643306, "learning_rate": 4.515977291159774e-06, "loss": 0.6688, "step": 6410 }, { "epoch": 0.18717701672943857, "grad_norm": 0.7338107416829032, "learning_rate": 4.515815085158151e-06, "loss": 0.6535, "step": 6411 }, { "epoch": 0.18720621295728593, "grad_norm": 0.9704791982158841, "learning_rate": 4.515652879156529e-06, "loss": 0.7788, "step": 6412 }, { "epoch": 0.1872354091851333, "grad_norm": 0.792274340045344, "learning_rate": 4.515490673154907e-06, "loss": 0.6621, "step": 6413 }, { "epoch": 0.18726460541298065, "grad_norm": 0.8078710815263371, "learning_rate": 4.515328467153285e-06, "loss": 0.7182, "step": 6414 }, { "epoch": 0.187293801640828, "grad_norm": 0.8521986463680704, "learning_rate": 4.515166261151663e-06, "loss": 0.8158, "step": 6415 }, { "epoch": 0.18732299786867537, "grad_norm": 0.8027048051357265, "learning_rate": 4.515004055150041e-06, "loss": 0.7399, "step": 6416 }, { "epoch": 0.18735219409652273, "grad_norm": 0.8056666969539474, "learning_rate": 4.514841849148419e-06, "loss": 0.7361, "step": 6417 }, { "epoch": 0.1873813903243701, "grad_norm": 0.8522068912156219, "learning_rate": 4.514679643146797e-06, "loss": 0.6893, "step": 6418 }, { "epoch": 0.18741058655221746, "grad_norm": 0.7776444660132853, "learning_rate": 4.514517437145175e-06, "loss": 0.7698, "step": 6419 }, { "epoch": 0.18743978278006482, "grad_norm": 0.8577616207842871, "learning_rate": 4.514355231143553e-06, "loss": 0.7455, "step": 6420 }, { "epoch": 0.18746897900791218, "grad_norm": 0.7559667570525442, "learning_rate": 4.514193025141931e-06, "loss": 0.6574, "step": 6421 }, { "epoch": 0.18749817523575954, "grad_norm": 0.7652148579274105, "learning_rate": 4.514030819140309e-06, "loss": 0.7612, "step": 6422 }, { "epoch": 0.1875273714636069, "grad_norm": 0.8264855808729339, "learning_rate": 4.513868613138687e-06, "loss": 0.7209, "step": 6423 }, { "epoch": 0.18755656769145426, "grad_norm": 0.7562939438914043, "learning_rate": 4.513706407137065e-06, "loss": 0.6947, "step": 6424 }, { "epoch": 0.18758576391930162, "grad_norm": 0.8801731525104155, "learning_rate": 4.513544201135442e-06, "loss": 0.7539, "step": 6425 }, { "epoch": 0.18761496014714898, "grad_norm": 0.7145181456729452, "learning_rate": 4.51338199513382e-06, "loss": 0.6383, "step": 6426 }, { "epoch": 0.18764415637499635, "grad_norm": 0.8656337415208148, "learning_rate": 4.513219789132198e-06, "loss": 0.8211, "step": 6427 }, { "epoch": 0.1876733526028437, "grad_norm": 0.738850550531858, "learning_rate": 4.513057583130576e-06, "loss": 0.7218, "step": 6428 }, { "epoch": 0.18770254883069107, "grad_norm": 0.7753091407433033, "learning_rate": 4.512895377128954e-06, "loss": 0.7737, "step": 6429 }, { "epoch": 0.18773174505853843, "grad_norm": 0.7409837790493464, "learning_rate": 4.512733171127332e-06, "loss": 0.7293, "step": 6430 }, { "epoch": 0.1877609412863858, "grad_norm": 0.7817427189653449, "learning_rate": 4.51257096512571e-06, "loss": 0.6949, "step": 6431 }, { "epoch": 0.18779013751423315, "grad_norm": 0.6994858513569102, "learning_rate": 4.512408759124088e-06, "loss": 0.6024, "step": 6432 }, { "epoch": 0.1878193337420805, "grad_norm": 0.7376275964291112, "learning_rate": 4.512246553122466e-06, "loss": 0.6982, "step": 6433 }, { "epoch": 0.18784852996992787, "grad_norm": 0.6901871784086129, "learning_rate": 4.512084347120843e-06, "loss": 0.594, "step": 6434 }, { "epoch": 0.18787772619777524, "grad_norm": 0.724340040389505, "learning_rate": 4.511922141119221e-06, "loss": 0.6565, "step": 6435 }, { "epoch": 0.1879069224256226, "grad_norm": 0.7690840275039784, "learning_rate": 4.511759935117599e-06, "loss": 0.6663, "step": 6436 }, { "epoch": 0.18793611865346996, "grad_norm": 0.7667176530111411, "learning_rate": 4.511597729115978e-06, "loss": 0.7118, "step": 6437 }, { "epoch": 0.18796531488131735, "grad_norm": 0.735309559502575, "learning_rate": 4.511435523114356e-06, "loss": 0.6514, "step": 6438 }, { "epoch": 0.1879945111091647, "grad_norm": 0.8202891018275527, "learning_rate": 4.511273317112734e-06, "loss": 0.7867, "step": 6439 }, { "epoch": 0.18802370733701207, "grad_norm": 0.6972501876009413, "learning_rate": 4.511111111111111e-06, "loss": 0.5912, "step": 6440 }, { "epoch": 0.18805290356485943, "grad_norm": 0.7624617771576109, "learning_rate": 4.510948905109489e-06, "loss": 0.6977, "step": 6441 }, { "epoch": 0.1880820997927068, "grad_norm": 0.7252332518446953, "learning_rate": 4.510786699107867e-06, "loss": 0.6626, "step": 6442 }, { "epoch": 0.18811129602055415, "grad_norm": 0.7454918195098047, "learning_rate": 4.510624493106245e-06, "loss": 0.7, "step": 6443 }, { "epoch": 0.18814049224840151, "grad_norm": 0.996660320779748, "learning_rate": 4.510462287104623e-06, "loss": 0.8644, "step": 6444 }, { "epoch": 0.18816968847624888, "grad_norm": 0.7193392412001077, "learning_rate": 4.510300081103001e-06, "loss": 0.6255, "step": 6445 }, { "epoch": 0.18819888470409624, "grad_norm": 0.7465559116130446, "learning_rate": 4.510137875101379e-06, "loss": 0.6424, "step": 6446 }, { "epoch": 0.1882280809319436, "grad_norm": 1.631586760672938, "learning_rate": 4.509975669099757e-06, "loss": 0.6618, "step": 6447 }, { "epoch": 0.18825727715979096, "grad_norm": 0.7188276154589921, "learning_rate": 4.509813463098135e-06, "loss": 0.6053, "step": 6448 }, { "epoch": 0.18828647338763832, "grad_norm": 0.740377504329711, "learning_rate": 4.5096512570965125e-06, "loss": 0.6867, "step": 6449 }, { "epoch": 0.18831566961548568, "grad_norm": 0.7879930115692003, "learning_rate": 4.5094890510948905e-06, "loss": 0.78, "step": 6450 }, { "epoch": 0.18834486584333304, "grad_norm": 0.7335412684817663, "learning_rate": 4.5093268450932685e-06, "loss": 0.6799, "step": 6451 }, { "epoch": 0.1883740620711804, "grad_norm": 0.8156611760363434, "learning_rate": 4.5091646390916465e-06, "loss": 0.6797, "step": 6452 }, { "epoch": 0.18840325829902776, "grad_norm": 0.7940400800774082, "learning_rate": 4.5090024330900245e-06, "loss": 0.7652, "step": 6453 }, { "epoch": 0.18843245452687513, "grad_norm": 0.7470324188133464, "learning_rate": 4.5088402270884026e-06, "loss": 0.6739, "step": 6454 }, { "epoch": 0.1884616507547225, "grad_norm": 0.7590569036383411, "learning_rate": 4.5086780210867806e-06, "loss": 0.6848, "step": 6455 }, { "epoch": 0.18849084698256985, "grad_norm": 0.8080058276448929, "learning_rate": 4.5085158150851586e-06, "loss": 0.7166, "step": 6456 }, { "epoch": 0.1885200432104172, "grad_norm": 0.781009295334596, "learning_rate": 4.5083536090835366e-06, "loss": 0.7466, "step": 6457 }, { "epoch": 0.18854923943826457, "grad_norm": 0.8142572830924449, "learning_rate": 4.508191403081915e-06, "loss": 0.6723, "step": 6458 }, { "epoch": 0.18857843566611193, "grad_norm": 0.7452341021515283, "learning_rate": 4.508029197080293e-06, "loss": 0.6892, "step": 6459 }, { "epoch": 0.1886076318939593, "grad_norm": 0.7864815372386382, "learning_rate": 4.507866991078671e-06, "loss": 0.6796, "step": 6460 }, { "epoch": 0.18863682812180665, "grad_norm": 0.7646878818314646, "learning_rate": 4.507704785077049e-06, "loss": 0.7248, "step": 6461 }, { "epoch": 0.18866602434965402, "grad_norm": 0.7674587413319038, "learning_rate": 4.507542579075427e-06, "loss": 0.6695, "step": 6462 }, { "epoch": 0.18869522057750138, "grad_norm": 0.78846485206283, "learning_rate": 4.507380373073804e-06, "loss": 0.7348, "step": 6463 }, { "epoch": 0.18872441680534874, "grad_norm": 0.7454182722269547, "learning_rate": 4.507218167072182e-06, "loss": 0.7268, "step": 6464 }, { "epoch": 0.1887536130331961, "grad_norm": 0.7230339220985534, "learning_rate": 4.50705596107056e-06, "loss": 0.6603, "step": 6465 }, { "epoch": 0.18878280926104346, "grad_norm": 0.7284378104859133, "learning_rate": 4.506893755068938e-06, "loss": 0.5789, "step": 6466 }, { "epoch": 0.18881200548889082, "grad_norm": 0.7313721414482383, "learning_rate": 4.506731549067316e-06, "loss": 0.6297, "step": 6467 }, { "epoch": 0.1888412017167382, "grad_norm": 0.8263334323261637, "learning_rate": 4.506569343065694e-06, "loss": 0.6909, "step": 6468 }, { "epoch": 0.18887039794458557, "grad_norm": 0.7545603693000057, "learning_rate": 4.506407137064072e-06, "loss": 0.7334, "step": 6469 }, { "epoch": 0.18889959417243293, "grad_norm": 0.7578985706836602, "learning_rate": 4.50624493106245e-06, "loss": 0.6667, "step": 6470 }, { "epoch": 0.1889287904002803, "grad_norm": 0.7279646347243467, "learning_rate": 4.506082725060828e-06, "loss": 0.6908, "step": 6471 }, { "epoch": 0.18895798662812766, "grad_norm": 0.7270463436530901, "learning_rate": 4.505920519059205e-06, "loss": 0.6627, "step": 6472 }, { "epoch": 0.18898718285597502, "grad_norm": 0.7780593752091891, "learning_rate": 4.505758313057583e-06, "loss": 0.6964, "step": 6473 }, { "epoch": 0.18901637908382238, "grad_norm": 0.6941661712852422, "learning_rate": 4.505596107055961e-06, "loss": 0.5547, "step": 6474 }, { "epoch": 0.18904557531166974, "grad_norm": 0.8269230291294024, "learning_rate": 4.50543390105434e-06, "loss": 0.7375, "step": 6475 }, { "epoch": 0.1890747715395171, "grad_norm": 0.7761397590031427, "learning_rate": 4.505271695052718e-06, "loss": 0.6478, "step": 6476 }, { "epoch": 0.18910396776736446, "grad_norm": 0.8068453706201237, "learning_rate": 4.505109489051096e-06, "loss": 0.6473, "step": 6477 }, { "epoch": 0.18913316399521182, "grad_norm": 0.8381796884552735, "learning_rate": 4.504947283049473e-06, "loss": 0.7466, "step": 6478 }, { "epoch": 0.18916236022305918, "grad_norm": 0.7654157893381024, "learning_rate": 4.504785077047851e-06, "loss": 0.717, "step": 6479 }, { "epoch": 0.18919155645090655, "grad_norm": 0.7489537289019285, "learning_rate": 4.504622871046229e-06, "loss": 0.6981, "step": 6480 }, { "epoch": 0.1892207526787539, "grad_norm": 0.7594377101155757, "learning_rate": 4.504460665044607e-06, "loss": 0.7157, "step": 6481 }, { "epoch": 0.18924994890660127, "grad_norm": 0.7546790212646802, "learning_rate": 4.504298459042985e-06, "loss": 0.6911, "step": 6482 }, { "epoch": 0.18927914513444863, "grad_norm": 0.7653520223694327, "learning_rate": 4.504136253041363e-06, "loss": 0.7261, "step": 6483 }, { "epoch": 0.189308341362296, "grad_norm": 0.8080280841455835, "learning_rate": 4.503974047039741e-06, "loss": 0.6789, "step": 6484 }, { "epoch": 0.18933753759014335, "grad_norm": 0.7248431608912831, "learning_rate": 4.503811841038119e-06, "loss": 0.6624, "step": 6485 }, { "epoch": 0.1893667338179907, "grad_norm": 0.6915517572227501, "learning_rate": 4.503649635036497e-06, "loss": 0.6145, "step": 6486 }, { "epoch": 0.18939593004583807, "grad_norm": 0.7316234501632781, "learning_rate": 4.503487429034874e-06, "loss": 0.6449, "step": 6487 }, { "epoch": 0.18942512627368543, "grad_norm": 0.9800134692900735, "learning_rate": 4.503325223033252e-06, "loss": 0.8088, "step": 6488 }, { "epoch": 0.1894543225015328, "grad_norm": 0.752423486368315, "learning_rate": 4.50316301703163e-06, "loss": 0.6769, "step": 6489 }, { "epoch": 0.18948351872938016, "grad_norm": 0.8761612051135718, "learning_rate": 4.503000811030008e-06, "loss": 0.7477, "step": 6490 }, { "epoch": 0.18951271495722752, "grad_norm": 0.7624000049666451, "learning_rate": 4.502838605028386e-06, "loss": 0.7443, "step": 6491 }, { "epoch": 0.18954191118507488, "grad_norm": 0.7770015355790449, "learning_rate": 4.502676399026764e-06, "loss": 0.6926, "step": 6492 }, { "epoch": 0.18957110741292224, "grad_norm": 0.8256213245648725, "learning_rate": 4.502514193025142e-06, "loss": 0.765, "step": 6493 }, { "epoch": 0.1896003036407696, "grad_norm": 0.7653174279365433, "learning_rate": 4.50235198702352e-06, "loss": 0.7437, "step": 6494 }, { "epoch": 0.18962949986861696, "grad_norm": 0.6873394912321994, "learning_rate": 4.502189781021898e-06, "loss": 0.6331, "step": 6495 }, { "epoch": 0.18965869609646432, "grad_norm": 0.7806230147510773, "learning_rate": 4.502027575020276e-06, "loss": 0.6569, "step": 6496 }, { "epoch": 0.18968789232431169, "grad_norm": 0.7016135176353687, "learning_rate": 4.501865369018654e-06, "loss": 0.5816, "step": 6497 }, { "epoch": 0.18971708855215907, "grad_norm": 0.771581040133424, "learning_rate": 4.501703163017032e-06, "loss": 0.711, "step": 6498 }, { "epoch": 0.18974628478000644, "grad_norm": 0.7129936718230688, "learning_rate": 4.50154095701541e-06, "loss": 0.6177, "step": 6499 }, { "epoch": 0.1897754810078538, "grad_norm": 0.7303203219510647, "learning_rate": 4.501378751013788e-06, "loss": 0.6501, "step": 6500 }, { "epoch": 0.18980467723570116, "grad_norm": 0.8208323774250705, "learning_rate": 4.5012165450121655e-06, "loss": 0.6426, "step": 6501 }, { "epoch": 0.18983387346354852, "grad_norm": 0.7520557365099587, "learning_rate": 4.5010543390105435e-06, "loss": 0.655, "step": 6502 }, { "epoch": 0.18986306969139588, "grad_norm": 0.9616095087838215, "learning_rate": 4.5008921330089215e-06, "loss": 0.7473, "step": 6503 }, { "epoch": 0.18989226591924324, "grad_norm": 0.7485705417415986, "learning_rate": 4.5007299270072995e-06, "loss": 0.7033, "step": 6504 }, { "epoch": 0.1899214621470906, "grad_norm": 0.7948284455323338, "learning_rate": 4.5005677210056775e-06, "loss": 0.7719, "step": 6505 }, { "epoch": 0.18995065837493796, "grad_norm": 0.7948681715081266, "learning_rate": 4.5004055150040555e-06, "loss": 0.7569, "step": 6506 }, { "epoch": 0.18997985460278533, "grad_norm": 0.7689140861688574, "learning_rate": 4.5002433090024335e-06, "loss": 0.6688, "step": 6507 }, { "epoch": 0.1900090508306327, "grad_norm": 0.7221356057606221, "learning_rate": 4.5000811030008115e-06, "loss": 0.6611, "step": 6508 }, { "epoch": 0.19003824705848005, "grad_norm": 0.672647198353077, "learning_rate": 4.4999188969991895e-06, "loss": 0.591, "step": 6509 }, { "epoch": 0.1900674432863274, "grad_norm": 0.8227593593681126, "learning_rate": 4.499756690997567e-06, "loss": 0.662, "step": 6510 }, { "epoch": 0.19009663951417477, "grad_norm": 0.7962771682137182, "learning_rate": 4.499594484995945e-06, "loss": 0.685, "step": 6511 }, { "epoch": 0.19012583574202213, "grad_norm": 0.7668076636795413, "learning_rate": 4.499432278994323e-06, "loss": 0.757, "step": 6512 }, { "epoch": 0.1901550319698695, "grad_norm": 0.7883372305253505, "learning_rate": 4.4992700729927016e-06, "loss": 0.7353, "step": 6513 }, { "epoch": 0.19018422819771685, "grad_norm": 0.7622283888097815, "learning_rate": 4.4991078669910796e-06, "loss": 0.7021, "step": 6514 }, { "epoch": 0.19021342442556421, "grad_norm": 0.7262354920794823, "learning_rate": 4.498945660989458e-06, "loss": 0.6819, "step": 6515 }, { "epoch": 0.19024262065341158, "grad_norm": 0.8314794209478665, "learning_rate": 4.498783454987835e-06, "loss": 0.7162, "step": 6516 }, { "epoch": 0.19027181688125894, "grad_norm": 0.9356132382891745, "learning_rate": 4.498621248986213e-06, "loss": 0.8125, "step": 6517 }, { "epoch": 0.1903010131091063, "grad_norm": 0.8112628035500108, "learning_rate": 4.498459042984591e-06, "loss": 0.7511, "step": 6518 }, { "epoch": 0.19033020933695366, "grad_norm": 0.6991307286809889, "learning_rate": 4.498296836982969e-06, "loss": 0.6367, "step": 6519 }, { "epoch": 0.19035940556480102, "grad_norm": 0.7424894112359443, "learning_rate": 4.498134630981347e-06, "loss": 0.7329, "step": 6520 }, { "epoch": 0.19038860179264838, "grad_norm": 0.7468774517430323, "learning_rate": 4.497972424979725e-06, "loss": 0.6691, "step": 6521 }, { "epoch": 0.19041779802049574, "grad_norm": 0.6999953953794544, "learning_rate": 4.497810218978103e-06, "loss": 0.5961, "step": 6522 }, { "epoch": 0.1904469942483431, "grad_norm": 0.8122531094720065, "learning_rate": 4.497648012976481e-06, "loss": 0.6805, "step": 6523 }, { "epoch": 0.19047619047619047, "grad_norm": 0.7416760439901662, "learning_rate": 4.497485806974858e-06, "loss": 0.7187, "step": 6524 }, { "epoch": 0.19050538670403783, "grad_norm": 0.7088164213862133, "learning_rate": 4.497323600973236e-06, "loss": 0.6419, "step": 6525 }, { "epoch": 0.1905345829318852, "grad_norm": 0.7730591130867697, "learning_rate": 4.497161394971614e-06, "loss": 0.6992, "step": 6526 }, { "epoch": 0.19056377915973255, "grad_norm": 0.813229917852062, "learning_rate": 4.496999188969992e-06, "loss": 0.7457, "step": 6527 }, { "epoch": 0.19059297538757994, "grad_norm": 0.7405251908257463, "learning_rate": 4.49683698296837e-06, "loss": 0.6396, "step": 6528 }, { "epoch": 0.1906221716154273, "grad_norm": 0.6715898244557483, "learning_rate": 4.496674776966748e-06, "loss": 0.5711, "step": 6529 }, { "epoch": 0.19065136784327466, "grad_norm": 0.7954686587143607, "learning_rate": 4.496512570965126e-06, "loss": 0.6895, "step": 6530 }, { "epoch": 0.19068056407112202, "grad_norm": 0.7749751083009777, "learning_rate": 4.496350364963504e-06, "loss": 0.6697, "step": 6531 }, { "epoch": 0.19070976029896938, "grad_norm": 0.7241995163655472, "learning_rate": 4.496188158961882e-06, "loss": 0.5822, "step": 6532 }, { "epoch": 0.19073895652681674, "grad_norm": 0.8259397956218505, "learning_rate": 4.49602595296026e-06, "loss": 0.6762, "step": 6533 }, { "epoch": 0.1907681527546641, "grad_norm": 1.1443122260556537, "learning_rate": 4.495863746958638e-06, "loss": 0.7831, "step": 6534 }, { "epoch": 0.19079734898251147, "grad_norm": 0.8252224141388615, "learning_rate": 4.495701540957016e-06, "loss": 0.7676, "step": 6535 }, { "epoch": 0.19082654521035883, "grad_norm": 0.7070099975246961, "learning_rate": 4.495539334955394e-06, "loss": 0.6551, "step": 6536 }, { "epoch": 0.1908557414382062, "grad_norm": 0.7448671648511392, "learning_rate": 4.495377128953772e-06, "loss": 0.7409, "step": 6537 }, { "epoch": 0.19088493766605355, "grad_norm": 0.7725866170710323, "learning_rate": 4.49521492295215e-06, "loss": 0.6666, "step": 6538 }, { "epoch": 0.1909141338939009, "grad_norm": 0.7017630470008318, "learning_rate": 4.495052716950527e-06, "loss": 0.5993, "step": 6539 }, { "epoch": 0.19094333012174827, "grad_norm": 0.7812620788667364, "learning_rate": 4.494890510948905e-06, "loss": 0.7384, "step": 6540 }, { "epoch": 0.19097252634959563, "grad_norm": 0.7024881833945137, "learning_rate": 4.494728304947283e-06, "loss": 0.5951, "step": 6541 }, { "epoch": 0.191001722577443, "grad_norm": 0.670822033199087, "learning_rate": 4.494566098945661e-06, "loss": 0.5655, "step": 6542 }, { "epoch": 0.19103091880529036, "grad_norm": 0.781527128126608, "learning_rate": 4.494403892944039e-06, "loss": 0.7862, "step": 6543 }, { "epoch": 0.19106011503313772, "grad_norm": 0.7648986465010533, "learning_rate": 4.494241686942417e-06, "loss": 0.6222, "step": 6544 }, { "epoch": 0.19108931126098508, "grad_norm": 0.7555772968926651, "learning_rate": 4.494079480940795e-06, "loss": 0.7201, "step": 6545 }, { "epoch": 0.19111850748883244, "grad_norm": 0.794593172061498, "learning_rate": 4.493917274939173e-06, "loss": 0.7037, "step": 6546 }, { "epoch": 0.1911477037166798, "grad_norm": 0.7698764135083046, "learning_rate": 4.493755068937551e-06, "loss": 0.6526, "step": 6547 }, { "epoch": 0.19117689994452716, "grad_norm": 0.7451991926010942, "learning_rate": 4.493592862935928e-06, "loss": 0.728, "step": 6548 }, { "epoch": 0.19120609617237452, "grad_norm": 0.8154607106672377, "learning_rate": 4.4934306569343064e-06, "loss": 0.7244, "step": 6549 }, { "epoch": 0.19123529240022188, "grad_norm": 0.769804777672382, "learning_rate": 4.4932684509326844e-06, "loss": 0.7772, "step": 6550 }, { "epoch": 0.19126448862806925, "grad_norm": 0.8191430646584007, "learning_rate": 4.493106244931063e-06, "loss": 0.7393, "step": 6551 }, { "epoch": 0.1912936848559166, "grad_norm": 0.7074687481562492, "learning_rate": 4.492944038929441e-06, "loss": 0.6508, "step": 6552 }, { "epoch": 0.19132288108376397, "grad_norm": 0.8098322630987373, "learning_rate": 4.492781832927819e-06, "loss": 0.7404, "step": 6553 }, { "epoch": 0.19135207731161133, "grad_norm": 0.7878523968213293, "learning_rate": 4.4926196269261965e-06, "loss": 0.6866, "step": 6554 }, { "epoch": 0.1913812735394587, "grad_norm": 0.7227344944260929, "learning_rate": 4.4924574209245745e-06, "loss": 0.592, "step": 6555 }, { "epoch": 0.19141046976730605, "grad_norm": 0.8078126882105369, "learning_rate": 4.4922952149229525e-06, "loss": 0.7604, "step": 6556 }, { "epoch": 0.1914396659951534, "grad_norm": 0.7085291684856061, "learning_rate": 4.4921330089213305e-06, "loss": 0.5937, "step": 6557 }, { "epoch": 0.1914688622230008, "grad_norm": 0.9793388650758054, "learning_rate": 4.4919708029197085e-06, "loss": 0.7232, "step": 6558 }, { "epoch": 0.19149805845084816, "grad_norm": 0.7760304354741391, "learning_rate": 4.4918085969180865e-06, "loss": 0.7321, "step": 6559 }, { "epoch": 0.19152725467869552, "grad_norm": 0.6986707694931247, "learning_rate": 4.4916463909164645e-06, "loss": 0.5792, "step": 6560 }, { "epoch": 0.19155645090654289, "grad_norm": 0.7887948493791739, "learning_rate": 4.4914841849148425e-06, "loss": 0.7195, "step": 6561 }, { "epoch": 0.19158564713439025, "grad_norm": 0.7583746008511703, "learning_rate": 4.49132197891322e-06, "loss": 0.6734, "step": 6562 }, { "epoch": 0.1916148433622376, "grad_norm": 0.7139249388783978, "learning_rate": 4.491159772911598e-06, "loss": 0.6166, "step": 6563 }, { "epoch": 0.19164403959008497, "grad_norm": 0.7296885095315598, "learning_rate": 4.490997566909976e-06, "loss": 0.6614, "step": 6564 }, { "epoch": 0.19167323581793233, "grad_norm": 0.7358340341410837, "learning_rate": 4.490835360908354e-06, "loss": 0.6657, "step": 6565 }, { "epoch": 0.1917024320457797, "grad_norm": 0.7286834954879919, "learning_rate": 4.490673154906732e-06, "loss": 0.6835, "step": 6566 }, { "epoch": 0.19173162827362705, "grad_norm": 0.9143412811076285, "learning_rate": 4.49051094890511e-06, "loss": 0.7105, "step": 6567 }, { "epoch": 0.19176082450147441, "grad_norm": 0.8520082048514406, "learning_rate": 4.490348742903488e-06, "loss": 0.7619, "step": 6568 }, { "epoch": 0.19179002072932178, "grad_norm": 0.9077355205354161, "learning_rate": 4.490186536901866e-06, "loss": 0.7172, "step": 6569 }, { "epoch": 0.19181921695716914, "grad_norm": 0.7703640564461492, "learning_rate": 4.490024330900244e-06, "loss": 0.721, "step": 6570 }, { "epoch": 0.1918484131850165, "grad_norm": 0.7971958369571936, "learning_rate": 4.489862124898622e-06, "loss": 0.6671, "step": 6571 }, { "epoch": 0.19187760941286386, "grad_norm": 0.7265251717122982, "learning_rate": 4.489699918897e-06, "loss": 0.6451, "step": 6572 }, { "epoch": 0.19190680564071122, "grad_norm": 0.8550038905834706, "learning_rate": 4.489537712895378e-06, "loss": 0.6998, "step": 6573 }, { "epoch": 0.19193600186855858, "grad_norm": 0.7730504741109988, "learning_rate": 4.489375506893756e-06, "loss": 0.6706, "step": 6574 }, { "epoch": 0.19196519809640594, "grad_norm": 0.8174461761412246, "learning_rate": 4.489213300892134e-06, "loss": 0.6799, "step": 6575 }, { "epoch": 0.1919943943242533, "grad_norm": 0.783284072600342, "learning_rate": 4.489051094890512e-06, "loss": 0.7284, "step": 6576 }, { "epoch": 0.19202359055210066, "grad_norm": 0.8283269588796595, "learning_rate": 4.488888888888889e-06, "loss": 0.7758, "step": 6577 }, { "epoch": 0.19205278677994803, "grad_norm": 0.7448150765063232, "learning_rate": 4.488726682887267e-06, "loss": 0.6774, "step": 6578 }, { "epoch": 0.1920819830077954, "grad_norm": 0.7374678111489653, "learning_rate": 4.488564476885645e-06, "loss": 0.6661, "step": 6579 }, { "epoch": 0.19211117923564275, "grad_norm": 0.7998060161245514, "learning_rate": 4.488402270884023e-06, "loss": 0.7509, "step": 6580 }, { "epoch": 0.1921403754634901, "grad_norm": 0.8131670361759462, "learning_rate": 4.488240064882401e-06, "loss": 0.6642, "step": 6581 }, { "epoch": 0.19216957169133747, "grad_norm": 0.7774236116524175, "learning_rate": 4.488077858880779e-06, "loss": 0.7537, "step": 6582 }, { "epoch": 0.19219876791918483, "grad_norm": 0.7451196219541981, "learning_rate": 4.487915652879157e-06, "loss": 0.6929, "step": 6583 }, { "epoch": 0.1922279641470322, "grad_norm": 0.7728422405354537, "learning_rate": 4.487753446877535e-06, "loss": 0.6986, "step": 6584 }, { "epoch": 0.19225716037487955, "grad_norm": 0.7180942241424265, "learning_rate": 4.487591240875913e-06, "loss": 0.6725, "step": 6585 }, { "epoch": 0.19228635660272692, "grad_norm": 0.8926197055992695, "learning_rate": 4.48742903487429e-06, "loss": 0.7525, "step": 6586 }, { "epoch": 0.19231555283057428, "grad_norm": 0.7171784365724184, "learning_rate": 4.487266828872668e-06, "loss": 0.5634, "step": 6587 }, { "epoch": 0.19234474905842167, "grad_norm": 0.7257683137645587, "learning_rate": 4.487104622871047e-06, "loss": 0.6153, "step": 6588 }, { "epoch": 0.19237394528626903, "grad_norm": 0.863808621399618, "learning_rate": 4.486942416869425e-06, "loss": 0.7374, "step": 6589 }, { "epoch": 0.1924031415141164, "grad_norm": 0.7159459337385949, "learning_rate": 4.486780210867803e-06, "loss": 0.611, "step": 6590 }, { "epoch": 0.19243233774196375, "grad_norm": 0.7084409591184934, "learning_rate": 4.486618004866181e-06, "loss": 0.5968, "step": 6591 }, { "epoch": 0.1924615339698111, "grad_norm": 0.8308102727098844, "learning_rate": 4.486455798864558e-06, "loss": 0.6965, "step": 6592 }, { "epoch": 0.19249073019765847, "grad_norm": 0.7985527652271768, "learning_rate": 4.486293592862936e-06, "loss": 0.7232, "step": 6593 }, { "epoch": 0.19251992642550583, "grad_norm": 0.6825863548077089, "learning_rate": 4.486131386861314e-06, "loss": 0.5668, "step": 6594 }, { "epoch": 0.1925491226533532, "grad_norm": 0.731068635493192, "learning_rate": 4.485969180859692e-06, "loss": 0.6055, "step": 6595 }, { "epoch": 0.19257831888120056, "grad_norm": 0.7675982136746866, "learning_rate": 4.48580697485807e-06, "loss": 0.7782, "step": 6596 }, { "epoch": 0.19260751510904792, "grad_norm": 1.2279937119798803, "learning_rate": 4.485644768856448e-06, "loss": 0.7239, "step": 6597 }, { "epoch": 0.19263671133689528, "grad_norm": 0.6876472456602608, "learning_rate": 4.485482562854826e-06, "loss": 0.5907, "step": 6598 }, { "epoch": 0.19266590756474264, "grad_norm": 0.7639072799456574, "learning_rate": 4.485320356853204e-06, "loss": 0.7309, "step": 6599 }, { "epoch": 0.19269510379259, "grad_norm": 0.8055401490395171, "learning_rate": 4.485158150851581e-06, "loss": 0.7395, "step": 6600 }, { "epoch": 0.19272430002043736, "grad_norm": 0.7562917000869533, "learning_rate": 4.484995944849959e-06, "loss": 0.6557, "step": 6601 }, { "epoch": 0.19275349624828472, "grad_norm": 0.8969003434150806, "learning_rate": 4.484833738848337e-06, "loss": 0.7309, "step": 6602 }, { "epoch": 0.19278269247613208, "grad_norm": 0.7257301727556654, "learning_rate": 4.484671532846715e-06, "loss": 0.6598, "step": 6603 }, { "epoch": 0.19281188870397944, "grad_norm": 0.7412042738480753, "learning_rate": 4.484509326845093e-06, "loss": 0.6771, "step": 6604 }, { "epoch": 0.1928410849318268, "grad_norm": 0.7471314239102468, "learning_rate": 4.484347120843471e-06, "loss": 0.7009, "step": 6605 }, { "epoch": 0.19287028115967417, "grad_norm": 0.8287598398054672, "learning_rate": 4.4841849148418494e-06, "loss": 0.7593, "step": 6606 }, { "epoch": 0.19289947738752153, "grad_norm": 0.7435977421953474, "learning_rate": 4.4840227088402274e-06, "loss": 0.6515, "step": 6607 }, { "epoch": 0.1929286736153689, "grad_norm": 0.7808776695637012, "learning_rate": 4.4838605028386054e-06, "loss": 0.7776, "step": 6608 }, { "epoch": 0.19295786984321625, "grad_norm": 0.9345403953387396, "learning_rate": 4.4836982968369834e-06, "loss": 0.7665, "step": 6609 }, { "epoch": 0.1929870660710636, "grad_norm": 0.7547183562148463, "learning_rate": 4.4835360908353615e-06, "loss": 0.6822, "step": 6610 }, { "epoch": 0.19301626229891097, "grad_norm": 0.7280746286251845, "learning_rate": 4.4833738848337395e-06, "loss": 0.6446, "step": 6611 }, { "epoch": 0.19304545852675833, "grad_norm": 0.7826447756975615, "learning_rate": 4.4832116788321175e-06, "loss": 0.7665, "step": 6612 }, { "epoch": 0.1930746547546057, "grad_norm": 0.7194236872484491, "learning_rate": 4.4830494728304955e-06, "loss": 0.627, "step": 6613 }, { "epoch": 0.19310385098245306, "grad_norm": 0.7621038062851712, "learning_rate": 4.4828872668288735e-06, "loss": 0.6684, "step": 6614 }, { "epoch": 0.19313304721030042, "grad_norm": 0.7581865026336561, "learning_rate": 4.482725060827251e-06, "loss": 0.6722, "step": 6615 }, { "epoch": 0.19316224343814778, "grad_norm": 0.7678068260704302, "learning_rate": 4.482562854825629e-06, "loss": 0.6768, "step": 6616 }, { "epoch": 0.19319143966599514, "grad_norm": 0.7034266999943801, "learning_rate": 4.482400648824007e-06, "loss": 0.6361, "step": 6617 }, { "epoch": 0.1932206358938425, "grad_norm": 0.7351380075101152, "learning_rate": 4.482238442822385e-06, "loss": 0.6556, "step": 6618 }, { "epoch": 0.1932498321216899, "grad_norm": 0.8848560914700855, "learning_rate": 4.482076236820763e-06, "loss": 0.6858, "step": 6619 }, { "epoch": 0.19327902834953725, "grad_norm": 0.710432057277922, "learning_rate": 4.481914030819141e-06, "loss": 0.6575, "step": 6620 }, { "epoch": 0.1933082245773846, "grad_norm": 0.7264937754105184, "learning_rate": 4.481751824817519e-06, "loss": 0.6735, "step": 6621 }, { "epoch": 0.19333742080523197, "grad_norm": 0.7464939200704518, "learning_rate": 4.481589618815897e-06, "loss": 0.6668, "step": 6622 }, { "epoch": 0.19336661703307934, "grad_norm": 0.8122317370438924, "learning_rate": 4.481427412814275e-06, "loss": 0.7234, "step": 6623 }, { "epoch": 0.1933958132609267, "grad_norm": 0.7521982775751852, "learning_rate": 4.481265206812652e-06, "loss": 0.6676, "step": 6624 }, { "epoch": 0.19342500948877406, "grad_norm": 0.7652412464936249, "learning_rate": 4.48110300081103e-06, "loss": 0.6931, "step": 6625 }, { "epoch": 0.19345420571662142, "grad_norm": 0.7793351981323933, "learning_rate": 4.480940794809409e-06, "loss": 0.7179, "step": 6626 }, { "epoch": 0.19348340194446878, "grad_norm": 0.7340097905836299, "learning_rate": 4.480778588807787e-06, "loss": 0.6452, "step": 6627 }, { "epoch": 0.19351259817231614, "grad_norm": 0.7183976494333795, "learning_rate": 4.480616382806165e-06, "loss": 0.6317, "step": 6628 }, { "epoch": 0.1935417944001635, "grad_norm": 0.705537664746519, "learning_rate": 4.480454176804543e-06, "loss": 0.662, "step": 6629 }, { "epoch": 0.19357099062801086, "grad_norm": 0.7478465308041734, "learning_rate": 4.48029197080292e-06, "loss": 0.6998, "step": 6630 }, { "epoch": 0.19360018685585823, "grad_norm": 0.7407718756105721, "learning_rate": 4.480129764801298e-06, "loss": 0.6896, "step": 6631 }, { "epoch": 0.1936293830837056, "grad_norm": 0.8080634575610085, "learning_rate": 4.479967558799676e-06, "loss": 0.7218, "step": 6632 }, { "epoch": 0.19365857931155295, "grad_norm": 0.7830873059387334, "learning_rate": 4.479805352798054e-06, "loss": 0.6731, "step": 6633 }, { "epoch": 0.1936877755394003, "grad_norm": 0.8461416494590999, "learning_rate": 4.479643146796432e-06, "loss": 0.7253, "step": 6634 }, { "epoch": 0.19371697176724767, "grad_norm": 0.8662978183463557, "learning_rate": 4.47948094079481e-06, "loss": 0.683, "step": 6635 }, { "epoch": 0.19374616799509503, "grad_norm": 0.6973842743432658, "learning_rate": 4.479318734793188e-06, "loss": 0.5887, "step": 6636 }, { "epoch": 0.1937753642229424, "grad_norm": 0.7548627349010515, "learning_rate": 4.479156528791566e-06, "loss": 0.7552, "step": 6637 }, { "epoch": 0.19380456045078975, "grad_norm": 0.7036131195190544, "learning_rate": 4.478994322789943e-06, "loss": 0.5911, "step": 6638 }, { "epoch": 0.19383375667863711, "grad_norm": 0.7819541457290512, "learning_rate": 4.478832116788321e-06, "loss": 0.728, "step": 6639 }, { "epoch": 0.19386295290648448, "grad_norm": 0.7602119312468077, "learning_rate": 4.478669910786699e-06, "loss": 0.6653, "step": 6640 }, { "epoch": 0.19389214913433184, "grad_norm": 0.7956112660307119, "learning_rate": 4.478507704785077e-06, "loss": 0.7001, "step": 6641 }, { "epoch": 0.1939213453621792, "grad_norm": 0.8475506167411119, "learning_rate": 4.478345498783455e-06, "loss": 0.6817, "step": 6642 }, { "epoch": 0.19395054159002656, "grad_norm": 0.765344667425914, "learning_rate": 4.478183292781833e-06, "loss": 0.6839, "step": 6643 }, { "epoch": 0.19397973781787392, "grad_norm": 0.7315907151345272, "learning_rate": 4.478021086780211e-06, "loss": 0.5855, "step": 6644 }, { "epoch": 0.19400893404572128, "grad_norm": 0.7775321707130741, "learning_rate": 4.477858880778589e-06, "loss": 0.7031, "step": 6645 }, { "epoch": 0.19403813027356864, "grad_norm": 0.8054352377851888, "learning_rate": 4.477696674776967e-06, "loss": 0.7148, "step": 6646 }, { "epoch": 0.194067326501416, "grad_norm": 0.7358760057249522, "learning_rate": 4.477534468775345e-06, "loss": 0.578, "step": 6647 }, { "epoch": 0.19409652272926337, "grad_norm": 0.7656685887155241, "learning_rate": 4.477372262773723e-06, "loss": 0.6923, "step": 6648 }, { "epoch": 0.19412571895711075, "grad_norm": 0.7301699963745769, "learning_rate": 4.477210056772101e-06, "loss": 0.6412, "step": 6649 }, { "epoch": 0.19415491518495812, "grad_norm": 0.8042305964872851, "learning_rate": 4.477047850770479e-06, "loss": 0.711, "step": 6650 }, { "epoch": 0.19418411141280548, "grad_norm": 0.749180648492964, "learning_rate": 4.476885644768857e-06, "loss": 0.6078, "step": 6651 }, { "epoch": 0.19421330764065284, "grad_norm": 0.7664448791347038, "learning_rate": 4.476723438767235e-06, "loss": 0.6577, "step": 6652 }, { "epoch": 0.1942425038685002, "grad_norm": 0.8046402857475742, "learning_rate": 4.476561232765612e-06, "loss": 0.7175, "step": 6653 }, { "epoch": 0.19427170009634756, "grad_norm": 0.7107255251273068, "learning_rate": 4.47639902676399e-06, "loss": 0.6038, "step": 6654 }, { "epoch": 0.19430089632419492, "grad_norm": 0.8195081547506866, "learning_rate": 4.476236820762368e-06, "loss": 0.7199, "step": 6655 }, { "epoch": 0.19433009255204228, "grad_norm": 0.7773798380241255, "learning_rate": 4.476074614760746e-06, "loss": 0.6911, "step": 6656 }, { "epoch": 0.19435928877988964, "grad_norm": 0.7545710335646357, "learning_rate": 4.475912408759124e-06, "loss": 0.6255, "step": 6657 }, { "epoch": 0.194388485007737, "grad_norm": 1.391405173376185, "learning_rate": 4.475750202757502e-06, "loss": 0.6069, "step": 6658 }, { "epoch": 0.19441768123558437, "grad_norm": 0.8069131385037651, "learning_rate": 4.47558799675588e-06, "loss": 0.75, "step": 6659 }, { "epoch": 0.19444687746343173, "grad_norm": 0.8026651262383061, "learning_rate": 4.475425790754258e-06, "loss": 0.7473, "step": 6660 }, { "epoch": 0.1944760736912791, "grad_norm": 0.8120427284994874, "learning_rate": 4.475263584752636e-06, "loss": 0.683, "step": 6661 }, { "epoch": 0.19450526991912645, "grad_norm": 0.9160843396588919, "learning_rate": 4.4751013787510136e-06, "loss": 0.7561, "step": 6662 }, { "epoch": 0.1945344661469738, "grad_norm": 0.7653656153915475, "learning_rate": 4.474939172749392e-06, "loss": 0.7265, "step": 6663 }, { "epoch": 0.19456366237482117, "grad_norm": 0.7651983941474627, "learning_rate": 4.4747769667477704e-06, "loss": 0.6042, "step": 6664 }, { "epoch": 0.19459285860266853, "grad_norm": 0.7404721732438255, "learning_rate": 4.4746147607461484e-06, "loss": 0.6649, "step": 6665 }, { "epoch": 0.1946220548305159, "grad_norm": 0.8044464585075211, "learning_rate": 4.4744525547445264e-06, "loss": 0.7772, "step": 6666 }, { "epoch": 0.19465125105836326, "grad_norm": 0.7909175503945675, "learning_rate": 4.474290348742904e-06, "loss": 0.7346, "step": 6667 }, { "epoch": 0.19468044728621062, "grad_norm": 0.7947643225220653, "learning_rate": 4.474128142741282e-06, "loss": 0.6737, "step": 6668 }, { "epoch": 0.19470964351405798, "grad_norm": 0.7218819989328742, "learning_rate": 4.47396593673966e-06, "loss": 0.6274, "step": 6669 }, { "epoch": 0.19473883974190534, "grad_norm": 0.7456626454919014, "learning_rate": 4.473803730738038e-06, "loss": 0.6688, "step": 6670 }, { "epoch": 0.1947680359697527, "grad_norm": 0.8166338790444174, "learning_rate": 4.473641524736416e-06, "loss": 0.7472, "step": 6671 }, { "epoch": 0.19479723219760006, "grad_norm": 0.7834812896793263, "learning_rate": 4.473479318734794e-06, "loss": 0.7434, "step": 6672 }, { "epoch": 0.19482642842544742, "grad_norm": 0.7572729395307269, "learning_rate": 4.473317112733172e-06, "loss": 0.6921, "step": 6673 }, { "epoch": 0.19485562465329478, "grad_norm": 0.7628134535471527, "learning_rate": 4.47315490673155e-06, "loss": 0.7234, "step": 6674 }, { "epoch": 0.19488482088114215, "grad_norm": 0.6966493296113205, "learning_rate": 4.472992700729928e-06, "loss": 0.5938, "step": 6675 }, { "epoch": 0.1949140171089895, "grad_norm": 0.7777432090197196, "learning_rate": 4.472830494728305e-06, "loss": 0.7654, "step": 6676 }, { "epoch": 0.19494321333683687, "grad_norm": 0.9235941031411274, "learning_rate": 4.472668288726683e-06, "loss": 0.63, "step": 6677 }, { "epoch": 0.19497240956468423, "grad_norm": 0.700818847589201, "learning_rate": 4.472506082725061e-06, "loss": 0.6657, "step": 6678 }, { "epoch": 0.19500160579253162, "grad_norm": 0.757778616954132, "learning_rate": 4.472343876723439e-06, "loss": 0.6958, "step": 6679 }, { "epoch": 0.19503080202037898, "grad_norm": 0.7936035527283405, "learning_rate": 4.472181670721817e-06, "loss": 0.7467, "step": 6680 }, { "epoch": 0.19505999824822634, "grad_norm": 0.7026607182171024, "learning_rate": 4.472019464720195e-06, "loss": 0.6068, "step": 6681 }, { "epoch": 0.1950891944760737, "grad_norm": 0.7130197796468443, "learning_rate": 4.471857258718573e-06, "loss": 0.6512, "step": 6682 }, { "epoch": 0.19511839070392106, "grad_norm": 0.7436092504453673, "learning_rate": 4.471695052716951e-06, "loss": 0.6358, "step": 6683 }, { "epoch": 0.19514758693176842, "grad_norm": 0.7266755785270541, "learning_rate": 4.471532846715329e-06, "loss": 0.6475, "step": 6684 }, { "epoch": 0.19517678315961579, "grad_norm": 0.7408235359491995, "learning_rate": 4.471370640713707e-06, "loss": 0.6592, "step": 6685 }, { "epoch": 0.19520597938746315, "grad_norm": 0.9099085790334739, "learning_rate": 4.471208434712085e-06, "loss": 0.6606, "step": 6686 }, { "epoch": 0.1952351756153105, "grad_norm": 0.7941772666243694, "learning_rate": 4.471046228710463e-06, "loss": 0.7305, "step": 6687 }, { "epoch": 0.19526437184315787, "grad_norm": 0.7453601598194908, "learning_rate": 4.470884022708841e-06, "loss": 0.6268, "step": 6688 }, { "epoch": 0.19529356807100523, "grad_norm": 0.7824191588412874, "learning_rate": 4.470721816707219e-06, "loss": 0.722, "step": 6689 }, { "epoch": 0.1953227642988526, "grad_norm": 0.7429711283758743, "learning_rate": 4.470559610705597e-06, "loss": 0.7149, "step": 6690 }, { "epoch": 0.19535196052669995, "grad_norm": 0.794451367380375, "learning_rate": 4.470397404703974e-06, "loss": 0.7547, "step": 6691 }, { "epoch": 0.1953811567545473, "grad_norm": 0.7399316570384998, "learning_rate": 4.470235198702352e-06, "loss": 0.6775, "step": 6692 }, { "epoch": 0.19541035298239467, "grad_norm": 0.9471189314346112, "learning_rate": 4.47007299270073e-06, "loss": 0.8858, "step": 6693 }, { "epoch": 0.19543954921024204, "grad_norm": 0.7094324433890462, "learning_rate": 4.469910786699108e-06, "loss": 0.6448, "step": 6694 }, { "epoch": 0.1954687454380894, "grad_norm": 0.7031987910887086, "learning_rate": 4.469748580697486e-06, "loss": 0.6296, "step": 6695 }, { "epoch": 0.19549794166593676, "grad_norm": 0.7776896887496788, "learning_rate": 4.469586374695864e-06, "loss": 0.6244, "step": 6696 }, { "epoch": 0.19552713789378412, "grad_norm": 0.6858476507414373, "learning_rate": 4.469424168694242e-06, "loss": 0.6076, "step": 6697 }, { "epoch": 0.19555633412163148, "grad_norm": 0.7409314916693513, "learning_rate": 4.46926196269262e-06, "loss": 0.6153, "step": 6698 }, { "epoch": 0.19558553034947884, "grad_norm": 0.8301575706172599, "learning_rate": 4.469099756690998e-06, "loss": 0.6966, "step": 6699 }, { "epoch": 0.1956147265773262, "grad_norm": 0.7885688225884588, "learning_rate": 4.468937550689375e-06, "loss": 0.7322, "step": 6700 }, { "epoch": 0.19564392280517356, "grad_norm": 0.7382570634416029, "learning_rate": 4.468775344687753e-06, "loss": 0.6813, "step": 6701 }, { "epoch": 0.19567311903302093, "grad_norm": 0.7767006356726943, "learning_rate": 4.468613138686132e-06, "loss": 0.7791, "step": 6702 }, { "epoch": 0.1957023152608683, "grad_norm": 0.733684731675578, "learning_rate": 4.46845093268451e-06, "loss": 0.7104, "step": 6703 }, { "epoch": 0.19573151148871565, "grad_norm": 0.7699089075735335, "learning_rate": 4.468288726682888e-06, "loss": 0.6965, "step": 6704 }, { "epoch": 0.195760707716563, "grad_norm": 0.7585447758099528, "learning_rate": 4.468126520681265e-06, "loss": 0.6391, "step": 6705 }, { "epoch": 0.19578990394441037, "grad_norm": 0.741414875056375, "learning_rate": 4.467964314679643e-06, "loss": 0.6333, "step": 6706 }, { "epoch": 0.19581910017225773, "grad_norm": 0.7212922689581471, "learning_rate": 4.467802108678021e-06, "loss": 0.5827, "step": 6707 }, { "epoch": 0.1958482964001051, "grad_norm": 0.7798935385065423, "learning_rate": 4.467639902676399e-06, "loss": 0.761, "step": 6708 }, { "epoch": 0.19587749262795248, "grad_norm": 0.7552232292157688, "learning_rate": 4.467477696674777e-06, "loss": 0.7117, "step": 6709 }, { "epoch": 0.19590668885579984, "grad_norm": 0.7648323433599841, "learning_rate": 4.467315490673155e-06, "loss": 0.7103, "step": 6710 }, { "epoch": 0.1959358850836472, "grad_norm": 0.9134125284420735, "learning_rate": 4.467153284671533e-06, "loss": 0.7021, "step": 6711 }, { "epoch": 0.19596508131149457, "grad_norm": 0.7479534026891613, "learning_rate": 4.466991078669911e-06, "loss": 0.6027, "step": 6712 }, { "epoch": 0.19599427753934193, "grad_norm": 0.686186669798346, "learning_rate": 4.466828872668289e-06, "loss": 0.634, "step": 6713 }, { "epoch": 0.1960234737671893, "grad_norm": 0.7804453909465272, "learning_rate": 4.4666666666666665e-06, "loss": 0.7294, "step": 6714 }, { "epoch": 0.19605266999503665, "grad_norm": 0.7460497610197152, "learning_rate": 4.4665044606650445e-06, "loss": 0.6608, "step": 6715 }, { "epoch": 0.196081866222884, "grad_norm": 0.7323577084173539, "learning_rate": 4.4663422546634225e-06, "loss": 0.6862, "step": 6716 }, { "epoch": 0.19611106245073137, "grad_norm": 0.6626648752261806, "learning_rate": 4.4661800486618006e-06, "loss": 0.5847, "step": 6717 }, { "epoch": 0.19614025867857873, "grad_norm": 0.7111656847874908, "learning_rate": 4.4660178426601786e-06, "loss": 0.6329, "step": 6718 }, { "epoch": 0.1961694549064261, "grad_norm": 0.8120272711330468, "learning_rate": 4.4658556366585566e-06, "loss": 0.7384, "step": 6719 }, { "epoch": 0.19619865113427346, "grad_norm": 0.8093577427117263, "learning_rate": 4.4656934306569346e-06, "loss": 0.7631, "step": 6720 }, { "epoch": 0.19622784736212082, "grad_norm": 0.7547221964414781, "learning_rate": 4.465531224655313e-06, "loss": 0.7021, "step": 6721 }, { "epoch": 0.19625704358996818, "grad_norm": 0.7915337162949314, "learning_rate": 4.465369018653691e-06, "loss": 0.7551, "step": 6722 }, { "epoch": 0.19628623981781554, "grad_norm": 0.7022623829851066, "learning_rate": 4.465206812652069e-06, "loss": 0.5664, "step": 6723 }, { "epoch": 0.1963154360456629, "grad_norm": 0.8069809764139679, "learning_rate": 4.465044606650447e-06, "loss": 0.7247, "step": 6724 }, { "epoch": 0.19634463227351026, "grad_norm": 1.0331686245720506, "learning_rate": 4.464882400648825e-06, "loss": 0.7661, "step": 6725 }, { "epoch": 0.19637382850135762, "grad_norm": 0.7271014627475435, "learning_rate": 4.464720194647203e-06, "loss": 0.6686, "step": 6726 }, { "epoch": 0.19640302472920498, "grad_norm": 0.8918180861922054, "learning_rate": 4.464557988645581e-06, "loss": 0.6523, "step": 6727 }, { "epoch": 0.19643222095705234, "grad_norm": 0.8123882476607374, "learning_rate": 4.464395782643959e-06, "loss": 0.673, "step": 6728 }, { "epoch": 0.1964614171848997, "grad_norm": 0.7277652587945778, "learning_rate": 4.464233576642336e-06, "loss": 0.7016, "step": 6729 }, { "epoch": 0.19649061341274707, "grad_norm": 0.7520925088174955, "learning_rate": 4.464071370640714e-06, "loss": 0.7121, "step": 6730 }, { "epoch": 0.19651980964059443, "grad_norm": 0.7209122935320232, "learning_rate": 4.463909164639092e-06, "loss": 0.6069, "step": 6731 }, { "epoch": 0.1965490058684418, "grad_norm": 0.8634121243889578, "learning_rate": 4.46374695863747e-06, "loss": 0.7474, "step": 6732 }, { "epoch": 0.19657820209628915, "grad_norm": 0.7441393783616755, "learning_rate": 4.463584752635848e-06, "loss": 0.6878, "step": 6733 }, { "epoch": 0.1966073983241365, "grad_norm": 0.6958866646624952, "learning_rate": 4.463422546634226e-06, "loss": 0.6394, "step": 6734 }, { "epoch": 0.19663659455198387, "grad_norm": 0.7450056004470927, "learning_rate": 4.463260340632604e-06, "loss": 0.6844, "step": 6735 }, { "epoch": 0.19666579077983123, "grad_norm": 0.7309322406120667, "learning_rate": 4.463098134630982e-06, "loss": 0.675, "step": 6736 }, { "epoch": 0.1966949870076786, "grad_norm": 0.7474260086539792, "learning_rate": 4.46293592862936e-06, "loss": 0.6459, "step": 6737 }, { "epoch": 0.19672418323552596, "grad_norm": 0.7316893375520247, "learning_rate": 4.462773722627737e-06, "loss": 0.6781, "step": 6738 }, { "epoch": 0.19675337946337335, "grad_norm": 0.8256526017283999, "learning_rate": 4.462611516626116e-06, "loss": 0.7775, "step": 6739 }, { "epoch": 0.1967825756912207, "grad_norm": 0.7834509243788488, "learning_rate": 4.462449310624494e-06, "loss": 0.788, "step": 6740 }, { "epoch": 0.19681177191906807, "grad_norm": 0.7608611815720686, "learning_rate": 4.462287104622872e-06, "loss": 0.6645, "step": 6741 }, { "epoch": 0.19684096814691543, "grad_norm": 0.7559848565556753, "learning_rate": 4.46212489862125e-06, "loss": 0.6523, "step": 6742 }, { "epoch": 0.1968701643747628, "grad_norm": 0.7858603443184379, "learning_rate": 4.461962692619627e-06, "loss": 0.7166, "step": 6743 }, { "epoch": 0.19689936060261015, "grad_norm": 0.7562771781491411, "learning_rate": 4.461800486618005e-06, "loss": 0.7752, "step": 6744 }, { "epoch": 0.1969285568304575, "grad_norm": 0.7781496852486649, "learning_rate": 4.461638280616383e-06, "loss": 0.6525, "step": 6745 }, { "epoch": 0.19695775305830487, "grad_norm": 0.7249850624138767, "learning_rate": 4.461476074614761e-06, "loss": 0.6354, "step": 6746 }, { "epoch": 0.19698694928615224, "grad_norm": 0.7594804290023266, "learning_rate": 4.461313868613139e-06, "loss": 0.6806, "step": 6747 }, { "epoch": 0.1970161455139996, "grad_norm": 0.7422526406400395, "learning_rate": 4.461151662611517e-06, "loss": 0.7296, "step": 6748 }, { "epoch": 0.19704534174184696, "grad_norm": 0.7659728728577819, "learning_rate": 4.460989456609895e-06, "loss": 0.7056, "step": 6749 }, { "epoch": 0.19707453796969432, "grad_norm": 0.8416947545761059, "learning_rate": 4.460827250608273e-06, "loss": 0.7444, "step": 6750 }, { "epoch": 0.19710373419754168, "grad_norm": 0.7780030043790963, "learning_rate": 4.460665044606651e-06, "loss": 0.7638, "step": 6751 }, { "epoch": 0.19713293042538904, "grad_norm": 0.9775326924521311, "learning_rate": 4.460502838605028e-06, "loss": 0.789, "step": 6752 }, { "epoch": 0.1971621266532364, "grad_norm": 0.7409716132663009, "learning_rate": 4.460340632603406e-06, "loss": 0.6752, "step": 6753 }, { "epoch": 0.19719132288108376, "grad_norm": 0.7624920176775927, "learning_rate": 4.460178426601784e-06, "loss": 0.7216, "step": 6754 }, { "epoch": 0.19722051910893112, "grad_norm": 0.7978353923357224, "learning_rate": 4.460016220600162e-06, "loss": 0.786, "step": 6755 }, { "epoch": 0.19724971533677849, "grad_norm": 0.8014938829352432, "learning_rate": 4.45985401459854e-06, "loss": 0.7294, "step": 6756 }, { "epoch": 0.19727891156462585, "grad_norm": 0.7660821139410678, "learning_rate": 4.459691808596918e-06, "loss": 0.6601, "step": 6757 }, { "epoch": 0.1973081077924732, "grad_norm": 0.7688009242850065, "learning_rate": 4.459529602595296e-06, "loss": 0.7142, "step": 6758 }, { "epoch": 0.19733730402032057, "grad_norm": 0.7258683685964821, "learning_rate": 4.459367396593674e-06, "loss": 0.6693, "step": 6759 }, { "epoch": 0.19736650024816793, "grad_norm": 0.7558917862452146, "learning_rate": 4.459205190592052e-06, "loss": 0.698, "step": 6760 }, { "epoch": 0.1973956964760153, "grad_norm": 0.7763416213919621, "learning_rate": 4.45904298459043e-06, "loss": 0.6878, "step": 6761 }, { "epoch": 0.19742489270386265, "grad_norm": 0.7624072437386912, "learning_rate": 4.458880778588808e-06, "loss": 0.7151, "step": 6762 }, { "epoch": 0.19745408893171001, "grad_norm": 0.6942349153951111, "learning_rate": 4.458718572587186e-06, "loss": 0.5848, "step": 6763 }, { "epoch": 0.19748328515955738, "grad_norm": 0.7484139378703993, "learning_rate": 4.458556366585564e-06, "loss": 0.711, "step": 6764 }, { "epoch": 0.19751248138740474, "grad_norm": 0.8337167674706791, "learning_rate": 4.458394160583942e-06, "loss": 0.6961, "step": 6765 }, { "epoch": 0.1975416776152521, "grad_norm": 0.8056537997333636, "learning_rate": 4.45823195458232e-06, "loss": 0.7753, "step": 6766 }, { "epoch": 0.19757087384309946, "grad_norm": 0.9173014325544538, "learning_rate": 4.4580697485806975e-06, "loss": 0.762, "step": 6767 }, { "epoch": 0.19760007007094682, "grad_norm": 0.7976884097429809, "learning_rate": 4.4579075425790755e-06, "loss": 0.6544, "step": 6768 }, { "epoch": 0.1976292662987942, "grad_norm": 0.9322818888242075, "learning_rate": 4.4577453365774535e-06, "loss": 0.7204, "step": 6769 }, { "epoch": 0.19765846252664157, "grad_norm": 0.7571002014098459, "learning_rate": 4.4575831305758315e-06, "loss": 0.6588, "step": 6770 }, { "epoch": 0.19768765875448893, "grad_norm": 0.7244499977989488, "learning_rate": 4.4574209245742095e-06, "loss": 0.7043, "step": 6771 }, { "epoch": 0.1977168549823363, "grad_norm": 0.804092792509916, "learning_rate": 4.4572587185725875e-06, "loss": 0.7403, "step": 6772 }, { "epoch": 0.19774605121018365, "grad_norm": 0.8052423808570301, "learning_rate": 4.4570965125709655e-06, "loss": 0.6854, "step": 6773 }, { "epoch": 0.19777524743803102, "grad_norm": 0.7564669942101104, "learning_rate": 4.4569343065693436e-06, "loss": 0.7464, "step": 6774 }, { "epoch": 0.19780444366587838, "grad_norm": 0.7079805654218501, "learning_rate": 4.456772100567721e-06, "loss": 0.614, "step": 6775 }, { "epoch": 0.19783363989372574, "grad_norm": 0.7932740828103696, "learning_rate": 4.456609894566099e-06, "loss": 0.7951, "step": 6776 }, { "epoch": 0.1978628361215731, "grad_norm": 0.7383797089451862, "learning_rate": 4.4564476885644776e-06, "loss": 0.6998, "step": 6777 }, { "epoch": 0.19789203234942046, "grad_norm": 0.8668134705037428, "learning_rate": 4.456285482562856e-06, "loss": 0.6821, "step": 6778 }, { "epoch": 0.19792122857726782, "grad_norm": 0.8301141712529153, "learning_rate": 4.456123276561234e-06, "loss": 0.7123, "step": 6779 }, { "epoch": 0.19795042480511518, "grad_norm": 0.6812952697558388, "learning_rate": 4.455961070559612e-06, "loss": 0.6319, "step": 6780 }, { "epoch": 0.19797962103296254, "grad_norm": 0.7185809806669422, "learning_rate": 4.455798864557989e-06, "loss": 0.5539, "step": 6781 }, { "epoch": 0.1980088172608099, "grad_norm": 0.7723218226640415, "learning_rate": 4.455636658556367e-06, "loss": 0.687, "step": 6782 }, { "epoch": 0.19803801348865727, "grad_norm": 0.7519507148357958, "learning_rate": 4.455474452554745e-06, "loss": 0.7196, "step": 6783 }, { "epoch": 0.19806720971650463, "grad_norm": 0.7431889121467402, "learning_rate": 4.455312246553123e-06, "loss": 0.6457, "step": 6784 }, { "epoch": 0.198096405944352, "grad_norm": 0.8411979456834691, "learning_rate": 4.455150040551501e-06, "loss": 0.7788, "step": 6785 }, { "epoch": 0.19812560217219935, "grad_norm": 0.7472510137151659, "learning_rate": 4.454987834549879e-06, "loss": 0.6651, "step": 6786 }, { "epoch": 0.1981547984000467, "grad_norm": 0.7689521987172523, "learning_rate": 4.454825628548257e-06, "loss": 0.6727, "step": 6787 }, { "epoch": 0.19818399462789407, "grad_norm": 0.7932429691634532, "learning_rate": 4.454663422546635e-06, "loss": 0.8076, "step": 6788 }, { "epoch": 0.19821319085574143, "grad_norm": 0.7597195615807996, "learning_rate": 4.454501216545013e-06, "loss": 0.7109, "step": 6789 }, { "epoch": 0.1982423870835888, "grad_norm": 0.7371245444893559, "learning_rate": 4.45433901054339e-06, "loss": 0.6651, "step": 6790 }, { "epoch": 0.19827158331143616, "grad_norm": 0.7377514910402038, "learning_rate": 4.454176804541768e-06, "loss": 0.6382, "step": 6791 }, { "epoch": 0.19830077953928352, "grad_norm": 0.7418222794860827, "learning_rate": 4.454014598540146e-06, "loss": 0.6816, "step": 6792 }, { "epoch": 0.19832997576713088, "grad_norm": 0.7344195890419367, "learning_rate": 4.453852392538524e-06, "loss": 0.6169, "step": 6793 }, { "epoch": 0.19835917199497824, "grad_norm": 0.7744975949963324, "learning_rate": 4.453690186536902e-06, "loss": 0.7223, "step": 6794 }, { "epoch": 0.1983883682228256, "grad_norm": 0.7941391052713658, "learning_rate": 4.45352798053528e-06, "loss": 0.7463, "step": 6795 }, { "epoch": 0.19841756445067296, "grad_norm": 0.7742663724438058, "learning_rate": 4.453365774533658e-06, "loss": 0.723, "step": 6796 }, { "epoch": 0.19844676067852032, "grad_norm": 0.7457654233634033, "learning_rate": 4.453203568532036e-06, "loss": 0.616, "step": 6797 }, { "epoch": 0.19847595690636768, "grad_norm": 0.7303980553749383, "learning_rate": 4.453041362530414e-06, "loss": 0.6873, "step": 6798 }, { "epoch": 0.19850515313421507, "grad_norm": 0.7105614925822338, "learning_rate": 4.452879156528792e-06, "loss": 0.5887, "step": 6799 }, { "epoch": 0.19853434936206243, "grad_norm": 0.810298333189725, "learning_rate": 4.45271695052717e-06, "loss": 0.7084, "step": 6800 }, { "epoch": 0.1985635455899098, "grad_norm": 0.7517612256737873, "learning_rate": 4.452554744525548e-06, "loss": 0.7741, "step": 6801 }, { "epoch": 0.19859274181775716, "grad_norm": 0.7120328150743124, "learning_rate": 4.452392538523926e-06, "loss": 0.5933, "step": 6802 }, { "epoch": 0.19862193804560452, "grad_norm": 0.6992421453394998, "learning_rate": 4.452230332522304e-06, "loss": 0.5905, "step": 6803 }, { "epoch": 0.19865113427345188, "grad_norm": 0.6609786208136403, "learning_rate": 4.452068126520682e-06, "loss": 0.5795, "step": 6804 }, { "epoch": 0.19868033050129924, "grad_norm": 0.900429016272701, "learning_rate": 4.451905920519059e-06, "loss": 0.7037, "step": 6805 }, { "epoch": 0.1987095267291466, "grad_norm": 0.7834754671089141, "learning_rate": 4.451743714517437e-06, "loss": 0.7329, "step": 6806 }, { "epoch": 0.19873872295699396, "grad_norm": 0.7548294125341717, "learning_rate": 4.451581508515815e-06, "loss": 0.6666, "step": 6807 }, { "epoch": 0.19876791918484132, "grad_norm": 0.7402824815295529, "learning_rate": 4.451419302514193e-06, "loss": 0.705, "step": 6808 }, { "epoch": 0.19879711541268869, "grad_norm": 0.7302864256844674, "learning_rate": 4.451257096512571e-06, "loss": 0.6655, "step": 6809 }, { "epoch": 0.19882631164053605, "grad_norm": 0.7728731594563023, "learning_rate": 4.451094890510949e-06, "loss": 0.7103, "step": 6810 }, { "epoch": 0.1988555078683834, "grad_norm": 0.7395057772972707, "learning_rate": 4.450932684509327e-06, "loss": 0.6596, "step": 6811 }, { "epoch": 0.19888470409623077, "grad_norm": 0.7614132052055064, "learning_rate": 4.450770478507705e-06, "loss": 0.675, "step": 6812 }, { "epoch": 0.19891390032407813, "grad_norm": 0.8324054001477266, "learning_rate": 4.4506082725060824e-06, "loss": 0.7722, "step": 6813 }, { "epoch": 0.1989430965519255, "grad_norm": 0.7413573697589092, "learning_rate": 4.4504460665044604e-06, "loss": 0.6773, "step": 6814 }, { "epoch": 0.19897229277977285, "grad_norm": 0.7643921428328233, "learning_rate": 4.450283860502839e-06, "loss": 0.6367, "step": 6815 }, { "epoch": 0.1990014890076202, "grad_norm": 0.7682218606687503, "learning_rate": 4.450121654501217e-06, "loss": 0.656, "step": 6816 }, { "epoch": 0.19903068523546757, "grad_norm": 0.8964033439955286, "learning_rate": 4.449959448499595e-06, "loss": 0.7894, "step": 6817 }, { "epoch": 0.19905988146331494, "grad_norm": 0.7668957090645823, "learning_rate": 4.449797242497973e-06, "loss": 0.7256, "step": 6818 }, { "epoch": 0.1990890776911623, "grad_norm": 0.7944332304233747, "learning_rate": 4.4496350364963505e-06, "loss": 0.7478, "step": 6819 }, { "epoch": 0.19911827391900966, "grad_norm": 0.7730227470785571, "learning_rate": 4.4494728304947285e-06, "loss": 0.7112, "step": 6820 }, { "epoch": 0.19914747014685702, "grad_norm": 0.7335245068297744, "learning_rate": 4.4493106244931065e-06, "loss": 0.6211, "step": 6821 }, { "epoch": 0.19917666637470438, "grad_norm": 0.753399620299027, "learning_rate": 4.4491484184914845e-06, "loss": 0.6411, "step": 6822 }, { "epoch": 0.19920586260255174, "grad_norm": 0.7111438960477267, "learning_rate": 4.4489862124898625e-06, "loss": 0.6606, "step": 6823 }, { "epoch": 0.1992350588303991, "grad_norm": 0.7156917935979555, "learning_rate": 4.4488240064882405e-06, "loss": 0.6434, "step": 6824 }, { "epoch": 0.19926425505824646, "grad_norm": 0.7930061296074364, "learning_rate": 4.4486618004866185e-06, "loss": 0.7039, "step": 6825 }, { "epoch": 0.19929345128609383, "grad_norm": 0.7482783879455849, "learning_rate": 4.4484995944849965e-06, "loss": 0.6942, "step": 6826 }, { "epoch": 0.1993226475139412, "grad_norm": 0.719546903691262, "learning_rate": 4.4483373884833745e-06, "loss": 0.6572, "step": 6827 }, { "epoch": 0.19935184374178855, "grad_norm": 0.7590719165215336, "learning_rate": 4.448175182481752e-06, "loss": 0.6736, "step": 6828 }, { "epoch": 0.1993810399696359, "grad_norm": 0.7547622110266762, "learning_rate": 4.44801297648013e-06, "loss": 0.7089, "step": 6829 }, { "epoch": 0.1994102361974833, "grad_norm": 0.7174903628662397, "learning_rate": 4.447850770478508e-06, "loss": 0.662, "step": 6830 }, { "epoch": 0.19943943242533066, "grad_norm": 0.843795357132884, "learning_rate": 4.447688564476886e-06, "loss": 0.8311, "step": 6831 }, { "epoch": 0.19946862865317802, "grad_norm": 0.7408486068432016, "learning_rate": 4.447526358475264e-06, "loss": 0.6227, "step": 6832 }, { "epoch": 0.19949782488102538, "grad_norm": 0.7927279746854998, "learning_rate": 4.447364152473642e-06, "loss": 0.7673, "step": 6833 }, { "epoch": 0.19952702110887274, "grad_norm": 0.7624453376936299, "learning_rate": 4.44720194647202e-06, "loss": 0.6933, "step": 6834 }, { "epoch": 0.1995562173367201, "grad_norm": 0.6779049622023355, "learning_rate": 4.447039740470398e-06, "loss": 0.5885, "step": 6835 }, { "epoch": 0.19958541356456747, "grad_norm": 0.8046502911987324, "learning_rate": 4.446877534468776e-06, "loss": 0.6773, "step": 6836 }, { "epoch": 0.19961460979241483, "grad_norm": 0.763834284980376, "learning_rate": 4.446715328467154e-06, "loss": 0.7177, "step": 6837 }, { "epoch": 0.1996438060202622, "grad_norm": 0.8902668943630497, "learning_rate": 4.446553122465532e-06, "loss": 0.6938, "step": 6838 }, { "epoch": 0.19967300224810955, "grad_norm": 0.7086811804867069, "learning_rate": 4.44639091646391e-06, "loss": 0.6173, "step": 6839 }, { "epoch": 0.1997021984759569, "grad_norm": 0.7584013971741148, "learning_rate": 4.446228710462288e-06, "loss": 0.6398, "step": 6840 }, { "epoch": 0.19973139470380427, "grad_norm": 0.7993487898174653, "learning_rate": 4.446066504460666e-06, "loss": 0.7087, "step": 6841 }, { "epoch": 0.19976059093165163, "grad_norm": 0.8088818757212302, "learning_rate": 4.445904298459044e-06, "loss": 0.6339, "step": 6842 }, { "epoch": 0.199789787159499, "grad_norm": 0.7138288234095236, "learning_rate": 4.445742092457421e-06, "loss": 0.6623, "step": 6843 }, { "epoch": 0.19981898338734636, "grad_norm": 0.7403932024743557, "learning_rate": 4.445579886455799e-06, "loss": 0.6281, "step": 6844 }, { "epoch": 0.19984817961519372, "grad_norm": 0.7508061510303622, "learning_rate": 4.445417680454177e-06, "loss": 0.6906, "step": 6845 }, { "epoch": 0.19987737584304108, "grad_norm": 0.7529227673841061, "learning_rate": 4.445255474452555e-06, "loss": 0.6711, "step": 6846 }, { "epoch": 0.19990657207088844, "grad_norm": 0.7203173454203636, "learning_rate": 4.445093268450933e-06, "loss": 0.6075, "step": 6847 }, { "epoch": 0.1999357682987358, "grad_norm": 0.7492417356095257, "learning_rate": 4.444931062449311e-06, "loss": 0.6469, "step": 6848 }, { "epoch": 0.19996496452658316, "grad_norm": 0.7715462221358763, "learning_rate": 4.444768856447689e-06, "loss": 0.6019, "step": 6849 }, { "epoch": 0.19999416075443052, "grad_norm": 0.799733502857891, "learning_rate": 4.444606650446067e-06, "loss": 0.6587, "step": 6850 }, { "epoch": 0.20002335698227788, "grad_norm": 0.7433445421193849, "learning_rate": 4.444444444444444e-06, "loss": 0.613, "step": 6851 }, { "epoch": 0.20005255321012524, "grad_norm": 0.7036190775631624, "learning_rate": 4.444282238442822e-06, "loss": 0.5907, "step": 6852 }, { "epoch": 0.2000817494379726, "grad_norm": 0.8008190487135675, "learning_rate": 4.444120032441201e-06, "loss": 0.7634, "step": 6853 }, { "epoch": 0.20011094566581997, "grad_norm": 0.8018267699887933, "learning_rate": 4.443957826439579e-06, "loss": 0.7521, "step": 6854 }, { "epoch": 0.20014014189366733, "grad_norm": 0.8695547805885193, "learning_rate": 4.443795620437957e-06, "loss": 0.8045, "step": 6855 }, { "epoch": 0.2001693381215147, "grad_norm": 0.7862581455362986, "learning_rate": 4.443633414436335e-06, "loss": 0.7499, "step": 6856 }, { "epoch": 0.20019853434936205, "grad_norm": 0.7444213729629453, "learning_rate": 4.443471208434712e-06, "loss": 0.6419, "step": 6857 }, { "epoch": 0.2002277305772094, "grad_norm": 0.7821903110629994, "learning_rate": 4.44330900243309e-06, "loss": 0.6816, "step": 6858 }, { "epoch": 0.20025692680505677, "grad_norm": 0.7872250463164201, "learning_rate": 4.443146796431468e-06, "loss": 0.6918, "step": 6859 }, { "epoch": 0.20028612303290416, "grad_norm": 0.7143306927926691, "learning_rate": 4.442984590429846e-06, "loss": 0.64, "step": 6860 }, { "epoch": 0.20031531926075152, "grad_norm": 0.7706743414951641, "learning_rate": 4.442822384428224e-06, "loss": 0.706, "step": 6861 }, { "epoch": 0.20034451548859888, "grad_norm": 0.7642888321785345, "learning_rate": 4.442660178426602e-06, "loss": 0.7126, "step": 6862 }, { "epoch": 0.20037371171644625, "grad_norm": 0.7272021954203531, "learning_rate": 4.44249797242498e-06, "loss": 0.6513, "step": 6863 }, { "epoch": 0.2004029079442936, "grad_norm": 0.7948101702728632, "learning_rate": 4.442335766423358e-06, "loss": 0.7279, "step": 6864 }, { "epoch": 0.20043210417214097, "grad_norm": 0.7054181295458224, "learning_rate": 4.442173560421736e-06, "loss": 0.6046, "step": 6865 }, { "epoch": 0.20046130039998833, "grad_norm": 0.7171409900774984, "learning_rate": 4.442011354420113e-06, "loss": 0.6244, "step": 6866 }, { "epoch": 0.2004904966278357, "grad_norm": 0.7916766550009479, "learning_rate": 4.441849148418491e-06, "loss": 0.7592, "step": 6867 }, { "epoch": 0.20051969285568305, "grad_norm": 0.7346619664424243, "learning_rate": 4.441686942416869e-06, "loss": 0.6845, "step": 6868 }, { "epoch": 0.2005488890835304, "grad_norm": 0.7694532383091062, "learning_rate": 4.4415247364152474e-06, "loss": 0.721, "step": 6869 }, { "epoch": 0.20057808531137777, "grad_norm": 0.7029528533720137, "learning_rate": 4.4413625304136254e-06, "loss": 0.6666, "step": 6870 }, { "epoch": 0.20060728153922514, "grad_norm": 0.7267584471259215, "learning_rate": 4.441200324412004e-06, "loss": 0.6183, "step": 6871 }, { "epoch": 0.2006364777670725, "grad_norm": 0.7439029529986502, "learning_rate": 4.4410381184103814e-06, "loss": 0.6858, "step": 6872 }, { "epoch": 0.20066567399491986, "grad_norm": 0.9903186522039588, "learning_rate": 4.4408759124087595e-06, "loss": 0.6339, "step": 6873 }, { "epoch": 0.20069487022276722, "grad_norm": 0.7315597086063336, "learning_rate": 4.4407137064071375e-06, "loss": 0.6371, "step": 6874 }, { "epoch": 0.20072406645061458, "grad_norm": 0.7511752443233365, "learning_rate": 4.4405515004055155e-06, "loss": 0.709, "step": 6875 }, { "epoch": 0.20075326267846194, "grad_norm": 0.7667354932190191, "learning_rate": 4.4403892944038935e-06, "loss": 0.6842, "step": 6876 }, { "epoch": 0.2007824589063093, "grad_norm": 0.7649901571050443, "learning_rate": 4.4402270884022715e-06, "loss": 0.7266, "step": 6877 }, { "epoch": 0.20081165513415666, "grad_norm": 0.7441480756154573, "learning_rate": 4.4400648824006495e-06, "loss": 0.7003, "step": 6878 }, { "epoch": 0.20084085136200402, "grad_norm": 0.9822512253658188, "learning_rate": 4.4399026763990275e-06, "loss": 0.7846, "step": 6879 }, { "epoch": 0.20087004758985139, "grad_norm": 0.7286660867087105, "learning_rate": 4.4397404703974055e-06, "loss": 0.6351, "step": 6880 }, { "epoch": 0.20089924381769875, "grad_norm": 0.7256642524783463, "learning_rate": 4.439578264395783e-06, "loss": 0.616, "step": 6881 }, { "epoch": 0.2009284400455461, "grad_norm": 0.7572861138064201, "learning_rate": 4.439416058394161e-06, "loss": 0.7061, "step": 6882 }, { "epoch": 0.20095763627339347, "grad_norm": 0.7001676207537216, "learning_rate": 4.439253852392539e-06, "loss": 0.6659, "step": 6883 }, { "epoch": 0.20098683250124083, "grad_norm": 0.7365241531894056, "learning_rate": 4.439091646390917e-06, "loss": 0.6378, "step": 6884 }, { "epoch": 0.2010160287290882, "grad_norm": 0.8013645339287582, "learning_rate": 4.438929440389295e-06, "loss": 0.764, "step": 6885 }, { "epoch": 0.20104522495693555, "grad_norm": 0.7714655186984699, "learning_rate": 4.438767234387673e-06, "loss": 0.7388, "step": 6886 }, { "epoch": 0.20107442118478291, "grad_norm": 0.7075008551928045, "learning_rate": 4.438605028386051e-06, "loss": 0.6204, "step": 6887 }, { "epoch": 0.20110361741263028, "grad_norm": 0.6949410503679241, "learning_rate": 4.438442822384429e-06, "loss": 0.6043, "step": 6888 }, { "epoch": 0.20113281364047764, "grad_norm": 0.9628964345960404, "learning_rate": 4.438280616382806e-06, "loss": 0.6823, "step": 6889 }, { "epoch": 0.20116200986832503, "grad_norm": 0.8333649526344781, "learning_rate": 4.438118410381185e-06, "loss": 0.6921, "step": 6890 }, { "epoch": 0.2011912060961724, "grad_norm": 0.7112677940460332, "learning_rate": 4.437956204379563e-06, "loss": 0.6338, "step": 6891 }, { "epoch": 0.20122040232401975, "grad_norm": 0.733894564508862, "learning_rate": 4.437793998377941e-06, "loss": 0.7028, "step": 6892 }, { "epoch": 0.2012495985518671, "grad_norm": 0.7224669936246992, "learning_rate": 4.437631792376319e-06, "loss": 0.6555, "step": 6893 }, { "epoch": 0.20127879477971447, "grad_norm": 0.8026811811256759, "learning_rate": 4.437469586374697e-06, "loss": 0.7534, "step": 6894 }, { "epoch": 0.20130799100756183, "grad_norm": 0.7450458298459335, "learning_rate": 4.437307380373074e-06, "loss": 0.6741, "step": 6895 }, { "epoch": 0.2013371872354092, "grad_norm": 0.7158054855040664, "learning_rate": 4.437145174371452e-06, "loss": 0.6145, "step": 6896 }, { "epoch": 0.20136638346325655, "grad_norm": 0.7971699704369414, "learning_rate": 4.43698296836983e-06, "loss": 0.7539, "step": 6897 }, { "epoch": 0.20139557969110392, "grad_norm": 0.7110689481477642, "learning_rate": 4.436820762368208e-06, "loss": 0.6148, "step": 6898 }, { "epoch": 0.20142477591895128, "grad_norm": 0.7355236051485695, "learning_rate": 4.436658556366586e-06, "loss": 0.6504, "step": 6899 }, { "epoch": 0.20145397214679864, "grad_norm": 0.6776034068476995, "learning_rate": 4.436496350364964e-06, "loss": 0.5912, "step": 6900 }, { "epoch": 0.201483168374646, "grad_norm": 0.7407764981259904, "learning_rate": 4.436334144363342e-06, "loss": 0.707, "step": 6901 }, { "epoch": 0.20151236460249336, "grad_norm": 0.7300226316499727, "learning_rate": 4.43617193836172e-06, "loss": 0.6401, "step": 6902 }, { "epoch": 0.20154156083034072, "grad_norm": 0.7422298410721815, "learning_rate": 4.436009732360098e-06, "loss": 0.7129, "step": 6903 }, { "epoch": 0.20157075705818808, "grad_norm": 0.7988284099385735, "learning_rate": 4.435847526358475e-06, "loss": 0.7262, "step": 6904 }, { "epoch": 0.20159995328603544, "grad_norm": 0.6995732847329713, "learning_rate": 4.435685320356853e-06, "loss": 0.6108, "step": 6905 }, { "epoch": 0.2016291495138828, "grad_norm": 0.706134056613829, "learning_rate": 4.435523114355231e-06, "loss": 0.6322, "step": 6906 }, { "epoch": 0.20165834574173017, "grad_norm": 0.7153699971625873, "learning_rate": 4.435360908353609e-06, "loss": 0.6261, "step": 6907 }, { "epoch": 0.20168754196957753, "grad_norm": 1.0476231194110532, "learning_rate": 4.435198702351987e-06, "loss": 0.7859, "step": 6908 }, { "epoch": 0.2017167381974249, "grad_norm": 0.7437859155646518, "learning_rate": 4.435036496350366e-06, "loss": 0.736, "step": 6909 }, { "epoch": 0.20174593442527225, "grad_norm": 0.7553494417055119, "learning_rate": 4.434874290348743e-06, "loss": 0.7344, "step": 6910 }, { "epoch": 0.2017751306531196, "grad_norm": 0.761345897576816, "learning_rate": 4.434712084347121e-06, "loss": 0.7041, "step": 6911 }, { "epoch": 0.20180432688096697, "grad_norm": 0.7862439549405646, "learning_rate": 4.434549878345499e-06, "loss": 0.7823, "step": 6912 }, { "epoch": 0.20183352310881433, "grad_norm": 0.7689836146607297, "learning_rate": 4.434387672343877e-06, "loss": 0.7247, "step": 6913 }, { "epoch": 0.2018627193366617, "grad_norm": 0.7573340773336484, "learning_rate": 4.434225466342255e-06, "loss": 0.6777, "step": 6914 }, { "epoch": 0.20189191556450906, "grad_norm": 0.7841925266115635, "learning_rate": 4.434063260340633e-06, "loss": 0.7595, "step": 6915 }, { "epoch": 0.20192111179235642, "grad_norm": 0.7867641637432989, "learning_rate": 4.433901054339011e-06, "loss": 0.7453, "step": 6916 }, { "epoch": 0.20195030802020378, "grad_norm": 0.7535007797363878, "learning_rate": 4.433738848337389e-06, "loss": 0.7075, "step": 6917 }, { "epoch": 0.20197950424805114, "grad_norm": 0.7486532551058501, "learning_rate": 4.433576642335766e-06, "loss": 0.707, "step": 6918 }, { "epoch": 0.2020087004758985, "grad_norm": 0.7902908344667428, "learning_rate": 4.433414436334144e-06, "loss": 0.7004, "step": 6919 }, { "epoch": 0.2020378967037459, "grad_norm": 0.7051854430354169, "learning_rate": 4.433252230332522e-06, "loss": 0.5998, "step": 6920 }, { "epoch": 0.20206709293159325, "grad_norm": 0.7358297855649625, "learning_rate": 4.4330900243309e-06, "loss": 0.6325, "step": 6921 }, { "epoch": 0.2020962891594406, "grad_norm": 0.7642622376635186, "learning_rate": 4.432927818329278e-06, "loss": 0.7034, "step": 6922 }, { "epoch": 0.20212548538728797, "grad_norm": 0.7142985822839077, "learning_rate": 4.432765612327656e-06, "loss": 0.6084, "step": 6923 }, { "epoch": 0.20215468161513533, "grad_norm": 0.7073207154933416, "learning_rate": 4.432603406326034e-06, "loss": 0.6417, "step": 6924 }, { "epoch": 0.2021838778429827, "grad_norm": 0.7407061393073469, "learning_rate": 4.432441200324412e-06, "loss": 0.6571, "step": 6925 }, { "epoch": 0.20221307407083006, "grad_norm": 0.8302425824483113, "learning_rate": 4.4322789943227904e-06, "loss": 0.6929, "step": 6926 }, { "epoch": 0.20224227029867742, "grad_norm": 1.1186840855250253, "learning_rate": 4.432116788321168e-06, "loss": 0.7846, "step": 6927 }, { "epoch": 0.20227146652652478, "grad_norm": 0.6971774272593029, "learning_rate": 4.4319545823195464e-06, "loss": 0.6179, "step": 6928 }, { "epoch": 0.20230066275437214, "grad_norm": 0.7917734823272369, "learning_rate": 4.4317923763179244e-06, "loss": 0.7495, "step": 6929 }, { "epoch": 0.2023298589822195, "grad_norm": 0.8145529349098257, "learning_rate": 4.4316301703163025e-06, "loss": 0.7503, "step": 6930 }, { "epoch": 0.20235905521006686, "grad_norm": 0.7302581288853088, "learning_rate": 4.4314679643146805e-06, "loss": 0.6649, "step": 6931 }, { "epoch": 0.20238825143791422, "grad_norm": 0.6728774400914176, "learning_rate": 4.4313057583130585e-06, "loss": 0.6067, "step": 6932 }, { "epoch": 0.20241744766576159, "grad_norm": 0.7151707906038279, "learning_rate": 4.431143552311436e-06, "loss": 0.6601, "step": 6933 }, { "epoch": 0.20244664389360895, "grad_norm": 0.8113630299470085, "learning_rate": 4.430981346309814e-06, "loss": 0.6357, "step": 6934 }, { "epoch": 0.2024758401214563, "grad_norm": 0.8734066260411094, "learning_rate": 4.430819140308192e-06, "loss": 0.9057, "step": 6935 }, { "epoch": 0.20250503634930367, "grad_norm": 0.7157897364952125, "learning_rate": 4.43065693430657e-06, "loss": 0.6282, "step": 6936 }, { "epoch": 0.20253423257715103, "grad_norm": 0.6923818605205803, "learning_rate": 4.430494728304948e-06, "loss": 0.5971, "step": 6937 }, { "epoch": 0.2025634288049984, "grad_norm": 0.7780916249951494, "learning_rate": 4.430332522303326e-06, "loss": 0.6558, "step": 6938 }, { "epoch": 0.20259262503284575, "grad_norm": 0.760877009913393, "learning_rate": 4.430170316301704e-06, "loss": 0.6927, "step": 6939 }, { "epoch": 0.2026218212606931, "grad_norm": 0.7672876338023359, "learning_rate": 4.430008110300082e-06, "loss": 0.6847, "step": 6940 }, { "epoch": 0.20265101748854047, "grad_norm": 0.8083904604233254, "learning_rate": 4.42984590429846e-06, "loss": 0.7184, "step": 6941 }, { "epoch": 0.20268021371638784, "grad_norm": 0.8219112971070736, "learning_rate": 4.429683698296837e-06, "loss": 0.6716, "step": 6942 }, { "epoch": 0.2027094099442352, "grad_norm": 0.7267397305494587, "learning_rate": 4.429521492295215e-06, "loss": 0.6824, "step": 6943 }, { "epoch": 0.20273860617208256, "grad_norm": 0.749431114044429, "learning_rate": 4.429359286293593e-06, "loss": 0.6874, "step": 6944 }, { "epoch": 0.20276780239992992, "grad_norm": 0.7783417338834545, "learning_rate": 4.429197080291971e-06, "loss": 0.7085, "step": 6945 }, { "epoch": 0.20279699862777728, "grad_norm": 0.724159451987399, "learning_rate": 4.429034874290349e-06, "loss": 0.6406, "step": 6946 }, { "epoch": 0.20282619485562464, "grad_norm": 0.7050138899339207, "learning_rate": 4.428872668288728e-06, "loss": 0.6399, "step": 6947 }, { "epoch": 0.202855391083472, "grad_norm": 0.8573886025840515, "learning_rate": 4.428710462287105e-06, "loss": 0.7218, "step": 6948 }, { "epoch": 0.20288458731131936, "grad_norm": 0.7263214745947648, "learning_rate": 4.428548256285483e-06, "loss": 0.6669, "step": 6949 }, { "epoch": 0.20291378353916675, "grad_norm": 0.7530536876265785, "learning_rate": 4.428386050283861e-06, "loss": 0.6708, "step": 6950 }, { "epoch": 0.20294297976701411, "grad_norm": 0.6718898405823608, "learning_rate": 4.428223844282239e-06, "loss": 0.5614, "step": 6951 }, { "epoch": 0.20297217599486148, "grad_norm": 0.7314156599161911, "learning_rate": 4.428061638280617e-06, "loss": 0.5885, "step": 6952 }, { "epoch": 0.20300137222270884, "grad_norm": 0.7093459223121512, "learning_rate": 4.427899432278995e-06, "loss": 0.5764, "step": 6953 }, { "epoch": 0.2030305684505562, "grad_norm": 0.7551330645648295, "learning_rate": 4.427737226277373e-06, "loss": 0.6444, "step": 6954 }, { "epoch": 0.20305976467840356, "grad_norm": 0.850993459771471, "learning_rate": 4.427575020275751e-06, "loss": 0.7978, "step": 6955 }, { "epoch": 0.20308896090625092, "grad_norm": 0.7552675180872876, "learning_rate": 4.427412814274128e-06, "loss": 0.7521, "step": 6956 }, { "epoch": 0.20311815713409828, "grad_norm": 0.7348302819985196, "learning_rate": 4.427250608272506e-06, "loss": 0.7018, "step": 6957 }, { "epoch": 0.20314735336194564, "grad_norm": 0.6998062637265559, "learning_rate": 4.427088402270884e-06, "loss": 0.6059, "step": 6958 }, { "epoch": 0.203176549589793, "grad_norm": 0.7245983124040293, "learning_rate": 4.426926196269262e-06, "loss": 0.6132, "step": 6959 }, { "epoch": 0.20320574581764037, "grad_norm": 0.7647053004064019, "learning_rate": 4.42676399026764e-06, "loss": 0.7054, "step": 6960 }, { "epoch": 0.20323494204548773, "grad_norm": 0.8549379547363589, "learning_rate": 4.426601784266018e-06, "loss": 0.7452, "step": 6961 }, { "epoch": 0.2032641382733351, "grad_norm": 0.7515138638171887, "learning_rate": 4.426439578264396e-06, "loss": 0.6978, "step": 6962 }, { "epoch": 0.20329333450118245, "grad_norm": 0.7453949546080962, "learning_rate": 4.426277372262774e-06, "loss": 0.6608, "step": 6963 }, { "epoch": 0.2033225307290298, "grad_norm": 0.7888601485053556, "learning_rate": 4.426115166261152e-06, "loss": 0.7221, "step": 6964 }, { "epoch": 0.20335172695687717, "grad_norm": 0.7568210402997144, "learning_rate": 4.425952960259529e-06, "loss": 0.696, "step": 6965 }, { "epoch": 0.20338092318472453, "grad_norm": 0.7650776388441586, "learning_rate": 4.425790754257908e-06, "loss": 0.6552, "step": 6966 }, { "epoch": 0.2034101194125719, "grad_norm": 0.7258360816305669, "learning_rate": 4.425628548256286e-06, "loss": 0.6724, "step": 6967 }, { "epoch": 0.20343931564041925, "grad_norm": 0.7443762125220461, "learning_rate": 4.425466342254664e-06, "loss": 0.704, "step": 6968 }, { "epoch": 0.20346851186826662, "grad_norm": 0.7581284071096265, "learning_rate": 4.425304136253042e-06, "loss": 0.7016, "step": 6969 }, { "epoch": 0.20349770809611398, "grad_norm": 0.7078299894662503, "learning_rate": 4.42514193025142e-06, "loss": 0.5948, "step": 6970 }, { "epoch": 0.20352690432396134, "grad_norm": 0.8385715240128346, "learning_rate": 4.424979724249797e-06, "loss": 0.8315, "step": 6971 }, { "epoch": 0.2035561005518087, "grad_norm": 0.7560975791971312, "learning_rate": 4.424817518248175e-06, "loss": 0.6982, "step": 6972 }, { "epoch": 0.20358529677965606, "grad_norm": 0.7194674514284544, "learning_rate": 4.424655312246553e-06, "loss": 0.6924, "step": 6973 }, { "epoch": 0.20361449300750342, "grad_norm": 0.6783987485051064, "learning_rate": 4.424493106244931e-06, "loss": 0.5939, "step": 6974 }, { "epoch": 0.20364368923535078, "grad_norm": 0.6885332315283363, "learning_rate": 4.424330900243309e-06, "loss": 0.6088, "step": 6975 }, { "epoch": 0.20367288546319814, "grad_norm": 0.7013380860583834, "learning_rate": 4.424168694241687e-06, "loss": 0.5929, "step": 6976 }, { "epoch": 0.2037020816910455, "grad_norm": 0.6840735915600292, "learning_rate": 4.424006488240065e-06, "loss": 0.6229, "step": 6977 }, { "epoch": 0.20373127791889287, "grad_norm": 0.7576433830402312, "learning_rate": 4.423844282238443e-06, "loss": 0.7438, "step": 6978 }, { "epoch": 0.20376047414674023, "grad_norm": 0.7576932990469252, "learning_rate": 4.423682076236821e-06, "loss": 0.677, "step": 6979 }, { "epoch": 0.20378967037458762, "grad_norm": 0.7043484971825582, "learning_rate": 4.4235198702351986e-06, "loss": 0.5952, "step": 6980 }, { "epoch": 0.20381886660243498, "grad_norm": 0.8134856435546685, "learning_rate": 4.4233576642335766e-06, "loss": 0.8185, "step": 6981 }, { "epoch": 0.20384806283028234, "grad_norm": 0.7215145015109589, "learning_rate": 4.4231954582319546e-06, "loss": 0.6529, "step": 6982 }, { "epoch": 0.2038772590581297, "grad_norm": 0.6692261720714922, "learning_rate": 4.4230332522303326e-06, "loss": 0.5891, "step": 6983 }, { "epoch": 0.20390645528597706, "grad_norm": 0.7254237695537309, "learning_rate": 4.422871046228711e-06, "loss": 0.6452, "step": 6984 }, { "epoch": 0.20393565151382442, "grad_norm": 0.7422444565864289, "learning_rate": 4.4227088402270894e-06, "loss": 0.6398, "step": 6985 }, { "epoch": 0.20396484774167178, "grad_norm": 0.7820728013843616, "learning_rate": 4.422546634225467e-06, "loss": 0.7184, "step": 6986 }, { "epoch": 0.20399404396951915, "grad_norm": 0.7512217036146347, "learning_rate": 4.422384428223845e-06, "loss": 0.7061, "step": 6987 }, { "epoch": 0.2040232401973665, "grad_norm": 0.8068617375413063, "learning_rate": 4.422222222222223e-06, "loss": 0.7037, "step": 6988 }, { "epoch": 0.20405243642521387, "grad_norm": 0.8159657240721333, "learning_rate": 4.422060016220601e-06, "loss": 0.7008, "step": 6989 }, { "epoch": 0.20408163265306123, "grad_norm": 0.6916972272199314, "learning_rate": 4.421897810218979e-06, "loss": 0.6282, "step": 6990 }, { "epoch": 0.2041108288809086, "grad_norm": 0.7614123313183492, "learning_rate": 4.421735604217357e-06, "loss": 0.684, "step": 6991 }, { "epoch": 0.20414002510875595, "grad_norm": 0.7649478730785358, "learning_rate": 4.421573398215735e-06, "loss": 0.7084, "step": 6992 }, { "epoch": 0.2041692213366033, "grad_norm": 0.7830117344612996, "learning_rate": 4.421411192214113e-06, "loss": 0.6316, "step": 6993 }, { "epoch": 0.20419841756445067, "grad_norm": 0.9202994736587337, "learning_rate": 4.42124898621249e-06, "loss": 0.7663, "step": 6994 }, { "epoch": 0.20422761379229804, "grad_norm": 0.7726158493201254, "learning_rate": 4.421086780210868e-06, "loss": 0.6522, "step": 6995 }, { "epoch": 0.2042568100201454, "grad_norm": 0.7506178805806052, "learning_rate": 4.420924574209246e-06, "loss": 0.6862, "step": 6996 }, { "epoch": 0.20428600624799276, "grad_norm": 0.728324268438989, "learning_rate": 4.420762368207624e-06, "loss": 0.6887, "step": 6997 }, { "epoch": 0.20431520247584012, "grad_norm": 0.7129400340791262, "learning_rate": 4.420600162206002e-06, "loss": 0.6727, "step": 6998 }, { "epoch": 0.20434439870368748, "grad_norm": 0.7954138361492208, "learning_rate": 4.42043795620438e-06, "loss": 0.7423, "step": 6999 }, { "epoch": 0.20437359493153484, "grad_norm": 0.7471829294519359, "learning_rate": 4.420275750202758e-06, "loss": 0.7113, "step": 7000 }, { "epoch": 0.2044027911593822, "grad_norm": 0.7092801527921258, "learning_rate": 4.420113544201136e-06, "loss": 0.6616, "step": 7001 }, { "epoch": 0.20443198738722956, "grad_norm": 0.7676838345159646, "learning_rate": 4.419951338199514e-06, "loss": 0.6579, "step": 7002 }, { "epoch": 0.20446118361507692, "grad_norm": 0.7552725942403625, "learning_rate": 4.419789132197891e-06, "loss": 0.7436, "step": 7003 }, { "epoch": 0.20449037984292429, "grad_norm": 0.7747040594389885, "learning_rate": 4.41962692619627e-06, "loss": 0.6296, "step": 7004 }, { "epoch": 0.20451957607077165, "grad_norm": 0.7860333187708113, "learning_rate": 4.419464720194648e-06, "loss": 0.7854, "step": 7005 }, { "epoch": 0.204548772298619, "grad_norm": 0.8709931279021131, "learning_rate": 4.419302514193026e-06, "loss": 0.7409, "step": 7006 }, { "epoch": 0.20457796852646637, "grad_norm": 0.7700775569612813, "learning_rate": 4.419140308191404e-06, "loss": 0.686, "step": 7007 }, { "epoch": 0.20460716475431373, "grad_norm": 0.7748232549345239, "learning_rate": 4.418978102189782e-06, "loss": 0.6822, "step": 7008 }, { "epoch": 0.2046363609821611, "grad_norm": 0.7326741880021116, "learning_rate": 4.418815896188159e-06, "loss": 0.6349, "step": 7009 }, { "epoch": 0.20466555721000848, "grad_norm": 0.7082319242378478, "learning_rate": 4.418653690186537e-06, "loss": 0.6472, "step": 7010 }, { "epoch": 0.20469475343785584, "grad_norm": 0.7046294782271146, "learning_rate": 4.418491484184915e-06, "loss": 0.624, "step": 7011 }, { "epoch": 0.2047239496657032, "grad_norm": 0.7915970230130193, "learning_rate": 4.418329278183293e-06, "loss": 0.8099, "step": 7012 }, { "epoch": 0.20475314589355056, "grad_norm": 0.8659092099878081, "learning_rate": 4.418167072181671e-06, "loss": 0.6643, "step": 7013 }, { "epoch": 0.20478234212139793, "grad_norm": 0.7307453057320091, "learning_rate": 4.418004866180049e-06, "loss": 0.6985, "step": 7014 }, { "epoch": 0.2048115383492453, "grad_norm": 0.7236375544293626, "learning_rate": 4.417842660178427e-06, "loss": 0.6878, "step": 7015 }, { "epoch": 0.20484073457709265, "grad_norm": 0.7908596813046969, "learning_rate": 4.417680454176805e-06, "loss": 0.7089, "step": 7016 }, { "epoch": 0.20486993080494, "grad_norm": 0.8183446734160845, "learning_rate": 4.417518248175183e-06, "loss": 0.8208, "step": 7017 }, { "epoch": 0.20489912703278737, "grad_norm": 0.7907064836767084, "learning_rate": 4.41735604217356e-06, "loss": 0.7414, "step": 7018 }, { "epoch": 0.20492832326063473, "grad_norm": 0.7684613276870854, "learning_rate": 4.417193836171938e-06, "loss": 0.7245, "step": 7019 }, { "epoch": 0.2049575194884821, "grad_norm": 1.1111304871196008, "learning_rate": 4.417031630170316e-06, "loss": 0.8028, "step": 7020 }, { "epoch": 0.20498671571632945, "grad_norm": 0.7271827186394433, "learning_rate": 4.416869424168694e-06, "loss": 0.6417, "step": 7021 }, { "epoch": 0.20501591194417682, "grad_norm": 0.7993616866589999, "learning_rate": 4.416707218167073e-06, "loss": 0.7117, "step": 7022 }, { "epoch": 0.20504510817202418, "grad_norm": 0.7485368784228611, "learning_rate": 4.41654501216545e-06, "loss": 0.7002, "step": 7023 }, { "epoch": 0.20507430439987154, "grad_norm": 0.762270685349967, "learning_rate": 4.416382806163828e-06, "loss": 0.6965, "step": 7024 }, { "epoch": 0.2051035006277189, "grad_norm": 0.7545267113867922, "learning_rate": 4.416220600162206e-06, "loss": 0.6859, "step": 7025 }, { "epoch": 0.20513269685556626, "grad_norm": 0.675433274098025, "learning_rate": 4.416058394160584e-06, "loss": 0.5661, "step": 7026 }, { "epoch": 0.20516189308341362, "grad_norm": 0.7134944998926749, "learning_rate": 4.415896188158962e-06, "loss": 0.6475, "step": 7027 }, { "epoch": 0.20519108931126098, "grad_norm": 0.8319529720355581, "learning_rate": 4.41573398215734e-06, "loss": 0.745, "step": 7028 }, { "epoch": 0.20522028553910834, "grad_norm": 0.7433261175283807, "learning_rate": 4.415571776155718e-06, "loss": 0.6832, "step": 7029 }, { "epoch": 0.2052494817669557, "grad_norm": 0.7509360978071227, "learning_rate": 4.415409570154096e-06, "loss": 0.6727, "step": 7030 }, { "epoch": 0.20527867799480307, "grad_norm": 0.9426784429218156, "learning_rate": 4.415247364152474e-06, "loss": 0.7021, "step": 7031 }, { "epoch": 0.20530787422265043, "grad_norm": 0.7474052256394129, "learning_rate": 4.4150851581508515e-06, "loss": 0.6516, "step": 7032 }, { "epoch": 0.2053370704504978, "grad_norm": 0.7383820282440361, "learning_rate": 4.4149229521492295e-06, "loss": 0.6893, "step": 7033 }, { "epoch": 0.20536626667834515, "grad_norm": 0.7429116023498178, "learning_rate": 4.4147607461476075e-06, "loss": 0.6149, "step": 7034 }, { "epoch": 0.2053954629061925, "grad_norm": 0.7026476101316438, "learning_rate": 4.4145985401459855e-06, "loss": 0.5172, "step": 7035 }, { "epoch": 0.20542465913403987, "grad_norm": 0.7819520328336498, "learning_rate": 4.4144363341443635e-06, "loss": 0.6941, "step": 7036 }, { "epoch": 0.20545385536188723, "grad_norm": 0.7477633432563691, "learning_rate": 4.4142741281427416e-06, "loss": 0.7156, "step": 7037 }, { "epoch": 0.2054830515897346, "grad_norm": 0.7370116027976628, "learning_rate": 4.4141119221411196e-06, "loss": 0.6657, "step": 7038 }, { "epoch": 0.20551224781758196, "grad_norm": 0.7449203070367996, "learning_rate": 4.4139497161394976e-06, "loss": 0.6852, "step": 7039 }, { "epoch": 0.20554144404542932, "grad_norm": 0.7556595704922008, "learning_rate": 4.4137875101378756e-06, "loss": 0.6601, "step": 7040 }, { "epoch": 0.2055706402732767, "grad_norm": 0.7063174853194707, "learning_rate": 4.413625304136254e-06, "loss": 0.597, "step": 7041 }, { "epoch": 0.20559983650112407, "grad_norm": 0.7115183536882923, "learning_rate": 4.413463098134632e-06, "loss": 0.5888, "step": 7042 }, { "epoch": 0.20562903272897143, "grad_norm": 0.7216547507942958, "learning_rate": 4.41330089213301e-06, "loss": 0.6963, "step": 7043 }, { "epoch": 0.2056582289568188, "grad_norm": 0.8981734751240705, "learning_rate": 4.413138686131388e-06, "loss": 0.6366, "step": 7044 }, { "epoch": 0.20568742518466615, "grad_norm": 0.7225164826098893, "learning_rate": 4.412976480129766e-06, "loss": 0.7147, "step": 7045 }, { "epoch": 0.2057166214125135, "grad_norm": 0.8848539070160862, "learning_rate": 4.412814274128144e-06, "loss": 0.7396, "step": 7046 }, { "epoch": 0.20574581764036087, "grad_norm": 0.7553410743390303, "learning_rate": 4.412652068126521e-06, "loss": 0.716, "step": 7047 }, { "epoch": 0.20577501386820823, "grad_norm": 0.7434087139390426, "learning_rate": 4.412489862124899e-06, "loss": 0.7033, "step": 7048 }, { "epoch": 0.2058042100960556, "grad_norm": 0.7818624516549765, "learning_rate": 4.412327656123277e-06, "loss": 0.7009, "step": 7049 }, { "epoch": 0.20583340632390296, "grad_norm": 0.7234867551578769, "learning_rate": 4.412165450121655e-06, "loss": 0.6423, "step": 7050 }, { "epoch": 0.20586260255175032, "grad_norm": 0.8473021332274115, "learning_rate": 4.412003244120033e-06, "loss": 0.8204, "step": 7051 }, { "epoch": 0.20589179877959768, "grad_norm": 0.8832557443936595, "learning_rate": 4.411841038118411e-06, "loss": 0.6833, "step": 7052 }, { "epoch": 0.20592099500744504, "grad_norm": 0.766639505807538, "learning_rate": 4.411678832116789e-06, "loss": 0.688, "step": 7053 }, { "epoch": 0.2059501912352924, "grad_norm": 0.7009219252942245, "learning_rate": 4.411516626115167e-06, "loss": 0.5804, "step": 7054 }, { "epoch": 0.20597938746313976, "grad_norm": 0.7851165522670861, "learning_rate": 4.411354420113545e-06, "loss": 0.6488, "step": 7055 }, { "epoch": 0.20600858369098712, "grad_norm": 0.7576709660398878, "learning_rate": 4.411192214111922e-06, "loss": 0.6982, "step": 7056 }, { "epoch": 0.20603777991883448, "grad_norm": 0.7633078306376809, "learning_rate": 4.4110300081103e-06, "loss": 0.7548, "step": 7057 }, { "epoch": 0.20606697614668185, "grad_norm": 0.8420167614516157, "learning_rate": 4.410867802108678e-06, "loss": 0.8, "step": 7058 }, { "epoch": 0.2060961723745292, "grad_norm": 0.7363080447630113, "learning_rate": 4.410705596107056e-06, "loss": 0.7114, "step": 7059 }, { "epoch": 0.20612536860237657, "grad_norm": 0.8277219884998643, "learning_rate": 4.410543390105435e-06, "loss": 0.6543, "step": 7060 }, { "epoch": 0.20615456483022393, "grad_norm": 0.7506178476965957, "learning_rate": 4.410381184103812e-06, "loss": 0.5732, "step": 7061 }, { "epoch": 0.2061837610580713, "grad_norm": 1.010382660550485, "learning_rate": 4.41021897810219e-06, "loss": 0.6951, "step": 7062 }, { "epoch": 0.20621295728591865, "grad_norm": 0.6852913629932997, "learning_rate": 4.410056772100568e-06, "loss": 0.6415, "step": 7063 }, { "epoch": 0.206242153513766, "grad_norm": 0.7807866789175888, "learning_rate": 4.409894566098946e-06, "loss": 0.6791, "step": 7064 }, { "epoch": 0.20627134974161337, "grad_norm": 0.8426668574532706, "learning_rate": 4.409732360097324e-06, "loss": 0.7132, "step": 7065 }, { "epoch": 0.20630054596946074, "grad_norm": 0.7067553576294173, "learning_rate": 4.409570154095702e-06, "loss": 0.6681, "step": 7066 }, { "epoch": 0.2063297421973081, "grad_norm": 0.7191405589145817, "learning_rate": 4.40940794809408e-06, "loss": 0.6336, "step": 7067 }, { "epoch": 0.20635893842515546, "grad_norm": 0.8580170935476638, "learning_rate": 4.409245742092458e-06, "loss": 0.6911, "step": 7068 }, { "epoch": 0.20638813465300282, "grad_norm": 0.7747604942092359, "learning_rate": 4.409083536090836e-06, "loss": 0.6922, "step": 7069 }, { "epoch": 0.20641733088085018, "grad_norm": 0.7666383518216674, "learning_rate": 4.408921330089213e-06, "loss": 0.6922, "step": 7070 }, { "epoch": 0.20644652710869757, "grad_norm": 0.8421244008969836, "learning_rate": 4.408759124087591e-06, "loss": 0.779, "step": 7071 }, { "epoch": 0.20647572333654493, "grad_norm": 0.8101561295835855, "learning_rate": 4.408596918085969e-06, "loss": 0.7963, "step": 7072 }, { "epoch": 0.2065049195643923, "grad_norm": 0.7141654832100978, "learning_rate": 4.408434712084347e-06, "loss": 0.6301, "step": 7073 }, { "epoch": 0.20653411579223965, "grad_norm": 0.9771165883649838, "learning_rate": 4.408272506082725e-06, "loss": 0.7013, "step": 7074 }, { "epoch": 0.20656331202008701, "grad_norm": 0.7541437649471306, "learning_rate": 4.408110300081103e-06, "loss": 0.7338, "step": 7075 }, { "epoch": 0.20659250824793438, "grad_norm": 0.7702588660106385, "learning_rate": 4.407948094079481e-06, "loss": 0.6935, "step": 7076 }, { "epoch": 0.20662170447578174, "grad_norm": 0.7413512944343527, "learning_rate": 4.407785888077859e-06, "loss": 0.6848, "step": 7077 }, { "epoch": 0.2066509007036291, "grad_norm": 0.7375281154838675, "learning_rate": 4.407623682076237e-06, "loss": 0.665, "step": 7078 }, { "epoch": 0.20668009693147646, "grad_norm": 0.7275131767551433, "learning_rate": 4.407461476074615e-06, "loss": 0.6533, "step": 7079 }, { "epoch": 0.20670929315932382, "grad_norm": 0.8004561865669889, "learning_rate": 4.407299270072993e-06, "loss": 0.654, "step": 7080 }, { "epoch": 0.20673848938717118, "grad_norm": 0.8390057930839362, "learning_rate": 4.407137064071371e-06, "loss": 0.719, "step": 7081 }, { "epoch": 0.20676768561501854, "grad_norm": 0.8728249038349888, "learning_rate": 4.406974858069749e-06, "loss": 0.7469, "step": 7082 }, { "epoch": 0.2067968818428659, "grad_norm": 0.8460868283099857, "learning_rate": 4.406812652068127e-06, "loss": 0.6618, "step": 7083 }, { "epoch": 0.20682607807071327, "grad_norm": 0.773091288435704, "learning_rate": 4.406650446066505e-06, "loss": 0.7351, "step": 7084 }, { "epoch": 0.20685527429856063, "grad_norm": 0.7463593695090752, "learning_rate": 4.4064882400648825e-06, "loss": 0.7036, "step": 7085 }, { "epoch": 0.206884470526408, "grad_norm": 0.7629185347200734, "learning_rate": 4.4063260340632605e-06, "loss": 0.7255, "step": 7086 }, { "epoch": 0.20691366675425535, "grad_norm": 1.1130013786734294, "learning_rate": 4.4061638280616385e-06, "loss": 0.705, "step": 7087 }, { "epoch": 0.2069428629821027, "grad_norm": 0.7211876973285419, "learning_rate": 4.4060016220600165e-06, "loss": 0.6279, "step": 7088 }, { "epoch": 0.20697205920995007, "grad_norm": 0.7838893239828416, "learning_rate": 4.4058394160583945e-06, "loss": 0.661, "step": 7089 }, { "epoch": 0.20700125543779743, "grad_norm": 0.7714637260826805, "learning_rate": 4.4056772100567725e-06, "loss": 0.6759, "step": 7090 }, { "epoch": 0.2070304516656448, "grad_norm": 0.7766173937947908, "learning_rate": 4.4055150040551505e-06, "loss": 0.7657, "step": 7091 }, { "epoch": 0.20705964789349215, "grad_norm": 0.773865177139439, "learning_rate": 4.4053527980535285e-06, "loss": 0.7349, "step": 7092 }, { "epoch": 0.20708884412133952, "grad_norm": 0.7405116206240913, "learning_rate": 4.4051905920519065e-06, "loss": 0.6361, "step": 7093 }, { "epoch": 0.20711804034918688, "grad_norm": 0.7529020987347463, "learning_rate": 4.405028386050284e-06, "loss": 0.6612, "step": 7094 }, { "epoch": 0.20714723657703424, "grad_norm": 0.8491280978188316, "learning_rate": 4.404866180048662e-06, "loss": 0.7257, "step": 7095 }, { "epoch": 0.2071764328048816, "grad_norm": 0.760182858864782, "learning_rate": 4.40470397404704e-06, "loss": 0.7403, "step": 7096 }, { "epoch": 0.20720562903272896, "grad_norm": 0.7710855507891328, "learning_rate": 4.404541768045418e-06, "loss": 0.7502, "step": 7097 }, { "epoch": 0.20723482526057632, "grad_norm": 0.7392171325581459, "learning_rate": 4.404379562043797e-06, "loss": 0.6875, "step": 7098 }, { "epoch": 0.20726402148842368, "grad_norm": 0.7835364567139097, "learning_rate": 4.404217356042174e-06, "loss": 0.6522, "step": 7099 }, { "epoch": 0.20729321771627104, "grad_norm": 0.862022897546423, "learning_rate": 4.404055150040552e-06, "loss": 0.6806, "step": 7100 }, { "epoch": 0.20732241394411843, "grad_norm": 0.8312563880140178, "learning_rate": 4.40389294403893e-06, "loss": 0.8045, "step": 7101 }, { "epoch": 0.2073516101719658, "grad_norm": 0.8343966874949772, "learning_rate": 4.403730738037308e-06, "loss": 0.7188, "step": 7102 }, { "epoch": 0.20738080639981316, "grad_norm": 0.7210142477760061, "learning_rate": 4.403568532035686e-06, "loss": 0.6582, "step": 7103 }, { "epoch": 0.20741000262766052, "grad_norm": 0.7074212212112931, "learning_rate": 4.403406326034064e-06, "loss": 0.6358, "step": 7104 }, { "epoch": 0.20743919885550788, "grad_norm": 0.8471375427793796, "learning_rate": 4.403244120032442e-06, "loss": 0.7539, "step": 7105 }, { "epoch": 0.20746839508335524, "grad_norm": 0.7702603521797221, "learning_rate": 4.40308191403082e-06, "loss": 0.6744, "step": 7106 }, { "epoch": 0.2074975913112026, "grad_norm": 0.7635144161576526, "learning_rate": 4.402919708029198e-06, "loss": 0.7358, "step": 7107 }, { "epoch": 0.20752678753904996, "grad_norm": 0.7791880449119022, "learning_rate": 4.402757502027575e-06, "loss": 0.6076, "step": 7108 }, { "epoch": 0.20755598376689732, "grad_norm": 0.7821666608261928, "learning_rate": 4.402595296025953e-06, "loss": 0.7502, "step": 7109 }, { "epoch": 0.20758517999474468, "grad_norm": 0.8273786693680061, "learning_rate": 4.402433090024331e-06, "loss": 0.6711, "step": 7110 }, { "epoch": 0.20761437622259205, "grad_norm": 0.7547990355449445, "learning_rate": 4.402270884022709e-06, "loss": 0.7146, "step": 7111 }, { "epoch": 0.2076435724504394, "grad_norm": 0.8284635323601549, "learning_rate": 4.402108678021087e-06, "loss": 0.6556, "step": 7112 }, { "epoch": 0.20767276867828677, "grad_norm": 0.7049437118591834, "learning_rate": 4.401946472019465e-06, "loss": 0.6381, "step": 7113 }, { "epoch": 0.20770196490613413, "grad_norm": 0.7257858388070834, "learning_rate": 4.401784266017843e-06, "loss": 0.6703, "step": 7114 }, { "epoch": 0.2077311611339815, "grad_norm": 0.7173457800833279, "learning_rate": 4.401622060016221e-06, "loss": 0.6656, "step": 7115 }, { "epoch": 0.20776035736182885, "grad_norm": 0.7452456637470791, "learning_rate": 4.401459854014599e-06, "loss": 0.6886, "step": 7116 }, { "epoch": 0.2077895535896762, "grad_norm": 0.7213301676610345, "learning_rate": 4.401297648012977e-06, "loss": 0.6141, "step": 7117 }, { "epoch": 0.20781874981752357, "grad_norm": 0.7226481352815131, "learning_rate": 4.401135442011355e-06, "loss": 0.7076, "step": 7118 }, { "epoch": 0.20784794604537093, "grad_norm": 0.8148737525495364, "learning_rate": 4.400973236009733e-06, "loss": 0.6833, "step": 7119 }, { "epoch": 0.2078771422732183, "grad_norm": 0.7616727717725319, "learning_rate": 4.400811030008111e-06, "loss": 0.6836, "step": 7120 }, { "epoch": 0.20790633850106566, "grad_norm": 0.8317387970322162, "learning_rate": 4.400648824006489e-06, "loss": 0.7302, "step": 7121 }, { "epoch": 0.20793553472891302, "grad_norm": 0.666522214400867, "learning_rate": 4.400486618004867e-06, "loss": 0.5917, "step": 7122 }, { "epoch": 0.20796473095676038, "grad_norm": 0.7686558207847992, "learning_rate": 4.400324412003244e-06, "loss": 0.7466, "step": 7123 }, { "epoch": 0.20799392718460774, "grad_norm": 0.8577242668638598, "learning_rate": 4.400162206001622e-06, "loss": 0.7349, "step": 7124 }, { "epoch": 0.2080231234124551, "grad_norm": 0.7394394326162945, "learning_rate": 4.4e-06, "loss": 0.6744, "step": 7125 }, { "epoch": 0.20805231964030246, "grad_norm": 0.9100775575451597, "learning_rate": 4.399837793998378e-06, "loss": 0.884, "step": 7126 }, { "epoch": 0.20808151586814982, "grad_norm": 0.8421043942982039, "learning_rate": 4.399675587996756e-06, "loss": 0.7617, "step": 7127 }, { "epoch": 0.20811071209599719, "grad_norm": 0.7266467462207067, "learning_rate": 4.399513381995134e-06, "loss": 0.6405, "step": 7128 }, { "epoch": 0.20813990832384455, "grad_norm": 0.7786396416183051, "learning_rate": 4.399351175993512e-06, "loss": 0.7093, "step": 7129 }, { "epoch": 0.2081691045516919, "grad_norm": 0.9658640452512429, "learning_rate": 4.39918896999189e-06, "loss": 0.7329, "step": 7130 }, { "epoch": 0.2081983007795393, "grad_norm": 0.7994044686528186, "learning_rate": 4.399026763990268e-06, "loss": 0.7287, "step": 7131 }, { "epoch": 0.20822749700738666, "grad_norm": 0.7281368934369481, "learning_rate": 4.3988645579886454e-06, "loss": 0.6869, "step": 7132 }, { "epoch": 0.20825669323523402, "grad_norm": 0.7085679403644133, "learning_rate": 4.3987023519870234e-06, "loss": 0.6419, "step": 7133 }, { "epoch": 0.20828588946308138, "grad_norm": 0.7439221031599984, "learning_rate": 4.3985401459854014e-06, "loss": 0.6316, "step": 7134 }, { "epoch": 0.20831508569092874, "grad_norm": 0.7424059271109928, "learning_rate": 4.3983779399837794e-06, "loss": 0.7049, "step": 7135 }, { "epoch": 0.2083442819187761, "grad_norm": 0.7670982930476247, "learning_rate": 4.398215733982158e-06, "loss": 0.7205, "step": 7136 }, { "epoch": 0.20837347814662346, "grad_norm": 0.7453203565107653, "learning_rate": 4.3980535279805355e-06, "loss": 0.664, "step": 7137 }, { "epoch": 0.20840267437447083, "grad_norm": 0.7415151776352205, "learning_rate": 4.3978913219789135e-06, "loss": 0.6654, "step": 7138 }, { "epoch": 0.2084318706023182, "grad_norm": 0.7410056202301166, "learning_rate": 4.3977291159772915e-06, "loss": 0.6217, "step": 7139 }, { "epoch": 0.20846106683016555, "grad_norm": 0.7283143007958282, "learning_rate": 4.3975669099756695e-06, "loss": 0.6367, "step": 7140 }, { "epoch": 0.2084902630580129, "grad_norm": 0.8231366124926062, "learning_rate": 4.3974047039740475e-06, "loss": 0.7509, "step": 7141 }, { "epoch": 0.20851945928586027, "grad_norm": 0.7755632058261637, "learning_rate": 4.3972424979724255e-06, "loss": 0.6831, "step": 7142 }, { "epoch": 0.20854865551370763, "grad_norm": 0.7761216103529652, "learning_rate": 4.3970802919708035e-06, "loss": 0.7305, "step": 7143 }, { "epoch": 0.208577851741555, "grad_norm": 0.7737589825328223, "learning_rate": 4.3969180859691815e-06, "loss": 0.7153, "step": 7144 }, { "epoch": 0.20860704796940235, "grad_norm": 0.9042532339320395, "learning_rate": 4.3967558799675595e-06, "loss": 0.642, "step": 7145 }, { "epoch": 0.20863624419724972, "grad_norm": 0.7222427047264425, "learning_rate": 4.396593673965937e-06, "loss": 0.665, "step": 7146 }, { "epoch": 0.20866544042509708, "grad_norm": 0.8258256535661023, "learning_rate": 4.396431467964315e-06, "loss": 0.6879, "step": 7147 }, { "epoch": 0.20869463665294444, "grad_norm": 0.7143131304955435, "learning_rate": 4.396269261962693e-06, "loss": 0.6027, "step": 7148 }, { "epoch": 0.2087238328807918, "grad_norm": 0.7177093837279, "learning_rate": 4.396107055961071e-06, "loss": 0.6387, "step": 7149 }, { "epoch": 0.20875302910863916, "grad_norm": 0.9380580608426617, "learning_rate": 4.395944849959449e-06, "loss": 0.7225, "step": 7150 }, { "epoch": 0.20878222533648652, "grad_norm": 0.8900890958256069, "learning_rate": 4.395782643957827e-06, "loss": 0.7455, "step": 7151 }, { "epoch": 0.20881142156433388, "grad_norm": 0.7443955205389764, "learning_rate": 4.395620437956205e-06, "loss": 0.6847, "step": 7152 }, { "epoch": 0.20884061779218124, "grad_norm": 0.9941217864027635, "learning_rate": 4.395458231954583e-06, "loss": 0.6414, "step": 7153 }, { "epoch": 0.2088698140200286, "grad_norm": 0.9209038831089319, "learning_rate": 4.395296025952961e-06, "loss": 0.7419, "step": 7154 }, { "epoch": 0.20889901024787597, "grad_norm": 0.7985205890080374, "learning_rate": 4.395133819951339e-06, "loss": 0.6651, "step": 7155 }, { "epoch": 0.20892820647572333, "grad_norm": 0.6768770051247796, "learning_rate": 4.394971613949717e-06, "loss": 0.6055, "step": 7156 }, { "epoch": 0.2089574027035707, "grad_norm": 0.7705953859229283, "learning_rate": 4.394809407948095e-06, "loss": 0.6928, "step": 7157 }, { "epoch": 0.20898659893141805, "grad_norm": 0.6958371786346969, "learning_rate": 4.394647201946473e-06, "loss": 0.6265, "step": 7158 }, { "epoch": 0.2090157951592654, "grad_norm": 0.7559573180287782, "learning_rate": 4.394484995944851e-06, "loss": 0.6576, "step": 7159 }, { "epoch": 0.20904499138711277, "grad_norm": 0.7191935334015483, "learning_rate": 4.394322789943229e-06, "loss": 0.6451, "step": 7160 }, { "epoch": 0.20907418761496016, "grad_norm": 0.7598392070732127, "learning_rate": 4.394160583941606e-06, "loss": 0.7545, "step": 7161 }, { "epoch": 0.20910338384280752, "grad_norm": 0.9515066316854134, "learning_rate": 4.393998377939984e-06, "loss": 0.7283, "step": 7162 }, { "epoch": 0.20913258007065488, "grad_norm": 0.7883614584211011, "learning_rate": 4.393836171938362e-06, "loss": 0.682, "step": 7163 }, { "epoch": 0.20916177629850224, "grad_norm": 0.7372971307658209, "learning_rate": 4.39367396593674e-06, "loss": 0.6279, "step": 7164 }, { "epoch": 0.2091909725263496, "grad_norm": 0.7109191335234442, "learning_rate": 4.393511759935118e-06, "loss": 0.625, "step": 7165 }, { "epoch": 0.20922016875419697, "grad_norm": 0.733052044871685, "learning_rate": 4.393349553933496e-06, "loss": 0.6927, "step": 7166 }, { "epoch": 0.20924936498204433, "grad_norm": 0.6864353050411809, "learning_rate": 4.393187347931874e-06, "loss": 0.5943, "step": 7167 }, { "epoch": 0.2092785612098917, "grad_norm": 0.8234626932275009, "learning_rate": 4.393025141930252e-06, "loss": 0.6729, "step": 7168 }, { "epoch": 0.20930775743773905, "grad_norm": 0.7537950331177955, "learning_rate": 4.392862935928629e-06, "loss": 0.6675, "step": 7169 }, { "epoch": 0.2093369536655864, "grad_norm": 0.7927335121204074, "learning_rate": 4.392700729927007e-06, "loss": 0.6878, "step": 7170 }, { "epoch": 0.20936614989343377, "grad_norm": 0.8246251367505985, "learning_rate": 4.392538523925385e-06, "loss": 0.7621, "step": 7171 }, { "epoch": 0.20939534612128113, "grad_norm": 0.7300830883048632, "learning_rate": 4.392376317923763e-06, "loss": 0.6814, "step": 7172 }, { "epoch": 0.2094245423491285, "grad_norm": 0.826634472254257, "learning_rate": 4.392214111922142e-06, "loss": 0.7993, "step": 7173 }, { "epoch": 0.20945373857697586, "grad_norm": 0.7981673903598632, "learning_rate": 4.39205190592052e-06, "loss": 0.7993, "step": 7174 }, { "epoch": 0.20948293480482322, "grad_norm": 0.7201807513048416, "learning_rate": 4.391889699918897e-06, "loss": 0.6629, "step": 7175 }, { "epoch": 0.20951213103267058, "grad_norm": 0.6911731101173318, "learning_rate": 4.391727493917275e-06, "loss": 0.6324, "step": 7176 }, { "epoch": 0.20954132726051794, "grad_norm": 0.7343027740845546, "learning_rate": 4.391565287915653e-06, "loss": 0.6673, "step": 7177 }, { "epoch": 0.2095705234883653, "grad_norm": 0.7254592123030642, "learning_rate": 4.391403081914031e-06, "loss": 0.6329, "step": 7178 }, { "epoch": 0.20959971971621266, "grad_norm": 0.7253911806879823, "learning_rate": 4.391240875912409e-06, "loss": 0.6344, "step": 7179 }, { "epoch": 0.20962891594406002, "grad_norm": 0.6839569647832644, "learning_rate": 4.391078669910787e-06, "loss": 0.6019, "step": 7180 }, { "epoch": 0.20965811217190738, "grad_norm": 0.7333302995957098, "learning_rate": 4.390916463909165e-06, "loss": 0.6405, "step": 7181 }, { "epoch": 0.20968730839975475, "grad_norm": 0.7592189095663181, "learning_rate": 4.390754257907543e-06, "loss": 0.6791, "step": 7182 }, { "epoch": 0.2097165046276021, "grad_norm": 0.6909155592534852, "learning_rate": 4.390592051905921e-06, "loss": 0.6275, "step": 7183 }, { "epoch": 0.20974570085544947, "grad_norm": 0.7538886645411907, "learning_rate": 4.390429845904298e-06, "loss": 0.7247, "step": 7184 }, { "epoch": 0.20977489708329683, "grad_norm": 0.7697568474014468, "learning_rate": 4.390267639902676e-06, "loss": 0.6004, "step": 7185 }, { "epoch": 0.2098040933111442, "grad_norm": 0.6995444592053083, "learning_rate": 4.390105433901054e-06, "loss": 0.6799, "step": 7186 }, { "epoch": 0.20983328953899155, "grad_norm": 0.7354540797377497, "learning_rate": 4.389943227899432e-06, "loss": 0.6727, "step": 7187 }, { "epoch": 0.2098624857668389, "grad_norm": 0.7274094242576535, "learning_rate": 4.38978102189781e-06, "loss": 0.6228, "step": 7188 }, { "epoch": 0.20989168199468627, "grad_norm": 0.7982907268040651, "learning_rate": 4.3896188158961884e-06, "loss": 0.7651, "step": 7189 }, { "epoch": 0.20992087822253364, "grad_norm": 0.8870422523987129, "learning_rate": 4.3894566098945664e-06, "loss": 0.6789, "step": 7190 }, { "epoch": 0.20995007445038102, "grad_norm": 0.73280541558878, "learning_rate": 4.3892944038929444e-06, "loss": 0.6828, "step": 7191 }, { "epoch": 0.20997927067822839, "grad_norm": 0.7283490825228406, "learning_rate": 4.3891321978913224e-06, "loss": 0.6937, "step": 7192 }, { "epoch": 0.21000846690607575, "grad_norm": 0.7198151089840675, "learning_rate": 4.3889699918897005e-06, "loss": 0.656, "step": 7193 }, { "epoch": 0.2100376631339231, "grad_norm": 0.7714643327348456, "learning_rate": 4.3888077858880785e-06, "loss": 0.6761, "step": 7194 }, { "epoch": 0.21006685936177047, "grad_norm": 0.7897472994773467, "learning_rate": 4.3886455798864565e-06, "loss": 0.7572, "step": 7195 }, { "epoch": 0.21009605558961783, "grad_norm": 0.8612550152824482, "learning_rate": 4.3884833738848345e-06, "loss": 0.6884, "step": 7196 }, { "epoch": 0.2101252518174652, "grad_norm": 0.814537493067428, "learning_rate": 4.3883211678832125e-06, "loss": 0.7254, "step": 7197 }, { "epoch": 0.21015444804531255, "grad_norm": 0.6967102861559351, "learning_rate": 4.3881589618815905e-06, "loss": 0.6273, "step": 7198 }, { "epoch": 0.21018364427315991, "grad_norm": 0.6585464385503408, "learning_rate": 4.387996755879968e-06, "loss": 0.5636, "step": 7199 }, { "epoch": 0.21021284050100728, "grad_norm": 0.7412755164749082, "learning_rate": 4.387834549878346e-06, "loss": 0.6064, "step": 7200 }, { "epoch": 0.21024203672885464, "grad_norm": 0.8040999602625217, "learning_rate": 4.387672343876724e-06, "loss": 0.6516, "step": 7201 }, { "epoch": 0.210271232956702, "grad_norm": 0.8160967732442642, "learning_rate": 4.387510137875102e-06, "loss": 0.7633, "step": 7202 }, { "epoch": 0.21030042918454936, "grad_norm": 0.8196100575841211, "learning_rate": 4.38734793187348e-06, "loss": 0.7558, "step": 7203 }, { "epoch": 0.21032962541239672, "grad_norm": 0.7517666468931339, "learning_rate": 4.387185725871858e-06, "loss": 0.6513, "step": 7204 }, { "epoch": 0.21035882164024408, "grad_norm": 0.7943326455763169, "learning_rate": 4.387023519870236e-06, "loss": 0.7149, "step": 7205 }, { "epoch": 0.21038801786809144, "grad_norm": 0.7864717171749483, "learning_rate": 4.386861313868614e-06, "loss": 0.7782, "step": 7206 }, { "epoch": 0.2104172140959388, "grad_norm": 0.7982460920300041, "learning_rate": 4.386699107866991e-06, "loss": 0.6864, "step": 7207 }, { "epoch": 0.21044641032378616, "grad_norm": 0.7705116060940146, "learning_rate": 4.386536901865369e-06, "loss": 0.6906, "step": 7208 }, { "epoch": 0.21047560655163353, "grad_norm": 0.7860667536479429, "learning_rate": 4.386374695863747e-06, "loss": 0.6798, "step": 7209 }, { "epoch": 0.2105048027794809, "grad_norm": 0.7579872873458712, "learning_rate": 4.386212489862125e-06, "loss": 0.6738, "step": 7210 }, { "epoch": 0.21053399900732825, "grad_norm": 0.7478761628010561, "learning_rate": 4.386050283860504e-06, "loss": 0.6613, "step": 7211 }, { "epoch": 0.2105631952351756, "grad_norm": 0.709204578603635, "learning_rate": 4.385888077858882e-06, "loss": 0.6547, "step": 7212 }, { "epoch": 0.21059239146302297, "grad_norm": 0.6950249645750899, "learning_rate": 4.385725871857259e-06, "loss": 0.6558, "step": 7213 }, { "epoch": 0.21062158769087033, "grad_norm": 0.7398282292339379, "learning_rate": 4.385563665855637e-06, "loss": 0.6531, "step": 7214 }, { "epoch": 0.2106507839187177, "grad_norm": 0.8908895736743854, "learning_rate": 4.385401459854015e-06, "loss": 0.707, "step": 7215 }, { "epoch": 0.21067998014656505, "grad_norm": 0.8168353738610918, "learning_rate": 4.385239253852393e-06, "loss": 0.712, "step": 7216 }, { "epoch": 0.21070917637441242, "grad_norm": 0.7589382490708697, "learning_rate": 4.385077047850771e-06, "loss": 0.6819, "step": 7217 }, { "epoch": 0.21073837260225978, "grad_norm": 0.7342555092953896, "learning_rate": 4.384914841849149e-06, "loss": 0.676, "step": 7218 }, { "epoch": 0.21076756883010714, "grad_norm": 0.7951843234446648, "learning_rate": 4.384752635847527e-06, "loss": 0.7603, "step": 7219 }, { "epoch": 0.2107967650579545, "grad_norm": 0.7118148907879261, "learning_rate": 4.384590429845905e-06, "loss": 0.6128, "step": 7220 }, { "epoch": 0.21082596128580186, "grad_norm": 0.7549835115650699, "learning_rate": 4.384428223844283e-06, "loss": 0.6853, "step": 7221 }, { "epoch": 0.21085515751364925, "grad_norm": 0.7347317829889766, "learning_rate": 4.38426601784266e-06, "loss": 0.5766, "step": 7222 }, { "epoch": 0.2108843537414966, "grad_norm": 0.7262710119975487, "learning_rate": 4.384103811841038e-06, "loss": 0.6617, "step": 7223 }, { "epoch": 0.21091354996934397, "grad_norm": 0.7110395393573972, "learning_rate": 4.383941605839416e-06, "loss": 0.6508, "step": 7224 }, { "epoch": 0.21094274619719133, "grad_norm": 0.7242211316626407, "learning_rate": 4.383779399837794e-06, "loss": 0.6208, "step": 7225 }, { "epoch": 0.2109719424250387, "grad_norm": 0.7460234094308956, "learning_rate": 4.383617193836172e-06, "loss": 0.6588, "step": 7226 }, { "epoch": 0.21100113865288606, "grad_norm": 0.7377963091223377, "learning_rate": 4.38345498783455e-06, "loss": 0.6479, "step": 7227 }, { "epoch": 0.21103033488073342, "grad_norm": 0.7345687785829653, "learning_rate": 4.383292781832928e-06, "loss": 0.5673, "step": 7228 }, { "epoch": 0.21105953110858078, "grad_norm": 0.8695495214059373, "learning_rate": 4.383130575831306e-06, "loss": 0.7051, "step": 7229 }, { "epoch": 0.21108872733642814, "grad_norm": 0.7202328933947042, "learning_rate": 4.382968369829684e-06, "loss": 0.6291, "step": 7230 }, { "epoch": 0.2111179235642755, "grad_norm": 0.7327034121387155, "learning_rate": 4.382806163828062e-06, "loss": 0.6757, "step": 7231 }, { "epoch": 0.21114711979212286, "grad_norm": 0.738817051245373, "learning_rate": 4.38264395782644e-06, "loss": 0.614, "step": 7232 }, { "epoch": 0.21117631601997022, "grad_norm": 0.7794000810442261, "learning_rate": 4.382481751824818e-06, "loss": 0.6601, "step": 7233 }, { "epoch": 0.21120551224781758, "grad_norm": 0.7499363469637523, "learning_rate": 4.382319545823196e-06, "loss": 0.6691, "step": 7234 }, { "epoch": 0.21123470847566495, "grad_norm": 0.7088910065016396, "learning_rate": 4.382157339821574e-06, "loss": 0.5741, "step": 7235 }, { "epoch": 0.2112639047035123, "grad_norm": 0.7110735046508014, "learning_rate": 4.381995133819952e-06, "loss": 0.6168, "step": 7236 }, { "epoch": 0.21129310093135967, "grad_norm": 0.830741519158342, "learning_rate": 4.381832927818329e-06, "loss": 0.7079, "step": 7237 }, { "epoch": 0.21132229715920703, "grad_norm": 0.952186092545103, "learning_rate": 4.381670721816707e-06, "loss": 0.6534, "step": 7238 }, { "epoch": 0.2113514933870544, "grad_norm": 0.8445746484311244, "learning_rate": 4.381508515815085e-06, "loss": 0.7521, "step": 7239 }, { "epoch": 0.21138068961490175, "grad_norm": 0.7422790525794164, "learning_rate": 4.381346309813463e-06, "loss": 0.7254, "step": 7240 }, { "epoch": 0.2114098858427491, "grad_norm": 0.7368596647574804, "learning_rate": 4.381184103811841e-06, "loss": 0.6918, "step": 7241 }, { "epoch": 0.21143908207059647, "grad_norm": 0.7023501090174677, "learning_rate": 4.381021897810219e-06, "loss": 0.6501, "step": 7242 }, { "epoch": 0.21146827829844383, "grad_norm": 0.7143865929077639, "learning_rate": 4.380859691808597e-06, "loss": 0.6236, "step": 7243 }, { "epoch": 0.2114974745262912, "grad_norm": 0.8188791840629586, "learning_rate": 4.380697485806975e-06, "loss": 0.6922, "step": 7244 }, { "epoch": 0.21152667075413856, "grad_norm": 0.7502452784826571, "learning_rate": 4.3805352798053526e-06, "loss": 0.688, "step": 7245 }, { "epoch": 0.21155586698198592, "grad_norm": 0.7635429598566549, "learning_rate": 4.380373073803731e-06, "loss": 0.674, "step": 7246 }, { "epoch": 0.21158506320983328, "grad_norm": 0.6681796867669123, "learning_rate": 4.380210867802109e-06, "loss": 0.5424, "step": 7247 }, { "epoch": 0.21161425943768064, "grad_norm": 0.8247418604304643, "learning_rate": 4.380048661800487e-06, "loss": 0.8338, "step": 7248 }, { "epoch": 0.211643455665528, "grad_norm": 0.8130583724036137, "learning_rate": 4.3798864557988654e-06, "loss": 0.7202, "step": 7249 }, { "epoch": 0.21167265189337536, "grad_norm": 0.7227983857224313, "learning_rate": 4.3797242497972435e-06, "loss": 0.6914, "step": 7250 }, { "epoch": 0.21170184812122272, "grad_norm": 0.727072123700154, "learning_rate": 4.379562043795621e-06, "loss": 0.6935, "step": 7251 }, { "epoch": 0.2117310443490701, "grad_norm": 0.7692204751792727, "learning_rate": 4.379399837793999e-06, "loss": 0.7669, "step": 7252 }, { "epoch": 0.21176024057691747, "grad_norm": 0.7861365078645519, "learning_rate": 4.379237631792377e-06, "loss": 0.7004, "step": 7253 }, { "epoch": 0.21178943680476484, "grad_norm": 0.7275197883563058, "learning_rate": 4.379075425790755e-06, "loss": 0.6172, "step": 7254 }, { "epoch": 0.2118186330326122, "grad_norm": 0.7811834682570398, "learning_rate": 4.378913219789133e-06, "loss": 0.7763, "step": 7255 }, { "epoch": 0.21184782926045956, "grad_norm": 0.7202218829378367, "learning_rate": 4.378751013787511e-06, "loss": 0.6428, "step": 7256 }, { "epoch": 0.21187702548830692, "grad_norm": 0.8500027694809521, "learning_rate": 4.378588807785889e-06, "loss": 0.7749, "step": 7257 }, { "epoch": 0.21190622171615428, "grad_norm": 0.9252649178118989, "learning_rate": 4.378426601784267e-06, "loss": 0.7284, "step": 7258 }, { "epoch": 0.21193541794400164, "grad_norm": 0.7697891098429156, "learning_rate": 4.378264395782645e-06, "loss": 0.7041, "step": 7259 }, { "epoch": 0.211964614171849, "grad_norm": 0.71517761236477, "learning_rate": 4.378102189781022e-06, "loss": 0.6419, "step": 7260 }, { "epoch": 0.21199381039969636, "grad_norm": 0.7545384219988013, "learning_rate": 4.3779399837794e-06, "loss": 0.7029, "step": 7261 }, { "epoch": 0.21202300662754373, "grad_norm": 0.7213923857879078, "learning_rate": 4.377777777777778e-06, "loss": 0.6911, "step": 7262 }, { "epoch": 0.2120522028553911, "grad_norm": 0.7137947670671989, "learning_rate": 4.377615571776156e-06, "loss": 0.6649, "step": 7263 }, { "epoch": 0.21208139908323845, "grad_norm": 0.7415714329148199, "learning_rate": 4.377453365774534e-06, "loss": 0.6834, "step": 7264 }, { "epoch": 0.2121105953110858, "grad_norm": 0.6986778615843364, "learning_rate": 4.377291159772912e-06, "loss": 0.6502, "step": 7265 }, { "epoch": 0.21213979153893317, "grad_norm": 0.7667419388032984, "learning_rate": 4.37712895377129e-06, "loss": 0.7159, "step": 7266 }, { "epoch": 0.21216898776678053, "grad_norm": 0.7137340587294464, "learning_rate": 4.376966747769668e-06, "loss": 0.6986, "step": 7267 }, { "epoch": 0.2121981839946279, "grad_norm": 0.7921886470540567, "learning_rate": 4.376804541768046e-06, "loss": 0.6863, "step": 7268 }, { "epoch": 0.21222738022247525, "grad_norm": 0.7174062909102399, "learning_rate": 4.376642335766424e-06, "loss": 0.6205, "step": 7269 }, { "epoch": 0.21225657645032261, "grad_norm": 0.715538715211325, "learning_rate": 4.376480129764802e-06, "loss": 0.5739, "step": 7270 }, { "epoch": 0.21228577267816998, "grad_norm": 0.6888924408862419, "learning_rate": 4.37631792376318e-06, "loss": 0.6146, "step": 7271 }, { "epoch": 0.21231496890601734, "grad_norm": 0.729818239339563, "learning_rate": 4.376155717761558e-06, "loss": 0.6166, "step": 7272 }, { "epoch": 0.2123441651338647, "grad_norm": 0.7762231830315847, "learning_rate": 4.375993511759936e-06, "loss": 0.6597, "step": 7273 }, { "epoch": 0.21237336136171206, "grad_norm": 0.8066966768447763, "learning_rate": 4.375831305758314e-06, "loss": 0.7617, "step": 7274 }, { "epoch": 0.21240255758955942, "grad_norm": 0.7525930930835592, "learning_rate": 4.375669099756691e-06, "loss": 0.6869, "step": 7275 }, { "epoch": 0.21243175381740678, "grad_norm": 0.7257621279351713, "learning_rate": 4.375506893755069e-06, "loss": 0.6665, "step": 7276 }, { "epoch": 0.21246095004525414, "grad_norm": 0.7522376374385461, "learning_rate": 4.375344687753447e-06, "loss": 0.6964, "step": 7277 }, { "epoch": 0.2124901462731015, "grad_norm": 0.7013328761793824, "learning_rate": 4.375182481751825e-06, "loss": 0.6812, "step": 7278 }, { "epoch": 0.21251934250094887, "grad_norm": 0.8293998219445828, "learning_rate": 4.375020275750203e-06, "loss": 0.8428, "step": 7279 }, { "epoch": 0.21254853872879623, "grad_norm": 0.7681798420797178, "learning_rate": 4.374858069748581e-06, "loss": 0.6864, "step": 7280 }, { "epoch": 0.2125777349566436, "grad_norm": 0.6980633701656259, "learning_rate": 4.374695863746959e-06, "loss": 0.6175, "step": 7281 }, { "epoch": 0.21260693118449098, "grad_norm": 0.9281670140074207, "learning_rate": 4.374533657745337e-06, "loss": 0.7726, "step": 7282 }, { "epoch": 0.21263612741233834, "grad_norm": 1.0086320576341095, "learning_rate": 4.374371451743714e-06, "loss": 0.6775, "step": 7283 }, { "epoch": 0.2126653236401857, "grad_norm": 0.7680243176684842, "learning_rate": 4.374209245742092e-06, "loss": 0.6967, "step": 7284 }, { "epoch": 0.21269451986803306, "grad_norm": 0.8023248449559933, "learning_rate": 4.37404703974047e-06, "loss": 0.7961, "step": 7285 }, { "epoch": 0.21272371609588042, "grad_norm": 0.7768888642636171, "learning_rate": 4.373884833738848e-06, "loss": 0.714, "step": 7286 }, { "epoch": 0.21275291232372778, "grad_norm": 0.7147278097365891, "learning_rate": 4.373722627737227e-06, "loss": 0.6342, "step": 7287 }, { "epoch": 0.21278210855157514, "grad_norm": 0.7616502386156652, "learning_rate": 4.373560421735605e-06, "loss": 0.6808, "step": 7288 }, { "epoch": 0.2128113047794225, "grad_norm": 0.7716630797841239, "learning_rate": 4.373398215733982e-06, "loss": 0.7624, "step": 7289 }, { "epoch": 0.21284050100726987, "grad_norm": 0.7403624352145475, "learning_rate": 4.37323600973236e-06, "loss": 0.5821, "step": 7290 }, { "epoch": 0.21286969723511723, "grad_norm": 0.7512204786249558, "learning_rate": 4.373073803730738e-06, "loss": 0.7053, "step": 7291 }, { "epoch": 0.2128988934629646, "grad_norm": 0.7418547497768639, "learning_rate": 4.372911597729116e-06, "loss": 0.7352, "step": 7292 }, { "epoch": 0.21292808969081195, "grad_norm": 0.8701383698056026, "learning_rate": 4.372749391727494e-06, "loss": 0.7273, "step": 7293 }, { "epoch": 0.2129572859186593, "grad_norm": 0.7802929620090735, "learning_rate": 4.372587185725872e-06, "loss": 0.6733, "step": 7294 }, { "epoch": 0.21298648214650667, "grad_norm": 0.826195194632774, "learning_rate": 4.37242497972425e-06, "loss": 0.7288, "step": 7295 }, { "epoch": 0.21301567837435403, "grad_norm": 0.7782236140455012, "learning_rate": 4.372262773722628e-06, "loss": 0.7117, "step": 7296 }, { "epoch": 0.2130448746022014, "grad_norm": 0.7464244573323469, "learning_rate": 4.372100567721006e-06, "loss": 0.6888, "step": 7297 }, { "epoch": 0.21307407083004876, "grad_norm": 0.7583122903549695, "learning_rate": 4.3719383617193835e-06, "loss": 0.7096, "step": 7298 }, { "epoch": 0.21310326705789612, "grad_norm": 0.8439578828937447, "learning_rate": 4.3717761557177615e-06, "loss": 0.7114, "step": 7299 }, { "epoch": 0.21313246328574348, "grad_norm": 0.7691045698526334, "learning_rate": 4.3716139497161396e-06, "loss": 0.7208, "step": 7300 }, { "epoch": 0.21316165951359084, "grad_norm": 0.7167696878262042, "learning_rate": 4.3714517437145176e-06, "loss": 0.6771, "step": 7301 }, { "epoch": 0.2131908557414382, "grad_norm": 0.7553391224410866, "learning_rate": 4.3712895377128956e-06, "loss": 0.7002, "step": 7302 }, { "epoch": 0.21322005196928556, "grad_norm": 0.7140495805213759, "learning_rate": 4.3711273317112736e-06, "loss": 0.6004, "step": 7303 }, { "epoch": 0.21324924819713292, "grad_norm": 0.7634449671160496, "learning_rate": 4.370965125709652e-06, "loss": 0.6771, "step": 7304 }, { "epoch": 0.21327844442498028, "grad_norm": 0.7643761899380757, "learning_rate": 4.37080291970803e-06, "loss": 0.7496, "step": 7305 }, { "epoch": 0.21330764065282765, "grad_norm": 0.7350142609771642, "learning_rate": 4.370640713706408e-06, "loss": 0.6763, "step": 7306 }, { "epoch": 0.213336836880675, "grad_norm": 0.7337593358358755, "learning_rate": 4.370478507704786e-06, "loss": 0.6605, "step": 7307 }, { "epoch": 0.21336603310852237, "grad_norm": 0.8006171246441338, "learning_rate": 4.370316301703164e-06, "loss": 0.7883, "step": 7308 }, { "epoch": 0.21339522933636973, "grad_norm": 0.7719526084258768, "learning_rate": 4.370154095701542e-06, "loss": 0.679, "step": 7309 }, { "epoch": 0.2134244255642171, "grad_norm": 0.790727105316937, "learning_rate": 4.36999188969992e-06, "loss": 0.6851, "step": 7310 }, { "epoch": 0.21345362179206445, "grad_norm": 0.6967899046348633, "learning_rate": 4.369829683698298e-06, "loss": 0.6388, "step": 7311 }, { "epoch": 0.21348281801991184, "grad_norm": 0.7578199297702916, "learning_rate": 4.369667477696675e-06, "loss": 0.73, "step": 7312 }, { "epoch": 0.2135120142477592, "grad_norm": 0.6954567764630812, "learning_rate": 4.369505271695053e-06, "loss": 0.6081, "step": 7313 }, { "epoch": 0.21354121047560656, "grad_norm": 0.7662292288707055, "learning_rate": 4.369343065693431e-06, "loss": 0.6491, "step": 7314 }, { "epoch": 0.21357040670345392, "grad_norm": 0.7255737433061462, "learning_rate": 4.369180859691809e-06, "loss": 0.6404, "step": 7315 }, { "epoch": 0.21359960293130129, "grad_norm": 0.7636879306319174, "learning_rate": 4.369018653690187e-06, "loss": 0.703, "step": 7316 }, { "epoch": 0.21362879915914865, "grad_norm": 0.8130476731446381, "learning_rate": 4.368856447688565e-06, "loss": 0.716, "step": 7317 }, { "epoch": 0.213657995386996, "grad_norm": 0.7503659305794566, "learning_rate": 4.368694241686943e-06, "loss": 0.7137, "step": 7318 }, { "epoch": 0.21368719161484337, "grad_norm": 0.7444450057861872, "learning_rate": 4.368532035685321e-06, "loss": 0.6329, "step": 7319 }, { "epoch": 0.21371638784269073, "grad_norm": 0.7213486652391645, "learning_rate": 4.368369829683699e-06, "loss": 0.6097, "step": 7320 }, { "epoch": 0.2137455840705381, "grad_norm": 0.6939200657273265, "learning_rate": 4.368207623682076e-06, "loss": 0.593, "step": 7321 }, { "epoch": 0.21377478029838545, "grad_norm": 0.7293343507220151, "learning_rate": 4.368045417680454e-06, "loss": 0.6442, "step": 7322 }, { "epoch": 0.21380397652623281, "grad_norm": 0.7567087439840859, "learning_rate": 4.367883211678832e-06, "loss": 0.6565, "step": 7323 }, { "epoch": 0.21383317275408018, "grad_norm": 0.7570122010181863, "learning_rate": 4.367721005677211e-06, "loss": 0.6719, "step": 7324 }, { "epoch": 0.21386236898192754, "grad_norm": 0.7042800357201827, "learning_rate": 4.367558799675589e-06, "loss": 0.6439, "step": 7325 }, { "epoch": 0.2138915652097749, "grad_norm": 1.1316759284865627, "learning_rate": 4.367396593673967e-06, "loss": 0.6978, "step": 7326 }, { "epoch": 0.21392076143762226, "grad_norm": 0.7839036332002803, "learning_rate": 4.367234387672344e-06, "loss": 0.7648, "step": 7327 }, { "epoch": 0.21394995766546962, "grad_norm": 0.7998686488436656, "learning_rate": 4.367072181670722e-06, "loss": 0.7585, "step": 7328 }, { "epoch": 0.21397915389331698, "grad_norm": 0.778792245985164, "learning_rate": 4.3669099756691e-06, "loss": 0.676, "step": 7329 }, { "epoch": 0.21400835012116434, "grad_norm": 0.7066160198123492, "learning_rate": 4.366747769667478e-06, "loss": 0.617, "step": 7330 }, { "epoch": 0.2140375463490117, "grad_norm": 0.7881134372044435, "learning_rate": 4.366585563665856e-06, "loss": 0.6947, "step": 7331 }, { "epoch": 0.21406674257685906, "grad_norm": 0.7368266526841366, "learning_rate": 4.366423357664234e-06, "loss": 0.6702, "step": 7332 }, { "epoch": 0.21409593880470643, "grad_norm": 0.7845379924959929, "learning_rate": 4.366261151662612e-06, "loss": 0.6317, "step": 7333 }, { "epoch": 0.2141251350325538, "grad_norm": 0.6788283470044693, "learning_rate": 4.36609894566099e-06, "loss": 0.6126, "step": 7334 }, { "epoch": 0.21415433126040115, "grad_norm": 0.70326603795317, "learning_rate": 4.365936739659368e-06, "loss": 0.6245, "step": 7335 }, { "epoch": 0.2141835274882485, "grad_norm": 0.6912717401958397, "learning_rate": 4.365774533657745e-06, "loss": 0.6054, "step": 7336 }, { "epoch": 0.21421272371609587, "grad_norm": 0.7497656176575855, "learning_rate": 4.365612327656123e-06, "loss": 0.6467, "step": 7337 }, { "epoch": 0.21424191994394323, "grad_norm": 0.7838234385741063, "learning_rate": 4.365450121654501e-06, "loss": 0.6739, "step": 7338 }, { "epoch": 0.2142711161717906, "grad_norm": 0.8071956030008345, "learning_rate": 4.365287915652879e-06, "loss": 0.6242, "step": 7339 }, { "epoch": 0.21430031239963795, "grad_norm": 0.7626038750428336, "learning_rate": 4.365125709651257e-06, "loss": 0.6494, "step": 7340 }, { "epoch": 0.21432950862748532, "grad_norm": 0.7335658179534025, "learning_rate": 4.364963503649635e-06, "loss": 0.6379, "step": 7341 }, { "epoch": 0.2143587048553327, "grad_norm": 0.7408341036471228, "learning_rate": 4.364801297648013e-06, "loss": 0.6957, "step": 7342 }, { "epoch": 0.21438790108318007, "grad_norm": 0.7584404628047038, "learning_rate": 4.364639091646391e-06, "loss": 0.6871, "step": 7343 }, { "epoch": 0.21441709731102743, "grad_norm": 0.7419804399373219, "learning_rate": 4.364476885644769e-06, "loss": 0.6449, "step": 7344 }, { "epoch": 0.2144462935388748, "grad_norm": 0.7624431887000829, "learning_rate": 4.364314679643147e-06, "loss": 0.676, "step": 7345 }, { "epoch": 0.21447548976672215, "grad_norm": 0.7303646399613684, "learning_rate": 4.364152473641525e-06, "loss": 0.5726, "step": 7346 }, { "epoch": 0.2145046859945695, "grad_norm": 0.7679878591134798, "learning_rate": 4.363990267639903e-06, "loss": 0.7266, "step": 7347 }, { "epoch": 0.21453388222241687, "grad_norm": 0.8026304370521845, "learning_rate": 4.363828061638281e-06, "loss": 0.7275, "step": 7348 }, { "epoch": 0.21456307845026423, "grad_norm": 0.7443915518321834, "learning_rate": 4.363665855636659e-06, "loss": 0.5958, "step": 7349 }, { "epoch": 0.2145922746781116, "grad_norm": 0.7520700144121771, "learning_rate": 4.3635036496350365e-06, "loss": 0.7082, "step": 7350 }, { "epoch": 0.21462147090595896, "grad_norm": 0.773155895445553, "learning_rate": 4.3633414436334145e-06, "loss": 0.7237, "step": 7351 }, { "epoch": 0.21465066713380632, "grad_norm": 0.7677111481156979, "learning_rate": 4.3631792376317925e-06, "loss": 0.71, "step": 7352 }, { "epoch": 0.21467986336165368, "grad_norm": 0.7913325183799342, "learning_rate": 4.3630170316301705e-06, "loss": 0.6632, "step": 7353 }, { "epoch": 0.21470905958950104, "grad_norm": 0.7109002133766453, "learning_rate": 4.3628548256285485e-06, "loss": 0.6026, "step": 7354 }, { "epoch": 0.2147382558173484, "grad_norm": 0.7460315060276613, "learning_rate": 4.3626926196269265e-06, "loss": 0.7088, "step": 7355 }, { "epoch": 0.21476745204519576, "grad_norm": 0.7325320801191981, "learning_rate": 4.3625304136253045e-06, "loss": 0.6584, "step": 7356 }, { "epoch": 0.21479664827304312, "grad_norm": 0.7014645803885872, "learning_rate": 4.3623682076236826e-06, "loss": 0.6678, "step": 7357 }, { "epoch": 0.21482584450089048, "grad_norm": 0.9979951234151394, "learning_rate": 4.3622060016220606e-06, "loss": 0.7874, "step": 7358 }, { "epoch": 0.21485504072873784, "grad_norm": 0.9336240564057997, "learning_rate": 4.362043795620438e-06, "loss": 0.7949, "step": 7359 }, { "epoch": 0.2148842369565852, "grad_norm": 0.7883118631748794, "learning_rate": 4.361881589618816e-06, "loss": 0.648, "step": 7360 }, { "epoch": 0.21491343318443257, "grad_norm": 0.8188660820468612, "learning_rate": 4.361719383617194e-06, "loss": 0.6726, "step": 7361 }, { "epoch": 0.21494262941227993, "grad_norm": 0.7837700358507672, "learning_rate": 4.361557177615573e-06, "loss": 0.7129, "step": 7362 }, { "epoch": 0.2149718256401273, "grad_norm": 0.7677942374499549, "learning_rate": 4.361394971613951e-06, "loss": 0.6775, "step": 7363 }, { "epoch": 0.21500102186797465, "grad_norm": 0.7348266299024734, "learning_rate": 4.361232765612329e-06, "loss": 0.7004, "step": 7364 }, { "epoch": 0.215030218095822, "grad_norm": 0.725664439727645, "learning_rate": 4.361070559610706e-06, "loss": 0.6251, "step": 7365 }, { "epoch": 0.21505941432366937, "grad_norm": 0.7287017107379692, "learning_rate": 4.360908353609084e-06, "loss": 0.6801, "step": 7366 }, { "epoch": 0.21508861055151673, "grad_norm": 0.8259561647867211, "learning_rate": 4.360746147607462e-06, "loss": 0.7581, "step": 7367 }, { "epoch": 0.2151178067793641, "grad_norm": 0.7152832134505019, "learning_rate": 4.36058394160584e-06, "loss": 0.6755, "step": 7368 }, { "epoch": 0.21514700300721146, "grad_norm": 0.7912716382490718, "learning_rate": 4.360421735604218e-06, "loss": 0.6691, "step": 7369 }, { "epoch": 0.21517619923505882, "grad_norm": 0.7470007626037086, "learning_rate": 4.360259529602596e-06, "loss": 0.6548, "step": 7370 }, { "epoch": 0.21520539546290618, "grad_norm": 0.7438219086942588, "learning_rate": 4.360097323600974e-06, "loss": 0.6573, "step": 7371 }, { "epoch": 0.21523459169075357, "grad_norm": 0.7634488801169836, "learning_rate": 4.359935117599352e-06, "loss": 0.7234, "step": 7372 }, { "epoch": 0.21526378791860093, "grad_norm": 0.7223886501076374, "learning_rate": 4.35977291159773e-06, "loss": 0.6624, "step": 7373 }, { "epoch": 0.2152929841464483, "grad_norm": 0.7460834922644565, "learning_rate": 4.359610705596107e-06, "loss": 0.6298, "step": 7374 }, { "epoch": 0.21532218037429565, "grad_norm": 0.8153468520890442, "learning_rate": 4.359448499594485e-06, "loss": 0.7995, "step": 7375 }, { "epoch": 0.215351376602143, "grad_norm": 0.7522170370783919, "learning_rate": 4.359286293592863e-06, "loss": 0.7664, "step": 7376 }, { "epoch": 0.21538057282999037, "grad_norm": 0.9496961952099323, "learning_rate": 4.359124087591241e-06, "loss": 0.667, "step": 7377 }, { "epoch": 0.21540976905783774, "grad_norm": 0.7100894214714711, "learning_rate": 4.358961881589619e-06, "loss": 0.6025, "step": 7378 }, { "epoch": 0.2154389652856851, "grad_norm": 0.7794625753408225, "learning_rate": 4.358799675587997e-06, "loss": 0.6951, "step": 7379 }, { "epoch": 0.21546816151353246, "grad_norm": 0.7085393684412502, "learning_rate": 4.358637469586375e-06, "loss": 0.6177, "step": 7380 }, { "epoch": 0.21549735774137982, "grad_norm": 0.7615138183918345, "learning_rate": 4.358475263584753e-06, "loss": 0.5767, "step": 7381 }, { "epoch": 0.21552655396922718, "grad_norm": 0.7073539563398952, "learning_rate": 4.358313057583131e-06, "loss": 0.6056, "step": 7382 }, { "epoch": 0.21555575019707454, "grad_norm": 0.7409350255111932, "learning_rate": 4.358150851581509e-06, "loss": 0.7353, "step": 7383 }, { "epoch": 0.2155849464249219, "grad_norm": 0.771344990065272, "learning_rate": 4.357988645579887e-06, "loss": 0.7158, "step": 7384 }, { "epoch": 0.21561414265276926, "grad_norm": 0.9302297772106992, "learning_rate": 4.357826439578265e-06, "loss": 0.7751, "step": 7385 }, { "epoch": 0.21564333888061663, "grad_norm": 0.7590393176211264, "learning_rate": 4.357664233576643e-06, "loss": 0.737, "step": 7386 }, { "epoch": 0.215672535108464, "grad_norm": 0.7091154326838357, "learning_rate": 4.357502027575021e-06, "loss": 0.5836, "step": 7387 }, { "epoch": 0.21570173133631135, "grad_norm": 0.7488843418347583, "learning_rate": 4.357339821573398e-06, "loss": 0.6892, "step": 7388 }, { "epoch": 0.2157309275641587, "grad_norm": 1.5943958436767391, "learning_rate": 4.357177615571776e-06, "loss": 0.7647, "step": 7389 }, { "epoch": 0.21576012379200607, "grad_norm": 0.860387156401181, "learning_rate": 4.357015409570154e-06, "loss": 0.6679, "step": 7390 }, { "epoch": 0.21578932001985343, "grad_norm": 0.7797061039932616, "learning_rate": 4.356853203568532e-06, "loss": 0.6483, "step": 7391 }, { "epoch": 0.2158185162477008, "grad_norm": 0.7701616995177478, "learning_rate": 4.35669099756691e-06, "loss": 0.6963, "step": 7392 }, { "epoch": 0.21584771247554815, "grad_norm": 1.0234680561557574, "learning_rate": 4.356528791565288e-06, "loss": 0.733, "step": 7393 }, { "epoch": 0.21587690870339551, "grad_norm": 0.7048441945215513, "learning_rate": 4.356366585563666e-06, "loss": 0.6752, "step": 7394 }, { "epoch": 0.21590610493124288, "grad_norm": 0.8618577749291625, "learning_rate": 4.356204379562044e-06, "loss": 0.6905, "step": 7395 }, { "epoch": 0.21593530115909024, "grad_norm": 0.8283208584899011, "learning_rate": 4.356042173560422e-06, "loss": 0.8048, "step": 7396 }, { "epoch": 0.2159644973869376, "grad_norm": 0.7063378877973471, "learning_rate": 4.3558799675587994e-06, "loss": 0.5995, "step": 7397 }, { "epoch": 0.21599369361478496, "grad_norm": 0.7098837797492467, "learning_rate": 4.3557177615571774e-06, "loss": 0.6287, "step": 7398 }, { "epoch": 0.21602288984263232, "grad_norm": 0.7313626426477362, "learning_rate": 4.3555555555555555e-06, "loss": 0.6783, "step": 7399 }, { "epoch": 0.21605208607047968, "grad_norm": 0.7285562427790879, "learning_rate": 4.355393349553934e-06, "loss": 0.661, "step": 7400 }, { "epoch": 0.21608128229832704, "grad_norm": 0.7004521116437391, "learning_rate": 4.355231143552312e-06, "loss": 0.5667, "step": 7401 }, { "epoch": 0.21611047852617443, "grad_norm": 0.6784970673916502, "learning_rate": 4.35506893755069e-06, "loss": 0.5681, "step": 7402 }, { "epoch": 0.2161396747540218, "grad_norm": 0.7222258479415685, "learning_rate": 4.3549067315490675e-06, "loss": 0.6395, "step": 7403 }, { "epoch": 0.21616887098186915, "grad_norm": 0.7874495941477162, "learning_rate": 4.3547445255474455e-06, "loss": 0.7101, "step": 7404 }, { "epoch": 0.21619806720971652, "grad_norm": 0.7211853714106135, "learning_rate": 4.3545823195458235e-06, "loss": 0.5898, "step": 7405 }, { "epoch": 0.21622726343756388, "grad_norm": 0.7294804992211752, "learning_rate": 4.3544201135442015e-06, "loss": 0.5991, "step": 7406 }, { "epoch": 0.21625645966541124, "grad_norm": 1.0405039460957228, "learning_rate": 4.3542579075425795e-06, "loss": 0.728, "step": 7407 }, { "epoch": 0.2162856558932586, "grad_norm": 0.7755247684009984, "learning_rate": 4.3540957015409575e-06, "loss": 0.705, "step": 7408 }, { "epoch": 0.21631485212110596, "grad_norm": 0.714658580198116, "learning_rate": 4.3539334955393355e-06, "loss": 0.6627, "step": 7409 }, { "epoch": 0.21634404834895332, "grad_norm": 0.7125440662505655, "learning_rate": 4.3537712895377135e-06, "loss": 0.6392, "step": 7410 }, { "epoch": 0.21637324457680068, "grad_norm": 0.7221463765685701, "learning_rate": 4.3536090835360915e-06, "loss": 0.6905, "step": 7411 }, { "epoch": 0.21640244080464804, "grad_norm": 0.7697496778731162, "learning_rate": 4.353446877534469e-06, "loss": 0.755, "step": 7412 }, { "epoch": 0.2164316370324954, "grad_norm": 0.7546886644587547, "learning_rate": 4.353284671532847e-06, "loss": 0.6638, "step": 7413 }, { "epoch": 0.21646083326034277, "grad_norm": 0.6776845626688437, "learning_rate": 4.353122465531225e-06, "loss": 0.618, "step": 7414 }, { "epoch": 0.21649002948819013, "grad_norm": 0.866065859203396, "learning_rate": 4.352960259529603e-06, "loss": 0.7537, "step": 7415 }, { "epoch": 0.2165192257160375, "grad_norm": 0.7531874540655531, "learning_rate": 4.352798053527981e-06, "loss": 0.7391, "step": 7416 }, { "epoch": 0.21654842194388485, "grad_norm": 0.8265361756372406, "learning_rate": 4.352635847526359e-06, "loss": 0.7283, "step": 7417 }, { "epoch": 0.2165776181717322, "grad_norm": 0.7963386921197859, "learning_rate": 4.352473641524737e-06, "loss": 0.752, "step": 7418 }, { "epoch": 0.21660681439957957, "grad_norm": 0.8541125819203182, "learning_rate": 4.352311435523115e-06, "loss": 0.6961, "step": 7419 }, { "epoch": 0.21663601062742693, "grad_norm": 0.799625719676361, "learning_rate": 4.352149229521493e-06, "loss": 0.7054, "step": 7420 }, { "epoch": 0.2166652068552743, "grad_norm": 0.6831826737482424, "learning_rate": 4.351987023519871e-06, "loss": 0.5858, "step": 7421 }, { "epoch": 0.21669440308312166, "grad_norm": 0.8530025248070174, "learning_rate": 4.351824817518249e-06, "loss": 0.7623, "step": 7422 }, { "epoch": 0.21672359931096902, "grad_norm": 0.690561668913298, "learning_rate": 4.351662611516627e-06, "loss": 0.636, "step": 7423 }, { "epoch": 0.21675279553881638, "grad_norm": 0.782562387864726, "learning_rate": 4.351500405515005e-06, "loss": 0.7174, "step": 7424 }, { "epoch": 0.21678199176666374, "grad_norm": 0.6932827910208622, "learning_rate": 4.351338199513383e-06, "loss": 0.6028, "step": 7425 }, { "epoch": 0.2168111879945111, "grad_norm": 0.7469948937922906, "learning_rate": 4.35117599351176e-06, "loss": 0.6935, "step": 7426 }, { "epoch": 0.21684038422235846, "grad_norm": 0.7502986063586811, "learning_rate": 4.351013787510138e-06, "loss": 0.6777, "step": 7427 }, { "epoch": 0.21686958045020582, "grad_norm": 0.6826701712528225, "learning_rate": 4.350851581508516e-06, "loss": 0.5871, "step": 7428 }, { "epoch": 0.21689877667805318, "grad_norm": 0.7127869242100453, "learning_rate": 4.350689375506894e-06, "loss": 0.6833, "step": 7429 }, { "epoch": 0.21692797290590055, "grad_norm": 0.7906015162670942, "learning_rate": 4.350527169505272e-06, "loss": 0.7396, "step": 7430 }, { "epoch": 0.2169571691337479, "grad_norm": 0.7660896226053203, "learning_rate": 4.35036496350365e-06, "loss": 0.6863, "step": 7431 }, { "epoch": 0.21698636536159527, "grad_norm": 0.7463196874518188, "learning_rate": 4.350202757502028e-06, "loss": 0.6923, "step": 7432 }, { "epoch": 0.21701556158944266, "grad_norm": 0.8376840988309802, "learning_rate": 4.350040551500406e-06, "loss": 0.6035, "step": 7433 }, { "epoch": 0.21704475781729002, "grad_norm": 0.8077958133904172, "learning_rate": 4.349878345498784e-06, "loss": 0.8092, "step": 7434 }, { "epoch": 0.21707395404513738, "grad_norm": 0.7298134098112521, "learning_rate": 4.349716139497161e-06, "loss": 0.5668, "step": 7435 }, { "epoch": 0.21710315027298474, "grad_norm": 0.7756312993123098, "learning_rate": 4.349553933495539e-06, "loss": 0.678, "step": 7436 }, { "epoch": 0.2171323465008321, "grad_norm": 0.7706561279663376, "learning_rate": 4.349391727493917e-06, "loss": 0.6909, "step": 7437 }, { "epoch": 0.21716154272867946, "grad_norm": 0.9526027924457051, "learning_rate": 4.349229521492296e-06, "loss": 0.815, "step": 7438 }, { "epoch": 0.21719073895652682, "grad_norm": 0.7631670294505285, "learning_rate": 4.349067315490674e-06, "loss": 0.7649, "step": 7439 }, { "epoch": 0.21721993518437419, "grad_norm": 0.7263908466655398, "learning_rate": 4.348905109489052e-06, "loss": 0.6781, "step": 7440 }, { "epoch": 0.21724913141222155, "grad_norm": 0.7247685602351688, "learning_rate": 4.348742903487429e-06, "loss": 0.667, "step": 7441 }, { "epoch": 0.2172783276400689, "grad_norm": 0.6705444831493395, "learning_rate": 4.348580697485807e-06, "loss": 0.53, "step": 7442 }, { "epoch": 0.21730752386791627, "grad_norm": 0.7497102326609587, "learning_rate": 4.348418491484185e-06, "loss": 0.6174, "step": 7443 }, { "epoch": 0.21733672009576363, "grad_norm": 0.8160919920836092, "learning_rate": 4.348256285482563e-06, "loss": 0.7507, "step": 7444 }, { "epoch": 0.217365916323611, "grad_norm": 0.7498051639487596, "learning_rate": 4.348094079480941e-06, "loss": 0.7123, "step": 7445 }, { "epoch": 0.21739511255145835, "grad_norm": 0.7017345230636944, "learning_rate": 4.347931873479319e-06, "loss": 0.6305, "step": 7446 }, { "epoch": 0.2174243087793057, "grad_norm": 0.7405582960874352, "learning_rate": 4.347769667477697e-06, "loss": 0.6877, "step": 7447 }, { "epoch": 0.21745350500715308, "grad_norm": 0.7712581333881704, "learning_rate": 4.347607461476075e-06, "loss": 0.7157, "step": 7448 }, { "epoch": 0.21748270123500044, "grad_norm": 0.6828453088363259, "learning_rate": 4.347445255474453e-06, "loss": 0.5607, "step": 7449 }, { "epoch": 0.2175118974628478, "grad_norm": 0.6730353437268707, "learning_rate": 4.34728304947283e-06, "loss": 0.5462, "step": 7450 }, { "epoch": 0.21754109369069516, "grad_norm": 0.7771757769094246, "learning_rate": 4.347120843471208e-06, "loss": 0.7102, "step": 7451 }, { "epoch": 0.21757028991854252, "grad_norm": 0.6755562423026815, "learning_rate": 4.3469586374695864e-06, "loss": 0.5832, "step": 7452 }, { "epoch": 0.21759948614638988, "grad_norm": 0.9253780582630085, "learning_rate": 4.3467964314679644e-06, "loss": 0.6851, "step": 7453 }, { "epoch": 0.21762868237423724, "grad_norm": 0.9257084433559664, "learning_rate": 4.3466342254663424e-06, "loss": 0.6706, "step": 7454 }, { "epoch": 0.2176578786020846, "grad_norm": 0.7461130279662087, "learning_rate": 4.3464720194647204e-06, "loss": 0.6939, "step": 7455 }, { "epoch": 0.21768707482993196, "grad_norm": 0.7257512271332323, "learning_rate": 4.3463098134630985e-06, "loss": 0.631, "step": 7456 }, { "epoch": 0.21771627105777933, "grad_norm": 0.8458180619062032, "learning_rate": 4.3461476074614765e-06, "loss": 0.733, "step": 7457 }, { "epoch": 0.2177454672856267, "grad_norm": 0.7249793997085349, "learning_rate": 4.3459854014598545e-06, "loss": 0.6438, "step": 7458 }, { "epoch": 0.21777466351347405, "grad_norm": 0.7753273756100978, "learning_rate": 4.3458231954582325e-06, "loss": 0.698, "step": 7459 }, { "epoch": 0.2178038597413214, "grad_norm": 1.7070290334057072, "learning_rate": 4.3456609894566105e-06, "loss": 0.6327, "step": 7460 }, { "epoch": 0.21783305596916877, "grad_norm": 0.6890152703403886, "learning_rate": 4.3454987834549885e-06, "loss": 0.6489, "step": 7461 }, { "epoch": 0.21786225219701613, "grad_norm": 0.7174545072733771, "learning_rate": 4.3453365774533665e-06, "loss": 0.6973, "step": 7462 }, { "epoch": 0.21789144842486352, "grad_norm": 0.7657948565412114, "learning_rate": 4.3451743714517445e-06, "loss": 0.6893, "step": 7463 }, { "epoch": 0.21792064465271088, "grad_norm": 0.7678581444756007, "learning_rate": 4.345012165450122e-06, "loss": 0.6582, "step": 7464 }, { "epoch": 0.21794984088055824, "grad_norm": 0.788914198010672, "learning_rate": 4.3448499594485e-06, "loss": 0.8078, "step": 7465 }, { "epoch": 0.2179790371084056, "grad_norm": 0.8485352591240062, "learning_rate": 4.344687753446878e-06, "loss": 0.736, "step": 7466 }, { "epoch": 0.21800823333625297, "grad_norm": 0.7977015544847357, "learning_rate": 4.344525547445256e-06, "loss": 0.6719, "step": 7467 }, { "epoch": 0.21803742956410033, "grad_norm": 0.7146359900307875, "learning_rate": 4.344363341443634e-06, "loss": 0.666, "step": 7468 }, { "epoch": 0.2180666257919477, "grad_norm": 0.7022255433966189, "learning_rate": 4.344201135442012e-06, "loss": 0.582, "step": 7469 }, { "epoch": 0.21809582201979505, "grad_norm": 0.7684901817953483, "learning_rate": 4.34403892944039e-06, "loss": 0.6581, "step": 7470 }, { "epoch": 0.2181250182476424, "grad_norm": 0.6863857933389913, "learning_rate": 4.343876723438768e-06, "loss": 0.6233, "step": 7471 }, { "epoch": 0.21815421447548977, "grad_norm": 0.7262520988925184, "learning_rate": 4.343714517437146e-06, "loss": 0.6437, "step": 7472 }, { "epoch": 0.21818341070333713, "grad_norm": 0.8286522808780656, "learning_rate": 4.343552311435523e-06, "loss": 0.7666, "step": 7473 }, { "epoch": 0.2182126069311845, "grad_norm": 0.7845838305401025, "learning_rate": 4.343390105433901e-06, "loss": 0.7291, "step": 7474 }, { "epoch": 0.21824180315903186, "grad_norm": 0.7121245171742447, "learning_rate": 4.34322789943228e-06, "loss": 0.6415, "step": 7475 }, { "epoch": 0.21827099938687922, "grad_norm": 0.6920071894630758, "learning_rate": 4.343065693430658e-06, "loss": 0.5798, "step": 7476 }, { "epoch": 0.21830019561472658, "grad_norm": 1.0335260196322367, "learning_rate": 4.342903487429036e-06, "loss": 0.6675, "step": 7477 }, { "epoch": 0.21832939184257394, "grad_norm": 0.747171229759458, "learning_rate": 4.342741281427414e-06, "loss": 0.6691, "step": 7478 }, { "epoch": 0.2183585880704213, "grad_norm": 0.7058075193905019, "learning_rate": 4.342579075425791e-06, "loss": 0.6094, "step": 7479 }, { "epoch": 0.21838778429826866, "grad_norm": 0.7206348354808714, "learning_rate": 4.342416869424169e-06, "loss": 0.6682, "step": 7480 }, { "epoch": 0.21841698052611602, "grad_norm": 0.7043680650724394, "learning_rate": 4.342254663422547e-06, "loss": 0.5736, "step": 7481 }, { "epoch": 0.21844617675396338, "grad_norm": 0.7684772719464015, "learning_rate": 4.342092457420925e-06, "loss": 0.5878, "step": 7482 }, { "epoch": 0.21847537298181074, "grad_norm": 0.6921958798210189, "learning_rate": 4.341930251419303e-06, "loss": 0.5669, "step": 7483 }, { "epoch": 0.2185045692096581, "grad_norm": 0.7647230092688787, "learning_rate": 4.341768045417681e-06, "loss": 0.7296, "step": 7484 }, { "epoch": 0.21853376543750547, "grad_norm": 0.7455821907873715, "learning_rate": 4.341605839416059e-06, "loss": 0.6647, "step": 7485 }, { "epoch": 0.21856296166535283, "grad_norm": 0.7293025342963851, "learning_rate": 4.341443633414437e-06, "loss": 0.6644, "step": 7486 }, { "epoch": 0.2185921578932002, "grad_norm": 0.7367621719096371, "learning_rate": 4.341281427412815e-06, "loss": 0.643, "step": 7487 }, { "epoch": 0.21862135412104755, "grad_norm": 0.845323814172808, "learning_rate": 4.341119221411192e-06, "loss": 0.6937, "step": 7488 }, { "epoch": 0.2186505503488949, "grad_norm": 0.8363825160021439, "learning_rate": 4.34095701540957e-06, "loss": 0.6811, "step": 7489 }, { "epoch": 0.21867974657674227, "grad_norm": 0.7496590864121152, "learning_rate": 4.340794809407948e-06, "loss": 0.6866, "step": 7490 }, { "epoch": 0.21870894280458963, "grad_norm": 0.8313812741822517, "learning_rate": 4.340632603406326e-06, "loss": 0.6762, "step": 7491 }, { "epoch": 0.218738139032437, "grad_norm": 0.7099727121256484, "learning_rate": 4.340470397404704e-06, "loss": 0.5964, "step": 7492 }, { "epoch": 0.21876733526028438, "grad_norm": 0.7050331199281616, "learning_rate": 4.340308191403082e-06, "loss": 0.656, "step": 7493 }, { "epoch": 0.21879653148813175, "grad_norm": 0.7639720506823422, "learning_rate": 4.34014598540146e-06, "loss": 0.704, "step": 7494 }, { "epoch": 0.2188257277159791, "grad_norm": 0.7424328195612329, "learning_rate": 4.339983779399838e-06, "loss": 0.6342, "step": 7495 }, { "epoch": 0.21885492394382647, "grad_norm": 0.7172839894875506, "learning_rate": 4.339821573398216e-06, "loss": 0.655, "step": 7496 }, { "epoch": 0.21888412017167383, "grad_norm": 0.7530320069491102, "learning_rate": 4.339659367396594e-06, "loss": 0.684, "step": 7497 }, { "epoch": 0.2189133163995212, "grad_norm": 0.7156869055270372, "learning_rate": 4.339497161394972e-06, "loss": 0.6615, "step": 7498 }, { "epoch": 0.21894251262736855, "grad_norm": 0.7171472440484019, "learning_rate": 4.33933495539335e-06, "loss": 0.6256, "step": 7499 }, { "epoch": 0.2189717088552159, "grad_norm": 0.7364570887906261, "learning_rate": 4.339172749391728e-06, "loss": 0.6395, "step": 7500 }, { "epoch": 0.21900090508306327, "grad_norm": 0.7101662581415477, "learning_rate": 4.339010543390106e-06, "loss": 0.6538, "step": 7501 }, { "epoch": 0.21903010131091064, "grad_norm": 0.866843604503348, "learning_rate": 4.338848337388483e-06, "loss": 0.659, "step": 7502 }, { "epoch": 0.219059297538758, "grad_norm": 0.7309082306879809, "learning_rate": 4.338686131386861e-06, "loss": 0.6759, "step": 7503 }, { "epoch": 0.21908849376660536, "grad_norm": 0.7737215964264247, "learning_rate": 4.338523925385239e-06, "loss": 0.6885, "step": 7504 }, { "epoch": 0.21911768999445272, "grad_norm": 0.6870318701078256, "learning_rate": 4.338361719383617e-06, "loss": 0.6049, "step": 7505 }, { "epoch": 0.21914688622230008, "grad_norm": 0.7641617984699511, "learning_rate": 4.338199513381995e-06, "loss": 0.6924, "step": 7506 }, { "epoch": 0.21917608245014744, "grad_norm": 0.7287123375771616, "learning_rate": 4.338037307380373e-06, "loss": 0.611, "step": 7507 }, { "epoch": 0.2192052786779948, "grad_norm": 0.7296775977227655, "learning_rate": 4.337875101378751e-06, "loss": 0.6641, "step": 7508 }, { "epoch": 0.21923447490584216, "grad_norm": 0.7372351992694943, "learning_rate": 4.3377128953771294e-06, "loss": 0.6859, "step": 7509 }, { "epoch": 0.21926367113368952, "grad_norm": 0.7453883846320098, "learning_rate": 4.3375506893755074e-06, "loss": 0.685, "step": 7510 }, { "epoch": 0.2192928673615369, "grad_norm": 0.7316579201792099, "learning_rate": 4.337388483373885e-06, "loss": 0.6606, "step": 7511 }, { "epoch": 0.21932206358938425, "grad_norm": 0.7949249857567464, "learning_rate": 4.337226277372263e-06, "loss": 0.7553, "step": 7512 }, { "epoch": 0.2193512598172316, "grad_norm": 0.8131861962439388, "learning_rate": 4.3370640713706415e-06, "loss": 0.7536, "step": 7513 }, { "epoch": 0.21938045604507897, "grad_norm": 0.7515058760950403, "learning_rate": 4.3369018653690195e-06, "loss": 0.7357, "step": 7514 }, { "epoch": 0.21940965227292633, "grad_norm": 0.801519909426744, "learning_rate": 4.3367396593673975e-06, "loss": 0.7565, "step": 7515 }, { "epoch": 0.2194388485007737, "grad_norm": 0.9853063866322606, "learning_rate": 4.3365774533657755e-06, "loss": 0.7196, "step": 7516 }, { "epoch": 0.21946804472862105, "grad_norm": 0.7251931980007283, "learning_rate": 4.336415247364153e-06, "loss": 0.6202, "step": 7517 }, { "epoch": 0.21949724095646841, "grad_norm": 0.6933564990911179, "learning_rate": 4.336253041362531e-06, "loss": 0.577, "step": 7518 }, { "epoch": 0.21952643718431578, "grad_norm": 0.8095400772355608, "learning_rate": 4.336090835360909e-06, "loss": 0.6555, "step": 7519 }, { "epoch": 0.21955563341216314, "grad_norm": 0.7814011945278856, "learning_rate": 4.335928629359287e-06, "loss": 0.7635, "step": 7520 }, { "epoch": 0.2195848296400105, "grad_norm": 0.7134484238375709, "learning_rate": 4.335766423357665e-06, "loss": 0.7152, "step": 7521 }, { "epoch": 0.21961402586785786, "grad_norm": 0.7197006122212197, "learning_rate": 4.335604217356043e-06, "loss": 0.6284, "step": 7522 }, { "epoch": 0.21964322209570525, "grad_norm": 0.7438566267570055, "learning_rate": 4.335442011354421e-06, "loss": 0.6633, "step": 7523 }, { "epoch": 0.2196724183235526, "grad_norm": 0.7978172068955831, "learning_rate": 4.335279805352799e-06, "loss": 0.6583, "step": 7524 }, { "epoch": 0.21970161455139997, "grad_norm": 0.7542760410149489, "learning_rate": 4.335117599351177e-06, "loss": 0.7456, "step": 7525 }, { "epoch": 0.21973081077924733, "grad_norm": 0.7143564231992843, "learning_rate": 4.334955393349554e-06, "loss": 0.6561, "step": 7526 }, { "epoch": 0.2197600070070947, "grad_norm": 0.67896062470983, "learning_rate": 4.334793187347932e-06, "loss": 0.6373, "step": 7527 }, { "epoch": 0.21978920323494205, "grad_norm": 0.7300647878563463, "learning_rate": 4.33463098134631e-06, "loss": 0.6006, "step": 7528 }, { "epoch": 0.21981839946278942, "grad_norm": 0.75611743625191, "learning_rate": 4.334468775344688e-06, "loss": 0.6895, "step": 7529 }, { "epoch": 0.21984759569063678, "grad_norm": 0.7126006895003728, "learning_rate": 4.334306569343066e-06, "loss": 0.6077, "step": 7530 }, { "epoch": 0.21987679191848414, "grad_norm": 0.7648177346785812, "learning_rate": 4.334144363341444e-06, "loss": 0.7121, "step": 7531 }, { "epoch": 0.2199059881463315, "grad_norm": 0.7810339218264691, "learning_rate": 4.333982157339822e-06, "loss": 0.7418, "step": 7532 }, { "epoch": 0.21993518437417886, "grad_norm": 0.8102836514219954, "learning_rate": 4.3338199513382e-06, "loss": 0.677, "step": 7533 }, { "epoch": 0.21996438060202622, "grad_norm": 0.8314952220872284, "learning_rate": 4.333657745336578e-06, "loss": 0.6468, "step": 7534 }, { "epoch": 0.21999357682987358, "grad_norm": 0.7416037369123125, "learning_rate": 4.333495539334956e-06, "loss": 0.647, "step": 7535 }, { "epoch": 0.22002277305772094, "grad_norm": 0.6569823953391991, "learning_rate": 4.333333333333334e-06, "loss": 0.5497, "step": 7536 }, { "epoch": 0.2200519692855683, "grad_norm": 0.6460529125595637, "learning_rate": 4.333171127331712e-06, "loss": 0.5409, "step": 7537 }, { "epoch": 0.22008116551341567, "grad_norm": 0.697727281371378, "learning_rate": 4.33300892133009e-06, "loss": 0.6539, "step": 7538 }, { "epoch": 0.22011036174126303, "grad_norm": 0.7318248644952626, "learning_rate": 4.332846715328468e-06, "loss": 0.6291, "step": 7539 }, { "epoch": 0.2201395579691104, "grad_norm": 0.7600514098236311, "learning_rate": 4.332684509326845e-06, "loss": 0.6773, "step": 7540 }, { "epoch": 0.22016875419695775, "grad_norm": 0.685977572688229, "learning_rate": 4.332522303325223e-06, "loss": 0.6212, "step": 7541 }, { "epoch": 0.2201979504248051, "grad_norm": 0.8110087687529711, "learning_rate": 4.332360097323601e-06, "loss": 0.6599, "step": 7542 }, { "epoch": 0.22022714665265247, "grad_norm": 0.7560647407609835, "learning_rate": 4.332197891321979e-06, "loss": 0.5877, "step": 7543 }, { "epoch": 0.22025634288049983, "grad_norm": 0.8076552291438989, "learning_rate": 4.332035685320357e-06, "loss": 0.7098, "step": 7544 }, { "epoch": 0.2202855391083472, "grad_norm": 0.7874628875161349, "learning_rate": 4.331873479318735e-06, "loss": 0.6923, "step": 7545 }, { "epoch": 0.22031473533619456, "grad_norm": 0.9052166500787419, "learning_rate": 4.331711273317113e-06, "loss": 0.8129, "step": 7546 }, { "epoch": 0.22034393156404192, "grad_norm": 0.7473205108018068, "learning_rate": 4.331549067315491e-06, "loss": 0.6705, "step": 7547 }, { "epoch": 0.22037312779188928, "grad_norm": 0.7347557121452752, "learning_rate": 4.331386861313869e-06, "loss": 0.713, "step": 7548 }, { "epoch": 0.22040232401973664, "grad_norm": 0.7386349250468888, "learning_rate": 4.331224655312246e-06, "loss": 0.6507, "step": 7549 }, { "epoch": 0.220431520247584, "grad_norm": 0.7950271206297886, "learning_rate": 4.331062449310624e-06, "loss": 0.7635, "step": 7550 }, { "epoch": 0.22046071647543136, "grad_norm": 0.769998666793374, "learning_rate": 4.330900243309003e-06, "loss": 0.6526, "step": 7551 }, { "epoch": 0.22048991270327872, "grad_norm": 0.6411163738755205, "learning_rate": 4.330738037307381e-06, "loss": 0.5596, "step": 7552 }, { "epoch": 0.2205191089311261, "grad_norm": 0.7196033006210828, "learning_rate": 4.330575831305759e-06, "loss": 0.6524, "step": 7553 }, { "epoch": 0.22054830515897347, "grad_norm": 0.7300252391959982, "learning_rate": 4.330413625304137e-06, "loss": 0.6528, "step": 7554 }, { "epoch": 0.22057750138682083, "grad_norm": 0.710187031247672, "learning_rate": 4.330251419302514e-06, "loss": 0.6307, "step": 7555 }, { "epoch": 0.2206066976146682, "grad_norm": 0.9932385183906147, "learning_rate": 4.330089213300892e-06, "loss": 0.763, "step": 7556 }, { "epoch": 0.22063589384251556, "grad_norm": 0.7353476879089453, "learning_rate": 4.32992700729927e-06, "loss": 0.6701, "step": 7557 }, { "epoch": 0.22066509007036292, "grad_norm": 0.6992645088748818, "learning_rate": 4.329764801297648e-06, "loss": 0.6336, "step": 7558 }, { "epoch": 0.22069428629821028, "grad_norm": 0.824945264364041, "learning_rate": 4.329602595296026e-06, "loss": 0.705, "step": 7559 }, { "epoch": 0.22072348252605764, "grad_norm": 0.8730096111692888, "learning_rate": 4.329440389294404e-06, "loss": 0.7232, "step": 7560 }, { "epoch": 0.220752678753905, "grad_norm": 0.7834882233223002, "learning_rate": 4.329278183292782e-06, "loss": 0.7358, "step": 7561 }, { "epoch": 0.22078187498175236, "grad_norm": 0.7782276702921295, "learning_rate": 4.32911597729116e-06, "loss": 0.65, "step": 7562 }, { "epoch": 0.22081107120959972, "grad_norm": 0.7797631847578196, "learning_rate": 4.3289537712895376e-06, "loss": 0.7227, "step": 7563 }, { "epoch": 0.22084026743744709, "grad_norm": 0.79185459088327, "learning_rate": 4.3287915652879156e-06, "loss": 0.6966, "step": 7564 }, { "epoch": 0.22086946366529445, "grad_norm": 0.7868181459566554, "learning_rate": 4.3286293592862936e-06, "loss": 0.7071, "step": 7565 }, { "epoch": 0.2208986598931418, "grad_norm": 0.8115913298852381, "learning_rate": 4.328467153284672e-06, "loss": 0.6931, "step": 7566 }, { "epoch": 0.22092785612098917, "grad_norm": 0.7480857281598051, "learning_rate": 4.32830494728305e-06, "loss": 0.666, "step": 7567 }, { "epoch": 0.22095705234883653, "grad_norm": 0.7316742307503769, "learning_rate": 4.328142741281428e-06, "loss": 0.6582, "step": 7568 }, { "epoch": 0.2209862485766839, "grad_norm": 0.8608310215697782, "learning_rate": 4.327980535279806e-06, "loss": 0.7175, "step": 7569 }, { "epoch": 0.22101544480453125, "grad_norm": 0.7346895865563432, "learning_rate": 4.327818329278184e-06, "loss": 0.6892, "step": 7570 }, { "epoch": 0.2210446410323786, "grad_norm": 0.6955898734000424, "learning_rate": 4.327656123276562e-06, "loss": 0.5674, "step": 7571 }, { "epoch": 0.22107383726022597, "grad_norm": 0.7619729508566843, "learning_rate": 4.32749391727494e-06, "loss": 0.6945, "step": 7572 }, { "epoch": 0.22110303348807334, "grad_norm": 0.8186576740669913, "learning_rate": 4.327331711273318e-06, "loss": 0.7592, "step": 7573 }, { "epoch": 0.2211322297159207, "grad_norm": 0.7418636191671948, "learning_rate": 4.327169505271696e-06, "loss": 0.672, "step": 7574 }, { "epoch": 0.22116142594376806, "grad_norm": 0.8135111757337015, "learning_rate": 4.327007299270074e-06, "loss": 0.7421, "step": 7575 }, { "epoch": 0.22119062217161542, "grad_norm": 0.7144323705291797, "learning_rate": 4.326845093268452e-06, "loss": 0.6622, "step": 7576 }, { "epoch": 0.22121981839946278, "grad_norm": 0.8071755113409499, "learning_rate": 4.32668288726683e-06, "loss": 0.6177, "step": 7577 }, { "epoch": 0.22124901462731014, "grad_norm": 0.7263263824553529, "learning_rate": 4.326520681265207e-06, "loss": 0.6774, "step": 7578 }, { "epoch": 0.2212782108551575, "grad_norm": 0.988923192320572, "learning_rate": 4.326358475263585e-06, "loss": 0.7852, "step": 7579 }, { "epoch": 0.22130740708300486, "grad_norm": 0.7320485068041571, "learning_rate": 4.326196269261963e-06, "loss": 0.6839, "step": 7580 }, { "epoch": 0.22133660331085223, "grad_norm": 0.7384747965233406, "learning_rate": 4.326034063260341e-06, "loss": 0.6891, "step": 7581 }, { "epoch": 0.2213657995386996, "grad_norm": 0.7457661352619862, "learning_rate": 4.325871857258719e-06, "loss": 0.6846, "step": 7582 }, { "epoch": 0.22139499576654698, "grad_norm": 0.7856206459595172, "learning_rate": 4.325709651257097e-06, "loss": 0.7588, "step": 7583 }, { "epoch": 0.22142419199439434, "grad_norm": 0.7216354011025274, "learning_rate": 4.325547445255475e-06, "loss": 0.6973, "step": 7584 }, { "epoch": 0.2214533882222417, "grad_norm": 0.7968398539126853, "learning_rate": 4.325385239253853e-06, "loss": 0.727, "step": 7585 }, { "epoch": 0.22148258445008906, "grad_norm": 0.7321580468470476, "learning_rate": 4.325223033252231e-06, "loss": 0.6552, "step": 7586 }, { "epoch": 0.22151178067793642, "grad_norm": 0.7807565961735861, "learning_rate": 4.325060827250608e-06, "loss": 0.7566, "step": 7587 }, { "epoch": 0.22154097690578378, "grad_norm": 0.7781706641379582, "learning_rate": 4.324898621248986e-06, "loss": 0.7093, "step": 7588 }, { "epoch": 0.22157017313363114, "grad_norm": 0.7412529510425735, "learning_rate": 4.324736415247365e-06, "loss": 0.7018, "step": 7589 }, { "epoch": 0.2215993693614785, "grad_norm": 0.7065946160895238, "learning_rate": 4.324574209245743e-06, "loss": 0.6665, "step": 7590 }, { "epoch": 0.22162856558932587, "grad_norm": 0.7214776523088173, "learning_rate": 4.324412003244121e-06, "loss": 0.6423, "step": 7591 }, { "epoch": 0.22165776181717323, "grad_norm": 0.727395123527736, "learning_rate": 4.324249797242499e-06, "loss": 0.6588, "step": 7592 }, { "epoch": 0.2216869580450206, "grad_norm": 0.8146440686247389, "learning_rate": 4.324087591240876e-06, "loss": 0.7433, "step": 7593 }, { "epoch": 0.22171615427286795, "grad_norm": 0.8440034389976747, "learning_rate": 4.323925385239254e-06, "loss": 0.7456, "step": 7594 }, { "epoch": 0.2217453505007153, "grad_norm": 0.8359086271276341, "learning_rate": 4.323763179237632e-06, "loss": 0.8107, "step": 7595 }, { "epoch": 0.22177454672856267, "grad_norm": 0.7183635873655773, "learning_rate": 4.32360097323601e-06, "loss": 0.6661, "step": 7596 }, { "epoch": 0.22180374295641003, "grad_norm": 0.7581739829237685, "learning_rate": 4.323438767234388e-06, "loss": 0.6102, "step": 7597 }, { "epoch": 0.2218329391842574, "grad_norm": 0.7783926068243995, "learning_rate": 4.323276561232766e-06, "loss": 0.6966, "step": 7598 }, { "epoch": 0.22186213541210476, "grad_norm": 0.8099588691040513, "learning_rate": 4.323114355231144e-06, "loss": 0.7696, "step": 7599 }, { "epoch": 0.22189133163995212, "grad_norm": 0.7562931127292805, "learning_rate": 4.322952149229522e-06, "loss": 0.6209, "step": 7600 }, { "epoch": 0.22192052786779948, "grad_norm": 0.7682977120710708, "learning_rate": 4.322789943227899e-06, "loss": 0.666, "step": 7601 }, { "epoch": 0.22194972409564684, "grad_norm": 0.7521363803679586, "learning_rate": 4.322627737226277e-06, "loss": 0.6458, "step": 7602 }, { "epoch": 0.2219789203234942, "grad_norm": 0.975281700871055, "learning_rate": 4.322465531224655e-06, "loss": 0.6611, "step": 7603 }, { "epoch": 0.22200811655134156, "grad_norm": 0.8991697475583647, "learning_rate": 4.322303325223033e-06, "loss": 0.6252, "step": 7604 }, { "epoch": 0.22203731277918892, "grad_norm": 0.8547297212021453, "learning_rate": 4.322141119221411e-06, "loss": 0.6444, "step": 7605 }, { "epoch": 0.22206650900703628, "grad_norm": 0.8298309524343311, "learning_rate": 4.321978913219789e-06, "loss": 0.7135, "step": 7606 }, { "epoch": 0.22209570523488364, "grad_norm": 0.8940906636622876, "learning_rate": 4.321816707218167e-06, "loss": 0.819, "step": 7607 }, { "epoch": 0.222124901462731, "grad_norm": 0.7482812090933241, "learning_rate": 4.321654501216545e-06, "loss": 0.671, "step": 7608 }, { "epoch": 0.22215409769057837, "grad_norm": 0.6959888470142058, "learning_rate": 4.321492295214923e-06, "loss": 0.6128, "step": 7609 }, { "epoch": 0.22218329391842573, "grad_norm": 0.7278396328871375, "learning_rate": 4.321330089213301e-06, "loss": 0.6585, "step": 7610 }, { "epoch": 0.2222124901462731, "grad_norm": 0.9267756948328278, "learning_rate": 4.321167883211679e-06, "loss": 0.5984, "step": 7611 }, { "epoch": 0.22224168637412045, "grad_norm": 0.7824675716097432, "learning_rate": 4.321005677210057e-06, "loss": 0.7409, "step": 7612 }, { "epoch": 0.2222708826019678, "grad_norm": 0.7653721739611281, "learning_rate": 4.320843471208435e-06, "loss": 0.6972, "step": 7613 }, { "epoch": 0.2223000788298152, "grad_norm": 0.9250446099342711, "learning_rate": 4.320681265206813e-06, "loss": 0.721, "step": 7614 }, { "epoch": 0.22232927505766256, "grad_norm": 0.73370539239823, "learning_rate": 4.320519059205191e-06, "loss": 0.6886, "step": 7615 }, { "epoch": 0.22235847128550992, "grad_norm": 0.7333710716303178, "learning_rate": 4.3203568532035685e-06, "loss": 0.663, "step": 7616 }, { "epoch": 0.22238766751335728, "grad_norm": 0.6831207155753478, "learning_rate": 4.3201946472019465e-06, "loss": 0.5582, "step": 7617 }, { "epoch": 0.22241686374120465, "grad_norm": 0.964637979841662, "learning_rate": 4.3200324412003245e-06, "loss": 0.7024, "step": 7618 }, { "epoch": 0.222446059969052, "grad_norm": 0.7014246704972167, "learning_rate": 4.3198702351987025e-06, "loss": 0.5989, "step": 7619 }, { "epoch": 0.22247525619689937, "grad_norm": 0.699138349605851, "learning_rate": 4.3197080291970806e-06, "loss": 0.6129, "step": 7620 }, { "epoch": 0.22250445242474673, "grad_norm": 0.7345966703960591, "learning_rate": 4.3195458231954586e-06, "loss": 0.6274, "step": 7621 }, { "epoch": 0.2225336486525941, "grad_norm": 0.7561340042649531, "learning_rate": 4.3193836171938366e-06, "loss": 0.7195, "step": 7622 }, { "epoch": 0.22256284488044145, "grad_norm": 0.7821008508892449, "learning_rate": 4.3192214111922146e-06, "loss": 0.6789, "step": 7623 }, { "epoch": 0.2225920411082888, "grad_norm": 0.7511903299851516, "learning_rate": 4.319059205190593e-06, "loss": 0.7068, "step": 7624 }, { "epoch": 0.22262123733613617, "grad_norm": 0.7575065863332551, "learning_rate": 4.31889699918897e-06, "loss": 0.7498, "step": 7625 }, { "epoch": 0.22265043356398354, "grad_norm": 1.0557748866139363, "learning_rate": 4.318734793187349e-06, "loss": 0.6614, "step": 7626 }, { "epoch": 0.2226796297918309, "grad_norm": 0.7810727643590794, "learning_rate": 4.318572587185727e-06, "loss": 0.6902, "step": 7627 }, { "epoch": 0.22270882601967826, "grad_norm": 0.8818906707121931, "learning_rate": 4.318410381184105e-06, "loss": 0.6946, "step": 7628 }, { "epoch": 0.22273802224752562, "grad_norm": 0.9747119729225203, "learning_rate": 4.318248175182483e-06, "loss": 0.6588, "step": 7629 }, { "epoch": 0.22276721847537298, "grad_norm": 0.7268948797583986, "learning_rate": 4.318085969180861e-06, "loss": 0.6561, "step": 7630 }, { "epoch": 0.22279641470322034, "grad_norm": 0.7464779552416918, "learning_rate": 4.317923763179238e-06, "loss": 0.6854, "step": 7631 }, { "epoch": 0.2228256109310677, "grad_norm": 0.7575639775665867, "learning_rate": 4.317761557177616e-06, "loss": 0.7329, "step": 7632 }, { "epoch": 0.22285480715891506, "grad_norm": 0.7236997397671273, "learning_rate": 4.317599351175994e-06, "loss": 0.6177, "step": 7633 }, { "epoch": 0.22288400338676242, "grad_norm": 0.8730675611857353, "learning_rate": 4.317437145174372e-06, "loss": 0.7413, "step": 7634 }, { "epoch": 0.22291319961460979, "grad_norm": 0.7554004687886992, "learning_rate": 4.31727493917275e-06, "loss": 0.6699, "step": 7635 }, { "epoch": 0.22294239584245715, "grad_norm": 1.2456649234672714, "learning_rate": 4.317112733171128e-06, "loss": 0.9194, "step": 7636 }, { "epoch": 0.2229715920703045, "grad_norm": 0.7635179923619235, "learning_rate": 4.316950527169506e-06, "loss": 0.7053, "step": 7637 }, { "epoch": 0.22300078829815187, "grad_norm": 0.7742074729554468, "learning_rate": 4.316788321167884e-06, "loss": 0.8152, "step": 7638 }, { "epoch": 0.22302998452599923, "grad_norm": 0.8160381519646261, "learning_rate": 4.316626115166261e-06, "loss": 0.7344, "step": 7639 }, { "epoch": 0.2230591807538466, "grad_norm": 0.7585025223025594, "learning_rate": 4.316463909164639e-06, "loss": 0.6975, "step": 7640 }, { "epoch": 0.22308837698169395, "grad_norm": 0.6770776499398033, "learning_rate": 4.316301703163017e-06, "loss": 0.6323, "step": 7641 }, { "epoch": 0.22311757320954131, "grad_norm": 0.7426921626227687, "learning_rate": 4.316139497161395e-06, "loss": 0.6634, "step": 7642 }, { "epoch": 0.22314676943738868, "grad_norm": 0.765087751448106, "learning_rate": 4.315977291159773e-06, "loss": 0.6788, "step": 7643 }, { "epoch": 0.22317596566523606, "grad_norm": 0.7311581398100649, "learning_rate": 4.315815085158151e-06, "loss": 0.6332, "step": 7644 }, { "epoch": 0.22320516189308343, "grad_norm": 0.8417553972492966, "learning_rate": 4.315652879156529e-06, "loss": 0.7542, "step": 7645 }, { "epoch": 0.2232343581209308, "grad_norm": 0.779037365788236, "learning_rate": 4.315490673154907e-06, "loss": 0.7029, "step": 7646 }, { "epoch": 0.22326355434877815, "grad_norm": 0.7249375639709527, "learning_rate": 4.315328467153285e-06, "loss": 0.6577, "step": 7647 }, { "epoch": 0.2232927505766255, "grad_norm": 0.753011851810524, "learning_rate": 4.315166261151663e-06, "loss": 0.6037, "step": 7648 }, { "epoch": 0.22332194680447287, "grad_norm": 0.8095182628880601, "learning_rate": 4.315004055150041e-06, "loss": 0.6553, "step": 7649 }, { "epoch": 0.22335114303232023, "grad_norm": 1.2704256796376598, "learning_rate": 4.314841849148419e-06, "loss": 0.6133, "step": 7650 }, { "epoch": 0.2233803392601676, "grad_norm": 0.7638884925876193, "learning_rate": 4.314679643146797e-06, "loss": 0.7347, "step": 7651 }, { "epoch": 0.22340953548801495, "grad_norm": 0.7359433123163732, "learning_rate": 4.314517437145175e-06, "loss": 0.664, "step": 7652 }, { "epoch": 0.22343873171586232, "grad_norm": 0.7378987282901723, "learning_rate": 4.314355231143553e-06, "loss": 0.6531, "step": 7653 }, { "epoch": 0.22346792794370968, "grad_norm": 0.7431967570680866, "learning_rate": 4.31419302514193e-06, "loss": 0.6143, "step": 7654 }, { "epoch": 0.22349712417155704, "grad_norm": 0.7484283938723238, "learning_rate": 4.314030819140308e-06, "loss": 0.7101, "step": 7655 }, { "epoch": 0.2235263203994044, "grad_norm": 0.7883138033741406, "learning_rate": 4.313868613138686e-06, "loss": 0.7135, "step": 7656 }, { "epoch": 0.22355551662725176, "grad_norm": 0.7119269630305882, "learning_rate": 4.313706407137064e-06, "loss": 0.633, "step": 7657 }, { "epoch": 0.22358471285509912, "grad_norm": 0.7244831804657339, "learning_rate": 4.313544201135442e-06, "loss": 0.703, "step": 7658 }, { "epoch": 0.22361390908294648, "grad_norm": 0.7152808263879612, "learning_rate": 4.31338199513382e-06, "loss": 0.6821, "step": 7659 }, { "epoch": 0.22364310531079384, "grad_norm": 0.8403484121936815, "learning_rate": 4.313219789132198e-06, "loss": 0.805, "step": 7660 }, { "epoch": 0.2236723015386412, "grad_norm": 0.7124527342902763, "learning_rate": 4.313057583130576e-06, "loss": 0.6682, "step": 7661 }, { "epoch": 0.22370149776648857, "grad_norm": 0.7869057143150845, "learning_rate": 4.312895377128954e-06, "loss": 0.7292, "step": 7662 }, { "epoch": 0.22373069399433593, "grad_norm": 0.7423513202298037, "learning_rate": 4.3127331711273315e-06, "loss": 0.6774, "step": 7663 }, { "epoch": 0.2237598902221833, "grad_norm": 0.7802559086225157, "learning_rate": 4.31257096512571e-06, "loss": 0.7472, "step": 7664 }, { "epoch": 0.22378908645003065, "grad_norm": 0.7215277847610501, "learning_rate": 4.312408759124088e-06, "loss": 0.7026, "step": 7665 }, { "epoch": 0.223818282677878, "grad_norm": 0.700536024729184, "learning_rate": 4.312246553122466e-06, "loss": 0.6075, "step": 7666 }, { "epoch": 0.22384747890572537, "grad_norm": 0.8670742847893648, "learning_rate": 4.312084347120844e-06, "loss": 0.8551, "step": 7667 }, { "epoch": 0.22387667513357273, "grad_norm": 0.7209450250302708, "learning_rate": 4.3119221411192215e-06, "loss": 0.6553, "step": 7668 }, { "epoch": 0.2239058713614201, "grad_norm": 0.8009070244380512, "learning_rate": 4.3117599351175995e-06, "loss": 0.6645, "step": 7669 }, { "epoch": 0.22393506758926746, "grad_norm": 0.7373337767055427, "learning_rate": 4.3115977291159775e-06, "loss": 0.6765, "step": 7670 }, { "epoch": 0.22396426381711482, "grad_norm": 0.7535441898358318, "learning_rate": 4.3114355231143555e-06, "loss": 0.6922, "step": 7671 }, { "epoch": 0.22399346004496218, "grad_norm": 0.6542772955042951, "learning_rate": 4.3112733171127335e-06, "loss": 0.574, "step": 7672 }, { "epoch": 0.22402265627280954, "grad_norm": 0.7025260233017294, "learning_rate": 4.3111111111111115e-06, "loss": 0.6592, "step": 7673 }, { "epoch": 0.22405185250065693, "grad_norm": 0.6756127624923366, "learning_rate": 4.3109489051094895e-06, "loss": 0.5848, "step": 7674 }, { "epoch": 0.2240810487285043, "grad_norm": 0.728533606591736, "learning_rate": 4.3107866991078675e-06, "loss": 0.7171, "step": 7675 }, { "epoch": 0.22411024495635165, "grad_norm": 0.7596749514123665, "learning_rate": 4.3106244931062455e-06, "loss": 0.7053, "step": 7676 }, { "epoch": 0.224139441184199, "grad_norm": 0.6992004419978008, "learning_rate": 4.310462287104623e-06, "loss": 0.5866, "step": 7677 }, { "epoch": 0.22416863741204637, "grad_norm": 0.7453767297049211, "learning_rate": 4.310300081103001e-06, "loss": 0.7285, "step": 7678 }, { "epoch": 0.22419783363989373, "grad_norm": 0.7226527940275299, "learning_rate": 4.310137875101379e-06, "loss": 0.669, "step": 7679 }, { "epoch": 0.2242270298677411, "grad_norm": 0.7896154167754857, "learning_rate": 4.309975669099757e-06, "loss": 0.6831, "step": 7680 }, { "epoch": 0.22425622609558846, "grad_norm": 0.7407607699417843, "learning_rate": 4.309813463098135e-06, "loss": 0.6972, "step": 7681 }, { "epoch": 0.22428542232343582, "grad_norm": 0.7233357387852941, "learning_rate": 4.309651257096513e-06, "loss": 0.6513, "step": 7682 }, { "epoch": 0.22431461855128318, "grad_norm": 0.7566257555056431, "learning_rate": 4.309489051094891e-06, "loss": 0.6599, "step": 7683 }, { "epoch": 0.22434381477913054, "grad_norm": 0.7723269236142871, "learning_rate": 4.309326845093269e-06, "loss": 0.6806, "step": 7684 }, { "epoch": 0.2243730110069779, "grad_norm": 0.746206835586124, "learning_rate": 4.309164639091647e-06, "loss": 0.6605, "step": 7685 }, { "epoch": 0.22440220723482526, "grad_norm": 0.6957852101007077, "learning_rate": 4.309002433090025e-06, "loss": 0.638, "step": 7686 }, { "epoch": 0.22443140346267262, "grad_norm": 0.7724977788684715, "learning_rate": 4.308840227088403e-06, "loss": 0.7557, "step": 7687 }, { "epoch": 0.22446059969051999, "grad_norm": 0.7719172640105209, "learning_rate": 4.308678021086781e-06, "loss": 0.7033, "step": 7688 }, { "epoch": 0.22448979591836735, "grad_norm": 0.8145760758675338, "learning_rate": 4.308515815085159e-06, "loss": 0.7483, "step": 7689 }, { "epoch": 0.2245189921462147, "grad_norm": 1.1309958635299482, "learning_rate": 4.308353609083537e-06, "loss": 0.7643, "step": 7690 }, { "epoch": 0.22454818837406207, "grad_norm": 0.7410206265553544, "learning_rate": 4.308191403081915e-06, "loss": 0.6921, "step": 7691 }, { "epoch": 0.22457738460190943, "grad_norm": 0.7090874759816791, "learning_rate": 4.308029197080292e-06, "loss": 0.6522, "step": 7692 }, { "epoch": 0.2246065808297568, "grad_norm": 0.6695012020385541, "learning_rate": 4.30786699107867e-06, "loss": 0.5789, "step": 7693 }, { "epoch": 0.22463577705760415, "grad_norm": 0.7872709197361757, "learning_rate": 4.307704785077048e-06, "loss": 0.6875, "step": 7694 }, { "epoch": 0.2246649732854515, "grad_norm": 0.6647600646544197, "learning_rate": 4.307542579075426e-06, "loss": 0.5742, "step": 7695 }, { "epoch": 0.22469416951329887, "grad_norm": 0.7469556260828905, "learning_rate": 4.307380373073804e-06, "loss": 0.6724, "step": 7696 }, { "epoch": 0.22472336574114624, "grad_norm": 0.7215446105742286, "learning_rate": 4.307218167072182e-06, "loss": 0.6166, "step": 7697 }, { "epoch": 0.2247525619689936, "grad_norm": 0.7005470328104008, "learning_rate": 4.30705596107056e-06, "loss": 0.6415, "step": 7698 }, { "epoch": 0.22478175819684096, "grad_norm": 0.7983578668943441, "learning_rate": 4.306893755068938e-06, "loss": 0.7766, "step": 7699 }, { "epoch": 0.22481095442468832, "grad_norm": 0.701703555797284, "learning_rate": 4.306731549067316e-06, "loss": 0.6502, "step": 7700 }, { "epoch": 0.22484015065253568, "grad_norm": 0.7629174185015286, "learning_rate": 4.306569343065693e-06, "loss": 0.726, "step": 7701 }, { "epoch": 0.22486934688038304, "grad_norm": 0.8146395179454973, "learning_rate": 4.306407137064072e-06, "loss": 0.7279, "step": 7702 }, { "epoch": 0.2248985431082304, "grad_norm": 0.7253228405046659, "learning_rate": 4.30624493106245e-06, "loss": 0.6669, "step": 7703 }, { "epoch": 0.2249277393360778, "grad_norm": 0.7404133901883456, "learning_rate": 4.306082725060828e-06, "loss": 0.6961, "step": 7704 }, { "epoch": 0.22495693556392515, "grad_norm": 0.7723551238367946, "learning_rate": 4.305920519059206e-06, "loss": 0.7062, "step": 7705 }, { "epoch": 0.22498613179177251, "grad_norm": 0.7812702042826019, "learning_rate": 4.305758313057583e-06, "loss": 0.7415, "step": 7706 }, { "epoch": 0.22501532801961988, "grad_norm": 0.76678957723382, "learning_rate": 4.305596107055961e-06, "loss": 0.7133, "step": 7707 }, { "epoch": 0.22504452424746724, "grad_norm": 0.7157622372842278, "learning_rate": 4.305433901054339e-06, "loss": 0.6407, "step": 7708 }, { "epoch": 0.2250737204753146, "grad_norm": 0.7180012089525092, "learning_rate": 4.305271695052717e-06, "loss": 0.6661, "step": 7709 }, { "epoch": 0.22510291670316196, "grad_norm": 0.7512173801058507, "learning_rate": 4.305109489051095e-06, "loss": 0.7267, "step": 7710 }, { "epoch": 0.22513211293100932, "grad_norm": 0.7040859846095908, "learning_rate": 4.304947283049473e-06, "loss": 0.6343, "step": 7711 }, { "epoch": 0.22516130915885668, "grad_norm": 0.7144333400360553, "learning_rate": 4.304785077047851e-06, "loss": 0.5998, "step": 7712 }, { "epoch": 0.22519050538670404, "grad_norm": 0.7921542822801447, "learning_rate": 4.304622871046229e-06, "loss": 0.668, "step": 7713 }, { "epoch": 0.2252197016145514, "grad_norm": 0.7255511826991131, "learning_rate": 4.304460665044607e-06, "loss": 0.6602, "step": 7714 }, { "epoch": 0.22524889784239877, "grad_norm": 0.7671638092314611, "learning_rate": 4.3042984590429844e-06, "loss": 0.729, "step": 7715 }, { "epoch": 0.22527809407024613, "grad_norm": 0.7573461295732024, "learning_rate": 4.3041362530413624e-06, "loss": 0.6152, "step": 7716 }, { "epoch": 0.2253072902980935, "grad_norm": 0.7769619011267386, "learning_rate": 4.3039740470397404e-06, "loss": 0.7579, "step": 7717 }, { "epoch": 0.22533648652594085, "grad_norm": 0.7799531434159692, "learning_rate": 4.3038118410381184e-06, "loss": 0.732, "step": 7718 }, { "epoch": 0.2253656827537882, "grad_norm": 0.7772602160904064, "learning_rate": 4.3036496350364965e-06, "loss": 0.6889, "step": 7719 }, { "epoch": 0.22539487898163557, "grad_norm": 0.7679474915668217, "learning_rate": 4.3034874290348745e-06, "loss": 0.7128, "step": 7720 }, { "epoch": 0.22542407520948293, "grad_norm": 0.7692039844975449, "learning_rate": 4.3033252230332525e-06, "loss": 0.6725, "step": 7721 }, { "epoch": 0.2254532714373303, "grad_norm": 0.8751850467732218, "learning_rate": 4.3031630170316305e-06, "loss": 0.7864, "step": 7722 }, { "epoch": 0.22548246766517765, "grad_norm": 0.7954963588134781, "learning_rate": 4.3030008110300085e-06, "loss": 0.6517, "step": 7723 }, { "epoch": 0.22551166389302502, "grad_norm": 0.7557685570516192, "learning_rate": 4.3028386050283865e-06, "loss": 0.6943, "step": 7724 }, { "epoch": 0.22554086012087238, "grad_norm": 0.7450914935954045, "learning_rate": 4.3026763990267645e-06, "loss": 0.6811, "step": 7725 }, { "epoch": 0.22557005634871974, "grad_norm": 0.7556074429083502, "learning_rate": 4.3025141930251425e-06, "loss": 0.6503, "step": 7726 }, { "epoch": 0.2255992525765671, "grad_norm": 0.7076066379237463, "learning_rate": 4.3023519870235205e-06, "loss": 0.5611, "step": 7727 }, { "epoch": 0.22562844880441446, "grad_norm": 0.733189640648208, "learning_rate": 4.3021897810218985e-06, "loss": 0.6102, "step": 7728 }, { "epoch": 0.22565764503226182, "grad_norm": 0.7259467739871621, "learning_rate": 4.3020275750202765e-06, "loss": 0.6628, "step": 7729 }, { "epoch": 0.22568684126010918, "grad_norm": 0.7551804290304157, "learning_rate": 4.301865369018654e-06, "loss": 0.6657, "step": 7730 }, { "epoch": 0.22571603748795654, "grad_norm": 0.7397988082814666, "learning_rate": 4.301703163017032e-06, "loss": 0.6663, "step": 7731 }, { "epoch": 0.2257452337158039, "grad_norm": 0.696121276432816, "learning_rate": 4.30154095701541e-06, "loss": 0.5969, "step": 7732 }, { "epoch": 0.22577442994365127, "grad_norm": 0.7517186635284878, "learning_rate": 4.301378751013788e-06, "loss": 0.6697, "step": 7733 }, { "epoch": 0.22580362617149866, "grad_norm": 0.7372934583337942, "learning_rate": 4.301216545012166e-06, "loss": 0.6559, "step": 7734 }, { "epoch": 0.22583282239934602, "grad_norm": 0.9311424802379451, "learning_rate": 4.301054339010544e-06, "loss": 0.6928, "step": 7735 }, { "epoch": 0.22586201862719338, "grad_norm": 0.8155089723933605, "learning_rate": 4.300892133008922e-06, "loss": 0.6974, "step": 7736 }, { "epoch": 0.22589121485504074, "grad_norm": 0.7113265532918123, "learning_rate": 4.3007299270073e-06, "loss": 0.6303, "step": 7737 }, { "epoch": 0.2259204110828881, "grad_norm": 0.7858671302944308, "learning_rate": 4.300567721005678e-06, "loss": 0.645, "step": 7738 }, { "epoch": 0.22594960731073546, "grad_norm": 0.7200718049081846, "learning_rate": 4.300405515004055e-06, "loss": 0.6685, "step": 7739 }, { "epoch": 0.22597880353858282, "grad_norm": 0.7323909895312809, "learning_rate": 4.300243309002434e-06, "loss": 0.6413, "step": 7740 }, { "epoch": 0.22600799976643018, "grad_norm": 0.7512772348804271, "learning_rate": 4.300081103000812e-06, "loss": 0.6955, "step": 7741 }, { "epoch": 0.22603719599427755, "grad_norm": 0.717483378192772, "learning_rate": 4.29991889699919e-06, "loss": 0.6187, "step": 7742 }, { "epoch": 0.2260663922221249, "grad_norm": 0.7579588063961085, "learning_rate": 4.299756690997568e-06, "loss": 0.67, "step": 7743 }, { "epoch": 0.22609558844997227, "grad_norm": 0.8083739194364165, "learning_rate": 4.299594484995945e-06, "loss": 0.7289, "step": 7744 }, { "epoch": 0.22612478467781963, "grad_norm": 0.7857771018729445, "learning_rate": 4.299432278994323e-06, "loss": 0.7294, "step": 7745 }, { "epoch": 0.226153980905667, "grad_norm": 0.7943929705886712, "learning_rate": 4.299270072992701e-06, "loss": 0.7208, "step": 7746 }, { "epoch": 0.22618317713351435, "grad_norm": 0.8543818334265919, "learning_rate": 4.299107866991079e-06, "loss": 0.7865, "step": 7747 }, { "epoch": 0.2262123733613617, "grad_norm": 0.7435003551511737, "learning_rate": 4.298945660989457e-06, "loss": 0.6192, "step": 7748 }, { "epoch": 0.22624156958920907, "grad_norm": 0.7329588348795353, "learning_rate": 4.298783454987835e-06, "loss": 0.6169, "step": 7749 }, { "epoch": 0.22627076581705644, "grad_norm": 1.443664511274642, "learning_rate": 4.298621248986213e-06, "loss": 0.721, "step": 7750 }, { "epoch": 0.2262999620449038, "grad_norm": 0.7812436902830826, "learning_rate": 4.298459042984591e-06, "loss": 0.7554, "step": 7751 }, { "epoch": 0.22632915827275116, "grad_norm": 0.7461475222589357, "learning_rate": 4.298296836982969e-06, "loss": 0.6433, "step": 7752 }, { "epoch": 0.22635835450059852, "grad_norm": 0.8205368607308503, "learning_rate": 4.298134630981346e-06, "loss": 0.7095, "step": 7753 }, { "epoch": 0.22638755072844588, "grad_norm": 0.8341952903608653, "learning_rate": 4.297972424979724e-06, "loss": 0.7179, "step": 7754 }, { "epoch": 0.22641674695629324, "grad_norm": 0.6968173474802402, "learning_rate": 4.297810218978102e-06, "loss": 0.6318, "step": 7755 }, { "epoch": 0.2264459431841406, "grad_norm": 0.7667301207102266, "learning_rate": 4.29764801297648e-06, "loss": 0.654, "step": 7756 }, { "epoch": 0.22647513941198796, "grad_norm": 0.6943220312233491, "learning_rate": 4.297485806974858e-06, "loss": 0.6282, "step": 7757 }, { "epoch": 0.22650433563983532, "grad_norm": 0.7500695392823405, "learning_rate": 4.297323600973236e-06, "loss": 0.6526, "step": 7758 }, { "epoch": 0.22653353186768269, "grad_norm": 0.8013263188410302, "learning_rate": 4.297161394971614e-06, "loss": 0.7605, "step": 7759 }, { "epoch": 0.22656272809553005, "grad_norm": 0.7710061582864519, "learning_rate": 4.296999188969992e-06, "loss": 0.6891, "step": 7760 }, { "epoch": 0.2265919243233774, "grad_norm": 0.7506686519044611, "learning_rate": 4.29683698296837e-06, "loss": 0.6626, "step": 7761 }, { "epoch": 0.22662112055122477, "grad_norm": 0.7890677755470314, "learning_rate": 4.296674776966748e-06, "loss": 0.7396, "step": 7762 }, { "epoch": 0.22665031677907213, "grad_norm": 0.8007921183150971, "learning_rate": 4.296512570965126e-06, "loss": 0.6596, "step": 7763 }, { "epoch": 0.22667951300691952, "grad_norm": 0.711984835789968, "learning_rate": 4.296350364963504e-06, "loss": 0.6241, "step": 7764 }, { "epoch": 0.22670870923476688, "grad_norm": 0.6618331977006296, "learning_rate": 4.296188158961882e-06, "loss": 0.5864, "step": 7765 }, { "epoch": 0.22673790546261424, "grad_norm": 0.7465870502789289, "learning_rate": 4.29602595296026e-06, "loss": 0.6367, "step": 7766 }, { "epoch": 0.2267671016904616, "grad_norm": 0.7063761696255768, "learning_rate": 4.295863746958638e-06, "loss": 0.6364, "step": 7767 }, { "epoch": 0.22679629791830896, "grad_norm": 0.7506966815495103, "learning_rate": 4.295701540957015e-06, "loss": 0.7034, "step": 7768 }, { "epoch": 0.22682549414615633, "grad_norm": 0.6484659877010923, "learning_rate": 4.295539334955393e-06, "loss": 0.5216, "step": 7769 }, { "epoch": 0.2268546903740037, "grad_norm": 0.7707778282797659, "learning_rate": 4.295377128953771e-06, "loss": 0.684, "step": 7770 }, { "epoch": 0.22688388660185105, "grad_norm": 0.7184613123660367, "learning_rate": 4.295214922952149e-06, "loss": 0.6775, "step": 7771 }, { "epoch": 0.2269130828296984, "grad_norm": 0.7689280230348907, "learning_rate": 4.2950527169505274e-06, "loss": 0.6684, "step": 7772 }, { "epoch": 0.22694227905754577, "grad_norm": 0.7167510795332801, "learning_rate": 4.2948905109489054e-06, "loss": 0.6485, "step": 7773 }, { "epoch": 0.22697147528539313, "grad_norm": 0.8023076725167665, "learning_rate": 4.2947283049472834e-06, "loss": 0.7807, "step": 7774 }, { "epoch": 0.2270006715132405, "grad_norm": 0.7596157792062087, "learning_rate": 4.2945660989456614e-06, "loss": 0.6508, "step": 7775 }, { "epoch": 0.22702986774108785, "grad_norm": 0.7763449013073215, "learning_rate": 4.2944038929440395e-06, "loss": 0.6635, "step": 7776 }, { "epoch": 0.22705906396893522, "grad_norm": 0.7147826551608817, "learning_rate": 4.2942416869424175e-06, "loss": 0.6197, "step": 7777 }, { "epoch": 0.22708826019678258, "grad_norm": 0.7232300660302611, "learning_rate": 4.2940794809407955e-06, "loss": 0.6426, "step": 7778 }, { "epoch": 0.22711745642462994, "grad_norm": 0.7162756512926249, "learning_rate": 4.2939172749391735e-06, "loss": 0.6171, "step": 7779 }, { "epoch": 0.2271466526524773, "grad_norm": 0.7244121531876653, "learning_rate": 4.2937550689375515e-06, "loss": 0.6418, "step": 7780 }, { "epoch": 0.22717584888032466, "grad_norm": 0.7627323599823177, "learning_rate": 4.2935928629359295e-06, "loss": 0.6774, "step": 7781 }, { "epoch": 0.22720504510817202, "grad_norm": 0.7856115172273491, "learning_rate": 4.293430656934307e-06, "loss": 0.7497, "step": 7782 }, { "epoch": 0.22723424133601938, "grad_norm": 0.7071091358568198, "learning_rate": 4.293268450932685e-06, "loss": 0.6113, "step": 7783 }, { "epoch": 0.22726343756386674, "grad_norm": 0.7130816009564066, "learning_rate": 4.293106244931063e-06, "loss": 0.5783, "step": 7784 }, { "epoch": 0.2272926337917141, "grad_norm": 0.72121866937013, "learning_rate": 4.292944038929441e-06, "loss": 0.6477, "step": 7785 }, { "epoch": 0.22732183001956147, "grad_norm": 0.7229283785409224, "learning_rate": 4.292781832927819e-06, "loss": 0.6286, "step": 7786 }, { "epoch": 0.22735102624740883, "grad_norm": 0.6942748980370607, "learning_rate": 4.292619626926197e-06, "loss": 0.6084, "step": 7787 }, { "epoch": 0.2273802224752562, "grad_norm": 0.7590584867049155, "learning_rate": 4.292457420924575e-06, "loss": 0.6759, "step": 7788 }, { "epoch": 0.22740941870310355, "grad_norm": 0.8467776151590308, "learning_rate": 4.292295214922953e-06, "loss": 0.6977, "step": 7789 }, { "epoch": 0.2274386149309509, "grad_norm": 0.7299337044525385, "learning_rate": 4.292133008921331e-06, "loss": 0.6719, "step": 7790 }, { "epoch": 0.22746781115879827, "grad_norm": 0.7725908602041053, "learning_rate": 4.291970802919708e-06, "loss": 0.6763, "step": 7791 }, { "epoch": 0.22749700738664563, "grad_norm": 0.7388839106570864, "learning_rate": 4.291808596918086e-06, "loss": 0.6822, "step": 7792 }, { "epoch": 0.227526203614493, "grad_norm": 0.7863187079721692, "learning_rate": 4.291646390916464e-06, "loss": 0.7271, "step": 7793 }, { "epoch": 0.22755539984234038, "grad_norm": 0.7411834949845276, "learning_rate": 4.291484184914842e-06, "loss": 0.6508, "step": 7794 }, { "epoch": 0.22758459607018774, "grad_norm": 0.9166285491950731, "learning_rate": 4.29132197891322e-06, "loss": 0.6596, "step": 7795 }, { "epoch": 0.2276137922980351, "grad_norm": 0.74353986285826, "learning_rate": 4.291159772911599e-06, "loss": 0.6742, "step": 7796 }, { "epoch": 0.22764298852588247, "grad_norm": 0.9325071100946567, "learning_rate": 4.290997566909976e-06, "loss": 0.765, "step": 7797 }, { "epoch": 0.22767218475372983, "grad_norm": 0.7445336208094037, "learning_rate": 4.290835360908354e-06, "loss": 0.6722, "step": 7798 }, { "epoch": 0.2277013809815772, "grad_norm": 0.710569439410632, "learning_rate": 4.290673154906732e-06, "loss": 0.6192, "step": 7799 }, { "epoch": 0.22773057720942455, "grad_norm": 0.7437147803851828, "learning_rate": 4.29051094890511e-06, "loss": 0.6309, "step": 7800 }, { "epoch": 0.2277597734372719, "grad_norm": 0.7672755566246963, "learning_rate": 4.290348742903488e-06, "loss": 0.6386, "step": 7801 }, { "epoch": 0.22778896966511927, "grad_norm": 0.7712572420293975, "learning_rate": 4.290186536901866e-06, "loss": 0.6882, "step": 7802 }, { "epoch": 0.22781816589296663, "grad_norm": 0.8793095935881177, "learning_rate": 4.290024330900244e-06, "loss": 0.6382, "step": 7803 }, { "epoch": 0.227847362120814, "grad_norm": 0.7532545863044877, "learning_rate": 4.289862124898622e-06, "loss": 0.6945, "step": 7804 }, { "epoch": 0.22787655834866136, "grad_norm": 0.7478043442493092, "learning_rate": 4.289699918897e-06, "loss": 0.5611, "step": 7805 }, { "epoch": 0.22790575457650872, "grad_norm": 0.7488896596275236, "learning_rate": 4.289537712895377e-06, "loss": 0.7252, "step": 7806 }, { "epoch": 0.22793495080435608, "grad_norm": 0.6897227376169421, "learning_rate": 4.289375506893755e-06, "loss": 0.5944, "step": 7807 }, { "epoch": 0.22796414703220344, "grad_norm": 0.725439192602609, "learning_rate": 4.289213300892133e-06, "loss": 0.7224, "step": 7808 }, { "epoch": 0.2279933432600508, "grad_norm": 0.7199854898549729, "learning_rate": 4.289051094890511e-06, "loss": 0.6727, "step": 7809 }, { "epoch": 0.22802253948789816, "grad_norm": 0.7715206049156308, "learning_rate": 4.288888888888889e-06, "loss": 0.6967, "step": 7810 }, { "epoch": 0.22805173571574552, "grad_norm": 0.8086795759128498, "learning_rate": 4.288726682887267e-06, "loss": 0.7265, "step": 7811 }, { "epoch": 0.22808093194359289, "grad_norm": 0.7409771950622355, "learning_rate": 4.288564476885645e-06, "loss": 0.5637, "step": 7812 }, { "epoch": 0.22811012817144025, "grad_norm": 0.782470806039796, "learning_rate": 4.288402270884023e-06, "loss": 0.7043, "step": 7813 }, { "epoch": 0.2281393243992876, "grad_norm": 0.7684108427880276, "learning_rate": 4.2882400648824e-06, "loss": 0.6685, "step": 7814 }, { "epoch": 0.22816852062713497, "grad_norm": 0.7024411607050074, "learning_rate": 4.288077858880779e-06, "loss": 0.647, "step": 7815 }, { "epoch": 0.22819771685498233, "grad_norm": 0.6855623768969673, "learning_rate": 4.287915652879157e-06, "loss": 0.6187, "step": 7816 }, { "epoch": 0.2282269130828297, "grad_norm": 0.7250926027385567, "learning_rate": 4.287753446877535e-06, "loss": 0.6698, "step": 7817 }, { "epoch": 0.22825610931067705, "grad_norm": 0.814536201716938, "learning_rate": 4.287591240875913e-06, "loss": 0.7073, "step": 7818 }, { "epoch": 0.2282853055385244, "grad_norm": 0.8570217808190906, "learning_rate": 4.287429034874291e-06, "loss": 0.7426, "step": 7819 }, { "epoch": 0.22831450176637177, "grad_norm": 0.7415192944513959, "learning_rate": 4.287266828872668e-06, "loss": 0.6331, "step": 7820 }, { "epoch": 0.22834369799421914, "grad_norm": 0.7094585010444189, "learning_rate": 4.287104622871046e-06, "loss": 0.6457, "step": 7821 }, { "epoch": 0.2283728942220665, "grad_norm": 0.7577809950823997, "learning_rate": 4.286942416869424e-06, "loss": 0.6121, "step": 7822 }, { "epoch": 0.22840209044991386, "grad_norm": 0.7499897702850572, "learning_rate": 4.286780210867802e-06, "loss": 0.7181, "step": 7823 }, { "epoch": 0.22843128667776122, "grad_norm": 0.7052278827925123, "learning_rate": 4.28661800486618e-06, "loss": 0.6156, "step": 7824 }, { "epoch": 0.2284604829056086, "grad_norm": 0.7134618573734175, "learning_rate": 4.286455798864558e-06, "loss": 0.6544, "step": 7825 }, { "epoch": 0.22848967913345597, "grad_norm": 0.7385514322745295, "learning_rate": 4.286293592862936e-06, "loss": 0.6392, "step": 7826 }, { "epoch": 0.22851887536130333, "grad_norm": 0.6590073313963017, "learning_rate": 4.286131386861314e-06, "loss": 0.5429, "step": 7827 }, { "epoch": 0.2285480715891507, "grad_norm": 0.7866776747263998, "learning_rate": 4.285969180859692e-06, "loss": 0.5348, "step": 7828 }, { "epoch": 0.22857726781699805, "grad_norm": 0.7754778765863295, "learning_rate": 4.28580697485807e-06, "loss": 0.6281, "step": 7829 }, { "epoch": 0.22860646404484541, "grad_norm": 0.7682812881784441, "learning_rate": 4.285644768856448e-06, "loss": 0.6226, "step": 7830 }, { "epoch": 0.22863566027269278, "grad_norm": 0.761909738793256, "learning_rate": 4.285482562854826e-06, "loss": 0.6468, "step": 7831 }, { "epoch": 0.22866485650054014, "grad_norm": 0.7840050558979635, "learning_rate": 4.285320356853204e-06, "loss": 0.7139, "step": 7832 }, { "epoch": 0.2286940527283875, "grad_norm": 0.7747318999017162, "learning_rate": 4.285158150851582e-06, "loss": 0.7483, "step": 7833 }, { "epoch": 0.22872324895623486, "grad_norm": 0.7922380771985646, "learning_rate": 4.2849959448499605e-06, "loss": 0.7653, "step": 7834 }, { "epoch": 0.22875244518408222, "grad_norm": 0.7477708837608625, "learning_rate": 4.284833738848338e-06, "loss": 0.656, "step": 7835 }, { "epoch": 0.22878164141192958, "grad_norm": 0.7287114405070115, "learning_rate": 4.284671532846716e-06, "loss": 0.6597, "step": 7836 }, { "epoch": 0.22881083763977694, "grad_norm": 0.7190668653779618, "learning_rate": 4.284509326845094e-06, "loss": 0.6272, "step": 7837 }, { "epoch": 0.2288400338676243, "grad_norm": 0.8046947702623313, "learning_rate": 4.284347120843472e-06, "loss": 0.7751, "step": 7838 }, { "epoch": 0.22886923009547167, "grad_norm": 0.7443386139706881, "learning_rate": 4.28418491484185e-06, "loss": 0.675, "step": 7839 }, { "epoch": 0.22889842632331903, "grad_norm": 0.7655585498877897, "learning_rate": 4.284022708840228e-06, "loss": 0.6967, "step": 7840 }, { "epoch": 0.2289276225511664, "grad_norm": 0.7891731098127319, "learning_rate": 4.283860502838606e-06, "loss": 0.71, "step": 7841 }, { "epoch": 0.22895681877901375, "grad_norm": 0.7903882510881147, "learning_rate": 4.283698296836984e-06, "loss": 0.6472, "step": 7842 }, { "epoch": 0.2289860150068611, "grad_norm": 0.6470474411723602, "learning_rate": 4.283536090835362e-06, "loss": 0.546, "step": 7843 }, { "epoch": 0.22901521123470847, "grad_norm": 0.7639657384428967, "learning_rate": 4.283373884833739e-06, "loss": 0.7221, "step": 7844 }, { "epoch": 0.22904440746255583, "grad_norm": 0.6848555325888942, "learning_rate": 4.283211678832117e-06, "loss": 0.5896, "step": 7845 }, { "epoch": 0.2290736036904032, "grad_norm": 0.7046892999624865, "learning_rate": 4.283049472830495e-06, "loss": 0.643, "step": 7846 }, { "epoch": 0.22910279991825055, "grad_norm": 0.7061857727364786, "learning_rate": 4.282887266828873e-06, "loss": 0.6379, "step": 7847 }, { "epoch": 0.22913199614609792, "grad_norm": 0.7092404385697973, "learning_rate": 4.282725060827251e-06, "loss": 0.6733, "step": 7848 }, { "epoch": 0.22916119237394528, "grad_norm": 8.861611241994527, "learning_rate": 4.282562854825629e-06, "loss": 0.8274, "step": 7849 }, { "epoch": 0.22919038860179264, "grad_norm": 0.7917617405169044, "learning_rate": 4.282400648824007e-06, "loss": 0.7547, "step": 7850 }, { "epoch": 0.22921958482964, "grad_norm": 0.7176071450968448, "learning_rate": 4.282238442822385e-06, "loss": 0.6225, "step": 7851 }, { "epoch": 0.22924878105748736, "grad_norm": 0.6756978515231024, "learning_rate": 4.282076236820762e-06, "loss": 0.5902, "step": 7852 }, { "epoch": 0.22927797728533472, "grad_norm": 0.7449221787731729, "learning_rate": 4.281914030819141e-06, "loss": 0.6642, "step": 7853 }, { "epoch": 0.22930717351318208, "grad_norm": 0.7756086223288668, "learning_rate": 4.281751824817519e-06, "loss": 0.681, "step": 7854 }, { "epoch": 0.22933636974102947, "grad_norm": 0.7689994124172504, "learning_rate": 4.281589618815897e-06, "loss": 0.7283, "step": 7855 }, { "epoch": 0.22936556596887683, "grad_norm": 0.8169122175648371, "learning_rate": 4.281427412814275e-06, "loss": 0.7962, "step": 7856 }, { "epoch": 0.2293947621967242, "grad_norm": 0.8339588373104564, "learning_rate": 4.281265206812653e-06, "loss": 0.6491, "step": 7857 }, { "epoch": 0.22942395842457156, "grad_norm": 0.7782118184755952, "learning_rate": 4.28110300081103e-06, "loss": 0.7005, "step": 7858 }, { "epoch": 0.22945315465241892, "grad_norm": 0.6778855879584726, "learning_rate": 4.280940794809408e-06, "loss": 0.5382, "step": 7859 }, { "epoch": 0.22948235088026628, "grad_norm": 0.7846269329451478, "learning_rate": 4.280778588807786e-06, "loss": 0.6713, "step": 7860 }, { "epoch": 0.22951154710811364, "grad_norm": 0.6808449075472709, "learning_rate": 4.280616382806164e-06, "loss": 0.6487, "step": 7861 }, { "epoch": 0.229540743335961, "grad_norm": 0.7373608712714539, "learning_rate": 4.280454176804542e-06, "loss": 0.7235, "step": 7862 }, { "epoch": 0.22956993956380836, "grad_norm": 0.8132639987131862, "learning_rate": 4.28029197080292e-06, "loss": 0.7847, "step": 7863 }, { "epoch": 0.22959913579165572, "grad_norm": 0.8065234531263085, "learning_rate": 4.280129764801298e-06, "loss": 0.7573, "step": 7864 }, { "epoch": 0.22962833201950308, "grad_norm": 0.7203685686444884, "learning_rate": 4.279967558799676e-06, "loss": 0.6191, "step": 7865 }, { "epoch": 0.22965752824735045, "grad_norm": 0.725997821197134, "learning_rate": 4.279805352798054e-06, "loss": 0.6792, "step": 7866 }, { "epoch": 0.2296867244751978, "grad_norm": 0.7046997868119069, "learning_rate": 4.279643146796431e-06, "loss": 0.642, "step": 7867 }, { "epoch": 0.22971592070304517, "grad_norm": 0.7484035362768313, "learning_rate": 4.279480940794809e-06, "loss": 0.7126, "step": 7868 }, { "epoch": 0.22974511693089253, "grad_norm": 0.6792061193662103, "learning_rate": 4.279318734793187e-06, "loss": 0.6235, "step": 7869 }, { "epoch": 0.2297743131587399, "grad_norm": 0.7683835871051349, "learning_rate": 4.279156528791565e-06, "loss": 0.7337, "step": 7870 }, { "epoch": 0.22980350938658725, "grad_norm": 0.7359755912381323, "learning_rate": 4.278994322789943e-06, "loss": 0.6484, "step": 7871 }, { "epoch": 0.2298327056144346, "grad_norm": 0.7534640804235337, "learning_rate": 4.278832116788322e-06, "loss": 0.7072, "step": 7872 }, { "epoch": 0.22986190184228197, "grad_norm": 0.7254911715587794, "learning_rate": 4.278669910786699e-06, "loss": 0.6626, "step": 7873 }, { "epoch": 0.22989109807012933, "grad_norm": 0.7520408298839544, "learning_rate": 4.278507704785077e-06, "loss": 0.6956, "step": 7874 }, { "epoch": 0.2299202942979767, "grad_norm": 0.7412381505133638, "learning_rate": 4.278345498783455e-06, "loss": 0.7025, "step": 7875 }, { "epoch": 0.22994949052582406, "grad_norm": 0.7951641304028849, "learning_rate": 4.278183292781833e-06, "loss": 0.7164, "step": 7876 }, { "epoch": 0.22997868675367142, "grad_norm": 0.7228685026262993, "learning_rate": 4.278021086780211e-06, "loss": 0.6195, "step": 7877 }, { "epoch": 0.23000788298151878, "grad_norm": 0.7452772935811307, "learning_rate": 4.277858880778589e-06, "loss": 0.6705, "step": 7878 }, { "epoch": 0.23003707920936614, "grad_norm": 0.7638990581665498, "learning_rate": 4.277696674776967e-06, "loss": 0.7158, "step": 7879 }, { "epoch": 0.2300662754372135, "grad_norm": 0.7454303490235187, "learning_rate": 4.277534468775345e-06, "loss": 0.7012, "step": 7880 }, { "epoch": 0.23009547166506086, "grad_norm": 0.7661182374786047, "learning_rate": 4.277372262773723e-06, "loss": 0.7354, "step": 7881 }, { "epoch": 0.23012466789290822, "grad_norm": 0.8480872204199488, "learning_rate": 4.2772100567721005e-06, "loss": 0.7512, "step": 7882 }, { "epoch": 0.23015386412075559, "grad_norm": 0.6953969747759579, "learning_rate": 4.2770478507704786e-06, "loss": 0.5963, "step": 7883 }, { "epoch": 0.23018306034860295, "grad_norm": 0.743903069364747, "learning_rate": 4.2768856447688566e-06, "loss": 0.6556, "step": 7884 }, { "epoch": 0.23021225657645034, "grad_norm": 0.6833634978918343, "learning_rate": 4.2767234387672346e-06, "loss": 0.551, "step": 7885 }, { "epoch": 0.2302414528042977, "grad_norm": 0.7314796349598255, "learning_rate": 4.2765612327656126e-06, "loss": 0.6562, "step": 7886 }, { "epoch": 0.23027064903214506, "grad_norm": 0.7260553624700212, "learning_rate": 4.276399026763991e-06, "loss": 0.6253, "step": 7887 }, { "epoch": 0.23029984525999242, "grad_norm": 0.7254683509945745, "learning_rate": 4.276236820762369e-06, "loss": 0.6845, "step": 7888 }, { "epoch": 0.23032904148783978, "grad_norm": 0.7403341687164046, "learning_rate": 4.276074614760747e-06, "loss": 0.7247, "step": 7889 }, { "epoch": 0.23035823771568714, "grad_norm": 0.7883859355175294, "learning_rate": 4.275912408759124e-06, "loss": 0.7083, "step": 7890 }, { "epoch": 0.2303874339435345, "grad_norm": 0.7946475996662824, "learning_rate": 4.275750202757503e-06, "loss": 0.746, "step": 7891 }, { "epoch": 0.23041663017138186, "grad_norm": 0.7373800531315656, "learning_rate": 4.275587996755881e-06, "loss": 0.6801, "step": 7892 }, { "epoch": 0.23044582639922923, "grad_norm": 0.7602584615999948, "learning_rate": 4.275425790754259e-06, "loss": 0.6653, "step": 7893 }, { "epoch": 0.2304750226270766, "grad_norm": 0.7779988152887103, "learning_rate": 4.275263584752637e-06, "loss": 0.7323, "step": 7894 }, { "epoch": 0.23050421885492395, "grad_norm": 0.7056539557045933, "learning_rate": 4.275101378751015e-06, "loss": 0.628, "step": 7895 }, { "epoch": 0.2305334150827713, "grad_norm": 0.7205527271814509, "learning_rate": 4.274939172749392e-06, "loss": 0.642, "step": 7896 }, { "epoch": 0.23056261131061867, "grad_norm": 0.9492087510890337, "learning_rate": 4.27477696674777e-06, "loss": 0.6117, "step": 7897 }, { "epoch": 0.23059180753846603, "grad_norm": 0.9335430694512793, "learning_rate": 4.274614760746148e-06, "loss": 0.6427, "step": 7898 }, { "epoch": 0.2306210037663134, "grad_norm": 0.9217702876477085, "learning_rate": 4.274452554744526e-06, "loss": 0.8143, "step": 7899 }, { "epoch": 0.23065019999416075, "grad_norm": 0.7150212820060429, "learning_rate": 4.274290348742904e-06, "loss": 0.6633, "step": 7900 }, { "epoch": 0.23067939622200812, "grad_norm": 0.7667422269468772, "learning_rate": 4.274128142741282e-06, "loss": 0.7343, "step": 7901 }, { "epoch": 0.23070859244985548, "grad_norm": 0.7569807581945155, "learning_rate": 4.27396593673966e-06, "loss": 0.6904, "step": 7902 }, { "epoch": 0.23073778867770284, "grad_norm": 0.7155376512594062, "learning_rate": 4.273803730738038e-06, "loss": 0.6466, "step": 7903 }, { "epoch": 0.2307669849055502, "grad_norm": 0.7372989795665176, "learning_rate": 4.273641524736416e-06, "loss": 0.7038, "step": 7904 }, { "epoch": 0.23079618113339756, "grad_norm": 0.8160023755166553, "learning_rate": 4.273479318734793e-06, "loss": 0.8145, "step": 7905 }, { "epoch": 0.23082537736124492, "grad_norm": 0.7259624067858598, "learning_rate": 4.273317112733171e-06, "loss": 0.5655, "step": 7906 }, { "epoch": 0.23085457358909228, "grad_norm": 0.7055467579079271, "learning_rate": 4.273154906731549e-06, "loss": 0.6688, "step": 7907 }, { "epoch": 0.23088376981693964, "grad_norm": 0.829964565963024, "learning_rate": 4.272992700729927e-06, "loss": 0.7415, "step": 7908 }, { "epoch": 0.230912966044787, "grad_norm": 0.7177087768249781, "learning_rate": 4.272830494728305e-06, "loss": 0.6443, "step": 7909 }, { "epoch": 0.23094216227263437, "grad_norm": 0.8127798908847115, "learning_rate": 4.272668288726684e-06, "loss": 0.7061, "step": 7910 }, { "epoch": 0.23097135850048173, "grad_norm": 0.7196606430322979, "learning_rate": 4.272506082725061e-06, "loss": 0.6103, "step": 7911 }, { "epoch": 0.2310005547283291, "grad_norm": 0.7730481754544481, "learning_rate": 4.272343876723439e-06, "loss": 0.6228, "step": 7912 }, { "epoch": 0.23102975095617645, "grad_norm": 0.7467160233331371, "learning_rate": 4.272181670721817e-06, "loss": 0.6692, "step": 7913 }, { "epoch": 0.2310589471840238, "grad_norm": 0.7334823877308183, "learning_rate": 4.272019464720195e-06, "loss": 0.6512, "step": 7914 }, { "epoch": 0.2310881434118712, "grad_norm": 0.6967104100874724, "learning_rate": 4.271857258718573e-06, "loss": 0.5723, "step": 7915 }, { "epoch": 0.23111733963971856, "grad_norm": 0.800875023571375, "learning_rate": 4.271695052716951e-06, "loss": 0.7399, "step": 7916 }, { "epoch": 0.23114653586756592, "grad_norm": 0.7894431333311466, "learning_rate": 4.271532846715329e-06, "loss": 0.7407, "step": 7917 }, { "epoch": 0.23117573209541328, "grad_norm": 0.7692523301917342, "learning_rate": 4.271370640713707e-06, "loss": 0.6546, "step": 7918 }, { "epoch": 0.23120492832326064, "grad_norm": 0.709926347673432, "learning_rate": 4.271208434712085e-06, "loss": 0.627, "step": 7919 }, { "epoch": 0.231234124551108, "grad_norm": 0.7075359935145961, "learning_rate": 4.271046228710462e-06, "loss": 0.6843, "step": 7920 }, { "epoch": 0.23126332077895537, "grad_norm": 0.8515083412986727, "learning_rate": 4.27088402270884e-06, "loss": 0.8074, "step": 7921 }, { "epoch": 0.23129251700680273, "grad_norm": 0.8664754850163888, "learning_rate": 4.270721816707218e-06, "loss": 0.714, "step": 7922 }, { "epoch": 0.2313217132346501, "grad_norm": 0.8095924000605222, "learning_rate": 4.270559610705596e-06, "loss": 0.7026, "step": 7923 }, { "epoch": 0.23135090946249745, "grad_norm": 0.6952507373603392, "learning_rate": 4.270397404703974e-06, "loss": 0.5807, "step": 7924 }, { "epoch": 0.2313801056903448, "grad_norm": 0.7271353586399322, "learning_rate": 4.270235198702352e-06, "loss": 0.5709, "step": 7925 }, { "epoch": 0.23140930191819217, "grad_norm": 0.733072201174189, "learning_rate": 4.27007299270073e-06, "loss": 0.6634, "step": 7926 }, { "epoch": 0.23143849814603953, "grad_norm": 0.6881210811226872, "learning_rate": 4.269910786699108e-06, "loss": 0.6263, "step": 7927 }, { "epoch": 0.2314676943738869, "grad_norm": 0.830945791805623, "learning_rate": 4.269748580697486e-06, "loss": 0.6864, "step": 7928 }, { "epoch": 0.23149689060173426, "grad_norm": 0.7579387145734006, "learning_rate": 4.269586374695864e-06, "loss": 0.7148, "step": 7929 }, { "epoch": 0.23152608682958162, "grad_norm": 0.7824833478025163, "learning_rate": 4.269424168694242e-06, "loss": 0.6869, "step": 7930 }, { "epoch": 0.23155528305742898, "grad_norm": 0.7912864871590725, "learning_rate": 4.26926196269262e-06, "loss": 0.7535, "step": 7931 }, { "epoch": 0.23158447928527634, "grad_norm": 0.7075296102019193, "learning_rate": 4.269099756690998e-06, "loss": 0.5795, "step": 7932 }, { "epoch": 0.2316136755131237, "grad_norm": 0.8316167720987686, "learning_rate": 4.268937550689376e-06, "loss": 0.7753, "step": 7933 }, { "epoch": 0.23164287174097106, "grad_norm": 0.7473298379198507, "learning_rate": 4.2687753446877535e-06, "loss": 0.6091, "step": 7934 }, { "epoch": 0.23167206796881842, "grad_norm": 0.7235781238411048, "learning_rate": 4.2686131386861315e-06, "loss": 0.6598, "step": 7935 }, { "epoch": 0.23170126419666578, "grad_norm": 0.7857049940728112, "learning_rate": 4.2684509326845095e-06, "loss": 0.7254, "step": 7936 }, { "epoch": 0.23173046042451315, "grad_norm": 0.7171228834869575, "learning_rate": 4.2682887266828875e-06, "loss": 0.6429, "step": 7937 }, { "epoch": 0.2317596566523605, "grad_norm": 0.7028861662205383, "learning_rate": 4.2681265206812655e-06, "loss": 0.6835, "step": 7938 }, { "epoch": 0.23178885288020787, "grad_norm": 0.7816108301252698, "learning_rate": 4.2679643146796435e-06, "loss": 0.7331, "step": 7939 }, { "epoch": 0.23181804910805523, "grad_norm": 0.7263758084542694, "learning_rate": 4.2678021086780216e-06, "loss": 0.5433, "step": 7940 }, { "epoch": 0.2318472453359026, "grad_norm": 0.7726437341472561, "learning_rate": 4.2676399026763996e-06, "loss": 0.6884, "step": 7941 }, { "epoch": 0.23187644156374995, "grad_norm": 0.6730559108810242, "learning_rate": 4.2674776966747776e-06, "loss": 0.5743, "step": 7942 }, { "epoch": 0.2319056377915973, "grad_norm": 0.7632107043796263, "learning_rate": 4.267315490673155e-06, "loss": 0.7247, "step": 7943 }, { "epoch": 0.23193483401944467, "grad_norm": 0.745916596565842, "learning_rate": 4.267153284671533e-06, "loss": 0.6331, "step": 7944 }, { "epoch": 0.23196403024729206, "grad_norm": 0.7954091210926701, "learning_rate": 4.266991078669911e-06, "loss": 0.7748, "step": 7945 }, { "epoch": 0.23199322647513942, "grad_norm": 0.6951889209687435, "learning_rate": 4.266828872668289e-06, "loss": 0.6212, "step": 7946 }, { "epoch": 0.23202242270298679, "grad_norm": 0.7333320367003467, "learning_rate": 4.266666666666668e-06, "loss": 0.5988, "step": 7947 }, { "epoch": 0.23205161893083415, "grad_norm": 0.7981322356985312, "learning_rate": 4.266504460665046e-06, "loss": 0.6752, "step": 7948 }, { "epoch": 0.2320808151586815, "grad_norm": 0.7258525305239968, "learning_rate": 4.266342254663423e-06, "loss": 0.5946, "step": 7949 }, { "epoch": 0.23211001138652887, "grad_norm": 0.7579847932100116, "learning_rate": 4.266180048661801e-06, "loss": 0.6458, "step": 7950 }, { "epoch": 0.23213920761437623, "grad_norm": 0.7432250700082736, "learning_rate": 4.266017842660179e-06, "loss": 0.6819, "step": 7951 }, { "epoch": 0.2321684038422236, "grad_norm": 0.6636344109355469, "learning_rate": 4.265855636658557e-06, "loss": 0.5621, "step": 7952 }, { "epoch": 0.23219760007007095, "grad_norm": 0.7115361428345828, "learning_rate": 4.265693430656935e-06, "loss": 0.6578, "step": 7953 }, { "epoch": 0.23222679629791831, "grad_norm": 0.8354416383949433, "learning_rate": 4.265531224655313e-06, "loss": 0.7276, "step": 7954 }, { "epoch": 0.23225599252576568, "grad_norm": 0.7390108931492475, "learning_rate": 4.265369018653691e-06, "loss": 0.6481, "step": 7955 }, { "epoch": 0.23228518875361304, "grad_norm": 0.8435152314295132, "learning_rate": 4.265206812652069e-06, "loss": 0.7561, "step": 7956 }, { "epoch": 0.2323143849814604, "grad_norm": 0.7730269166858131, "learning_rate": 4.265044606650446e-06, "loss": 0.7192, "step": 7957 }, { "epoch": 0.23234358120930776, "grad_norm": 0.8031622575182502, "learning_rate": 4.264882400648824e-06, "loss": 0.7633, "step": 7958 }, { "epoch": 0.23237277743715512, "grad_norm": 0.7071687646228143, "learning_rate": 4.264720194647202e-06, "loss": 0.679, "step": 7959 }, { "epoch": 0.23240197366500248, "grad_norm": 0.8104695133447588, "learning_rate": 4.26455798864558e-06, "loss": 0.6632, "step": 7960 }, { "epoch": 0.23243116989284984, "grad_norm": 0.9851518442935239, "learning_rate": 4.264395782643958e-06, "loss": 0.6829, "step": 7961 }, { "epoch": 0.2324603661206972, "grad_norm": 0.7227729056406745, "learning_rate": 4.264233576642336e-06, "loss": 0.6472, "step": 7962 }, { "epoch": 0.23248956234854457, "grad_norm": 0.7315525702722474, "learning_rate": 4.264071370640714e-06, "loss": 0.6536, "step": 7963 }, { "epoch": 0.23251875857639193, "grad_norm": 0.7458271086304381, "learning_rate": 4.263909164639092e-06, "loss": 0.6852, "step": 7964 }, { "epoch": 0.2325479548042393, "grad_norm": 0.8080054783032853, "learning_rate": 4.26374695863747e-06, "loss": 0.7121, "step": 7965 }, { "epoch": 0.23257715103208665, "grad_norm": 0.7682754621590477, "learning_rate": 4.263584752635848e-06, "loss": 0.6746, "step": 7966 }, { "epoch": 0.232606347259934, "grad_norm": 0.6851641648675378, "learning_rate": 4.263422546634226e-06, "loss": 0.5918, "step": 7967 }, { "epoch": 0.23263554348778137, "grad_norm": 0.7369952685856588, "learning_rate": 4.263260340632604e-06, "loss": 0.6767, "step": 7968 }, { "epoch": 0.23266473971562873, "grad_norm": 0.7078150653686754, "learning_rate": 4.263098134630982e-06, "loss": 0.6608, "step": 7969 }, { "epoch": 0.2326939359434761, "grad_norm": 0.7679324540010746, "learning_rate": 4.26293592862936e-06, "loss": 0.7737, "step": 7970 }, { "epoch": 0.23272313217132345, "grad_norm": 0.7593044521691811, "learning_rate": 4.262773722627738e-06, "loss": 0.7255, "step": 7971 }, { "epoch": 0.23275232839917082, "grad_norm": 0.7674548488156365, "learning_rate": 4.262611516626115e-06, "loss": 0.7173, "step": 7972 }, { "epoch": 0.23278152462701818, "grad_norm": 0.6566452152587068, "learning_rate": 4.262449310624493e-06, "loss": 0.5659, "step": 7973 }, { "epoch": 0.23281072085486554, "grad_norm": 0.7484695176163809, "learning_rate": 4.262287104622871e-06, "loss": 0.6874, "step": 7974 }, { "epoch": 0.23283991708271293, "grad_norm": 0.7115330421903392, "learning_rate": 4.262124898621249e-06, "loss": 0.6413, "step": 7975 }, { "epoch": 0.2328691133105603, "grad_norm": 0.7869248411726301, "learning_rate": 4.261962692619627e-06, "loss": 0.7106, "step": 7976 }, { "epoch": 0.23289830953840765, "grad_norm": 0.733944015154592, "learning_rate": 4.261800486618005e-06, "loss": 0.698, "step": 7977 }, { "epoch": 0.232927505766255, "grad_norm": 0.69216158333615, "learning_rate": 4.261638280616383e-06, "loss": 0.6048, "step": 7978 }, { "epoch": 0.23295670199410237, "grad_norm": 0.7552918353212035, "learning_rate": 4.261476074614761e-06, "loss": 0.697, "step": 7979 }, { "epoch": 0.23298589822194973, "grad_norm": 0.7580079746054608, "learning_rate": 4.261313868613139e-06, "loss": 0.6352, "step": 7980 }, { "epoch": 0.2330150944497971, "grad_norm": 0.727308167944824, "learning_rate": 4.2611516626115164e-06, "loss": 0.6489, "step": 7981 }, { "epoch": 0.23304429067764446, "grad_norm": 0.830505353704367, "learning_rate": 4.2609894566098945e-06, "loss": 0.688, "step": 7982 }, { "epoch": 0.23307348690549182, "grad_norm": 0.7709371039739698, "learning_rate": 4.2608272506082725e-06, "loss": 0.6724, "step": 7983 }, { "epoch": 0.23310268313333918, "grad_norm": 0.7340498158502634, "learning_rate": 4.2606650446066505e-06, "loss": 0.7225, "step": 7984 }, { "epoch": 0.23313187936118654, "grad_norm": 0.7740850262272978, "learning_rate": 4.260502838605029e-06, "loss": 0.7793, "step": 7985 }, { "epoch": 0.2331610755890339, "grad_norm": 0.720628314367511, "learning_rate": 4.260340632603407e-06, "loss": 0.6366, "step": 7986 }, { "epoch": 0.23319027181688126, "grad_norm": 0.8276754596968681, "learning_rate": 4.2601784266017845e-06, "loss": 0.684, "step": 7987 }, { "epoch": 0.23321946804472862, "grad_norm": 0.726084817985009, "learning_rate": 4.2600162206001625e-06, "loss": 0.6672, "step": 7988 }, { "epoch": 0.23324866427257598, "grad_norm": 0.7500744222006701, "learning_rate": 4.2598540145985405e-06, "loss": 0.7111, "step": 7989 }, { "epoch": 0.23327786050042335, "grad_norm": 0.7279287161261001, "learning_rate": 4.2596918085969185e-06, "loss": 0.6287, "step": 7990 }, { "epoch": 0.2333070567282707, "grad_norm": 0.8771604733406868, "learning_rate": 4.2595296025952965e-06, "loss": 0.6919, "step": 7991 }, { "epoch": 0.23333625295611807, "grad_norm": 0.8164039155075729, "learning_rate": 4.2593673965936745e-06, "loss": 0.7688, "step": 7992 }, { "epoch": 0.23336544918396543, "grad_norm": 0.7231760065796, "learning_rate": 4.2592051905920525e-06, "loss": 0.6121, "step": 7993 }, { "epoch": 0.2333946454118128, "grad_norm": 0.679627576170397, "learning_rate": 4.2590429845904305e-06, "loss": 0.5574, "step": 7994 }, { "epoch": 0.23342384163966015, "grad_norm": 0.7187932835597329, "learning_rate": 4.258880778588808e-06, "loss": 0.6515, "step": 7995 }, { "epoch": 0.2334530378675075, "grad_norm": 0.7795159554474504, "learning_rate": 4.258718572587186e-06, "loss": 0.6131, "step": 7996 }, { "epoch": 0.23348223409535487, "grad_norm": 0.6682309042503064, "learning_rate": 4.258556366585564e-06, "loss": 0.577, "step": 7997 }, { "epoch": 0.23351143032320223, "grad_norm": 0.7691999051072175, "learning_rate": 4.258394160583942e-06, "loss": 0.7523, "step": 7998 }, { "epoch": 0.2335406265510496, "grad_norm": 0.7233757180426915, "learning_rate": 4.25823195458232e-06, "loss": 0.6048, "step": 7999 }, { "epoch": 0.23356982277889696, "grad_norm": 0.7392184202225522, "learning_rate": 4.258069748580698e-06, "loss": 0.6523, "step": 8000 }, { "epoch": 0.23359901900674432, "grad_norm": 0.7412621291360205, "learning_rate": 4.257907542579076e-06, "loss": 0.6686, "step": 8001 }, { "epoch": 0.23362821523459168, "grad_norm": 0.7791802140057356, "learning_rate": 4.257745336577454e-06, "loss": 0.645, "step": 8002 }, { "epoch": 0.23365741146243904, "grad_norm": 0.7979478163038031, "learning_rate": 4.257583130575832e-06, "loss": 0.6913, "step": 8003 }, { "epoch": 0.2336866076902864, "grad_norm": 0.8120945136268396, "learning_rate": 4.25742092457421e-06, "loss": 0.6968, "step": 8004 }, { "epoch": 0.2337158039181338, "grad_norm": 0.7968560512389216, "learning_rate": 4.257258718572588e-06, "loss": 0.7492, "step": 8005 }, { "epoch": 0.23374500014598115, "grad_norm": 0.8076205748921688, "learning_rate": 4.257096512570966e-06, "loss": 0.6522, "step": 8006 }, { "epoch": 0.2337741963738285, "grad_norm": 1.078822978963844, "learning_rate": 4.256934306569344e-06, "loss": 0.6619, "step": 8007 }, { "epoch": 0.23380339260167587, "grad_norm": 0.744355066898721, "learning_rate": 4.256772100567722e-06, "loss": 0.6969, "step": 8008 }, { "epoch": 0.23383258882952324, "grad_norm": 0.7553748587105236, "learning_rate": 4.2566098945661e-06, "loss": 0.7143, "step": 8009 }, { "epoch": 0.2338617850573706, "grad_norm": 0.6911610773039032, "learning_rate": 4.256447688564477e-06, "loss": 0.5905, "step": 8010 }, { "epoch": 0.23389098128521796, "grad_norm": 0.7765655292918583, "learning_rate": 4.256285482562855e-06, "loss": 0.7236, "step": 8011 }, { "epoch": 0.23392017751306532, "grad_norm": 0.7165007572996012, "learning_rate": 4.256123276561233e-06, "loss": 0.6237, "step": 8012 }, { "epoch": 0.23394937374091268, "grad_norm": 0.669015721251367, "learning_rate": 4.255961070559611e-06, "loss": 0.5589, "step": 8013 }, { "epoch": 0.23397856996876004, "grad_norm": 0.7623138575455798, "learning_rate": 4.255798864557989e-06, "loss": 0.6806, "step": 8014 }, { "epoch": 0.2340077661966074, "grad_norm": 0.771620136340398, "learning_rate": 4.255636658556367e-06, "loss": 0.7132, "step": 8015 }, { "epoch": 0.23403696242445476, "grad_norm": 0.87693420370128, "learning_rate": 4.255474452554745e-06, "loss": 0.6198, "step": 8016 }, { "epoch": 0.23406615865230213, "grad_norm": 0.8726031636274252, "learning_rate": 4.255312246553123e-06, "loss": 0.719, "step": 8017 }, { "epoch": 0.2340953548801495, "grad_norm": 0.7354859732559571, "learning_rate": 4.255150040551501e-06, "loss": 0.6676, "step": 8018 }, { "epoch": 0.23412455110799685, "grad_norm": 0.6938067901400619, "learning_rate": 4.254987834549878e-06, "loss": 0.6122, "step": 8019 }, { "epoch": 0.2341537473358442, "grad_norm": 0.7893664779166616, "learning_rate": 4.254825628548256e-06, "loss": 0.73, "step": 8020 }, { "epoch": 0.23418294356369157, "grad_norm": 0.821974358749223, "learning_rate": 4.254663422546634e-06, "loss": 0.7189, "step": 8021 }, { "epoch": 0.23421213979153893, "grad_norm": 0.8005931159422054, "learning_rate": 4.254501216545012e-06, "loss": 0.7694, "step": 8022 }, { "epoch": 0.2342413360193863, "grad_norm": 0.7157174674430276, "learning_rate": 4.254339010543391e-06, "loss": 0.6684, "step": 8023 }, { "epoch": 0.23427053224723365, "grad_norm": 0.7632642254557406, "learning_rate": 4.254176804541769e-06, "loss": 0.6636, "step": 8024 }, { "epoch": 0.23429972847508101, "grad_norm": 0.7853255145524737, "learning_rate": 4.254014598540146e-06, "loss": 0.676, "step": 8025 }, { "epoch": 0.23432892470292838, "grad_norm": 0.6901236405609598, "learning_rate": 4.253852392538524e-06, "loss": 0.5785, "step": 8026 }, { "epoch": 0.23435812093077574, "grad_norm": 0.712846576021322, "learning_rate": 4.253690186536902e-06, "loss": 0.6657, "step": 8027 }, { "epoch": 0.2343873171586231, "grad_norm": 0.7485648275554945, "learning_rate": 4.25352798053528e-06, "loss": 0.7003, "step": 8028 }, { "epoch": 0.23441651338647046, "grad_norm": 0.9312137043732581, "learning_rate": 4.253365774533658e-06, "loss": 0.7086, "step": 8029 }, { "epoch": 0.23444570961431782, "grad_norm": 0.7889815931790525, "learning_rate": 4.253203568532036e-06, "loss": 0.692, "step": 8030 }, { "epoch": 0.23447490584216518, "grad_norm": 0.854889722648953, "learning_rate": 4.253041362530414e-06, "loss": 0.7183, "step": 8031 }, { "epoch": 0.23450410207001254, "grad_norm": 0.722664513598926, "learning_rate": 4.252879156528792e-06, "loss": 0.6258, "step": 8032 }, { "epoch": 0.2345332982978599, "grad_norm": 0.6925827424171346, "learning_rate": 4.252716950527169e-06, "loss": 0.6055, "step": 8033 }, { "epoch": 0.23456249452570727, "grad_norm": 0.7191601544135474, "learning_rate": 4.252554744525547e-06, "loss": 0.5854, "step": 8034 }, { "epoch": 0.23459169075355463, "grad_norm": 0.7588529092773332, "learning_rate": 4.2523925385239254e-06, "loss": 0.6974, "step": 8035 }, { "epoch": 0.23462088698140202, "grad_norm": 0.6742752721187109, "learning_rate": 4.2522303325223034e-06, "loss": 0.5414, "step": 8036 }, { "epoch": 0.23465008320924938, "grad_norm": 0.7741703730797148, "learning_rate": 4.2520681265206814e-06, "loss": 0.6736, "step": 8037 }, { "epoch": 0.23467927943709674, "grad_norm": 0.7793380216145859, "learning_rate": 4.2519059205190594e-06, "loss": 0.7277, "step": 8038 }, { "epoch": 0.2347084756649441, "grad_norm": 0.7541816074603237, "learning_rate": 4.2517437145174375e-06, "loss": 0.6929, "step": 8039 }, { "epoch": 0.23473767189279146, "grad_norm": 0.7404093421169798, "learning_rate": 4.2515815085158155e-06, "loss": 0.6486, "step": 8040 }, { "epoch": 0.23476686812063882, "grad_norm": 0.731023599508064, "learning_rate": 4.2514193025141935e-06, "loss": 0.6846, "step": 8041 }, { "epoch": 0.23479606434848618, "grad_norm": 0.9312661006546304, "learning_rate": 4.2512570965125715e-06, "loss": 0.7646, "step": 8042 }, { "epoch": 0.23482526057633354, "grad_norm": 0.7496911975608169, "learning_rate": 4.2510948905109495e-06, "loss": 0.7152, "step": 8043 }, { "epoch": 0.2348544568041809, "grad_norm": 0.7351415912426654, "learning_rate": 4.2509326845093275e-06, "loss": 0.7004, "step": 8044 }, { "epoch": 0.23488365303202827, "grad_norm": 0.7921974406837784, "learning_rate": 4.2507704785077055e-06, "loss": 0.6805, "step": 8045 }, { "epoch": 0.23491284925987563, "grad_norm": 0.7737814753914041, "learning_rate": 4.2506082725060835e-06, "loss": 0.6434, "step": 8046 }, { "epoch": 0.234942045487723, "grad_norm": 0.72328663985572, "learning_rate": 4.2504460665044615e-06, "loss": 0.7054, "step": 8047 }, { "epoch": 0.23497124171557035, "grad_norm": 0.8297705889210616, "learning_rate": 4.250283860502839e-06, "loss": 0.7688, "step": 8048 }, { "epoch": 0.2350004379434177, "grad_norm": 0.8115346506234535, "learning_rate": 4.250121654501217e-06, "loss": 0.7744, "step": 8049 }, { "epoch": 0.23502963417126507, "grad_norm": 0.8411878752505089, "learning_rate": 4.249959448499595e-06, "loss": 0.7882, "step": 8050 }, { "epoch": 0.23505883039911243, "grad_norm": 0.7590541810416942, "learning_rate": 4.249797242497973e-06, "loss": 0.6468, "step": 8051 }, { "epoch": 0.2350880266269598, "grad_norm": 0.7779899750651632, "learning_rate": 4.249635036496351e-06, "loss": 0.7523, "step": 8052 }, { "epoch": 0.23511722285480716, "grad_norm": 0.7445063283039454, "learning_rate": 4.249472830494729e-06, "loss": 0.7511, "step": 8053 }, { "epoch": 0.23514641908265452, "grad_norm": 0.7099632588182059, "learning_rate": 4.249310624493107e-06, "loss": 0.6395, "step": 8054 }, { "epoch": 0.23517561531050188, "grad_norm": 0.7460152466176571, "learning_rate": 4.249148418491485e-06, "loss": 0.6636, "step": 8055 }, { "epoch": 0.23520481153834924, "grad_norm": 0.8580968510351736, "learning_rate": 4.248986212489863e-06, "loss": 0.7795, "step": 8056 }, { "epoch": 0.2352340077661966, "grad_norm": 0.7362612906013118, "learning_rate": 4.24882400648824e-06, "loss": 0.6649, "step": 8057 }, { "epoch": 0.23526320399404396, "grad_norm": 0.7476592766644049, "learning_rate": 4.248661800486618e-06, "loss": 0.6946, "step": 8058 }, { "epoch": 0.23529240022189132, "grad_norm": 0.7418988641530145, "learning_rate": 4.248499594484996e-06, "loss": 0.6498, "step": 8059 }, { "epoch": 0.23532159644973868, "grad_norm": 0.7377107658002195, "learning_rate": 4.248337388483374e-06, "loss": 0.6323, "step": 8060 }, { "epoch": 0.23535079267758605, "grad_norm": 0.7556055065087668, "learning_rate": 4.248175182481753e-06, "loss": 0.7395, "step": 8061 }, { "epoch": 0.2353799889054334, "grad_norm": 0.7462448168990659, "learning_rate": 4.24801297648013e-06, "loss": 0.692, "step": 8062 }, { "epoch": 0.23540918513328077, "grad_norm": 0.7319349378821488, "learning_rate": 4.247850770478508e-06, "loss": 0.656, "step": 8063 }, { "epoch": 0.23543838136112813, "grad_norm": 0.7267565485008045, "learning_rate": 4.247688564476886e-06, "loss": 0.6596, "step": 8064 }, { "epoch": 0.2354675775889755, "grad_norm": 0.7460904504916105, "learning_rate": 4.247526358475264e-06, "loss": 0.6501, "step": 8065 }, { "epoch": 0.23549677381682288, "grad_norm": 0.7415322965675805, "learning_rate": 4.247364152473642e-06, "loss": 0.6647, "step": 8066 }, { "epoch": 0.23552597004467024, "grad_norm": 0.7991151454163736, "learning_rate": 4.24720194647202e-06, "loss": 0.742, "step": 8067 }, { "epoch": 0.2355551662725176, "grad_norm": 0.69162579291303, "learning_rate": 4.247039740470398e-06, "loss": 0.6094, "step": 8068 }, { "epoch": 0.23558436250036496, "grad_norm": 0.7268190783248647, "learning_rate": 4.246877534468776e-06, "loss": 0.6711, "step": 8069 }, { "epoch": 0.23561355872821232, "grad_norm": 0.718648548144759, "learning_rate": 4.246715328467154e-06, "loss": 0.6753, "step": 8070 }, { "epoch": 0.23564275495605969, "grad_norm": 0.7261648871223191, "learning_rate": 4.246553122465531e-06, "loss": 0.6313, "step": 8071 }, { "epoch": 0.23567195118390705, "grad_norm": 0.7562005588580882, "learning_rate": 4.246390916463909e-06, "loss": 0.6688, "step": 8072 }, { "epoch": 0.2357011474117544, "grad_norm": 0.6862646078069455, "learning_rate": 4.246228710462287e-06, "loss": 0.6266, "step": 8073 }, { "epoch": 0.23573034363960177, "grad_norm": 0.788118565760285, "learning_rate": 4.246066504460665e-06, "loss": 0.7797, "step": 8074 }, { "epoch": 0.23575953986744913, "grad_norm": 0.7510290447411725, "learning_rate": 4.245904298459043e-06, "loss": 0.6507, "step": 8075 }, { "epoch": 0.2357887360952965, "grad_norm": 0.7457544841572457, "learning_rate": 4.245742092457421e-06, "loss": 0.6231, "step": 8076 }, { "epoch": 0.23581793232314385, "grad_norm": 0.8671295439069062, "learning_rate": 4.245579886455799e-06, "loss": 0.7422, "step": 8077 }, { "epoch": 0.23584712855099121, "grad_norm": 0.7621855808824932, "learning_rate": 4.245417680454177e-06, "loss": 0.7109, "step": 8078 }, { "epoch": 0.23587632477883858, "grad_norm": 0.7102607358797172, "learning_rate": 4.245255474452555e-06, "loss": 0.6632, "step": 8079 }, { "epoch": 0.23590552100668594, "grad_norm": 0.7637194782354313, "learning_rate": 4.245093268450933e-06, "loss": 0.7098, "step": 8080 }, { "epoch": 0.2359347172345333, "grad_norm": 0.7637571250506714, "learning_rate": 4.244931062449311e-06, "loss": 0.7385, "step": 8081 }, { "epoch": 0.23596391346238066, "grad_norm": 0.7586896368672068, "learning_rate": 4.244768856447689e-06, "loss": 0.7195, "step": 8082 }, { "epoch": 0.23599310969022802, "grad_norm": 0.7938763837400105, "learning_rate": 4.244606650446067e-06, "loss": 0.7393, "step": 8083 }, { "epoch": 0.23602230591807538, "grad_norm": 0.7265852266620118, "learning_rate": 4.244444444444445e-06, "loss": 0.6087, "step": 8084 }, { "epoch": 0.23605150214592274, "grad_norm": 0.9258502362831689, "learning_rate": 4.244282238442823e-06, "loss": 0.75, "step": 8085 }, { "epoch": 0.2360806983737701, "grad_norm": 0.7821297876915143, "learning_rate": 4.2441200324412e-06, "loss": 0.7115, "step": 8086 }, { "epoch": 0.23610989460161746, "grad_norm": 0.7515093558267462, "learning_rate": 4.243957826439578e-06, "loss": 0.7176, "step": 8087 }, { "epoch": 0.23613909082946483, "grad_norm": 0.7599000177967834, "learning_rate": 4.243795620437956e-06, "loss": 0.7706, "step": 8088 }, { "epoch": 0.2361682870573122, "grad_norm": 0.7645141221078813, "learning_rate": 4.243633414436334e-06, "loss": 0.6753, "step": 8089 }, { "epoch": 0.23619748328515955, "grad_norm": 0.8051366730201914, "learning_rate": 4.243471208434712e-06, "loss": 0.8108, "step": 8090 }, { "epoch": 0.2362266795130069, "grad_norm": 0.7612960048588857, "learning_rate": 4.24330900243309e-06, "loss": 0.6764, "step": 8091 }, { "epoch": 0.23625587574085427, "grad_norm": 0.7607563175198737, "learning_rate": 4.2431467964314684e-06, "loss": 0.6549, "step": 8092 }, { "epoch": 0.23628507196870163, "grad_norm": 0.7873114002414412, "learning_rate": 4.2429845904298464e-06, "loss": 0.7251, "step": 8093 }, { "epoch": 0.236314268196549, "grad_norm": 0.8095276546217498, "learning_rate": 4.2428223844282244e-06, "loss": 0.7012, "step": 8094 }, { "epoch": 0.23634346442439635, "grad_norm": 0.7161111922549039, "learning_rate": 4.242660178426602e-06, "loss": 0.6307, "step": 8095 }, { "epoch": 0.23637266065224374, "grad_norm": 0.730696339446343, "learning_rate": 4.24249797242498e-06, "loss": 0.5779, "step": 8096 }, { "epoch": 0.2364018568800911, "grad_norm": 0.7565642597436061, "learning_rate": 4.242335766423358e-06, "loss": 0.6431, "step": 8097 }, { "epoch": 0.23643105310793847, "grad_norm": 0.8206312828481777, "learning_rate": 4.2421735604217365e-06, "loss": 0.8024, "step": 8098 }, { "epoch": 0.23646024933578583, "grad_norm": 0.8913613448157595, "learning_rate": 4.2420113544201145e-06, "loss": 0.6325, "step": 8099 }, { "epoch": 0.2364894455636332, "grad_norm": 0.744972018369488, "learning_rate": 4.241849148418492e-06, "loss": 0.6933, "step": 8100 }, { "epoch": 0.23651864179148055, "grad_norm": 0.7720683591297509, "learning_rate": 4.24168694241687e-06, "loss": 0.7243, "step": 8101 }, { "epoch": 0.2365478380193279, "grad_norm": 0.7991428392090818, "learning_rate": 4.241524736415248e-06, "loss": 0.673, "step": 8102 }, { "epoch": 0.23657703424717527, "grad_norm": 0.7253717205699071, "learning_rate": 4.241362530413626e-06, "loss": 0.6151, "step": 8103 }, { "epoch": 0.23660623047502263, "grad_norm": 0.7323779642559135, "learning_rate": 4.241200324412004e-06, "loss": 0.7259, "step": 8104 }, { "epoch": 0.23663542670287, "grad_norm": 0.7359573668376735, "learning_rate": 4.241038118410382e-06, "loss": 0.6254, "step": 8105 }, { "epoch": 0.23666462293071736, "grad_norm": 0.7226155771709939, "learning_rate": 4.24087591240876e-06, "loss": 0.6644, "step": 8106 }, { "epoch": 0.23669381915856472, "grad_norm": 0.7608159057588909, "learning_rate": 4.240713706407138e-06, "loss": 0.6285, "step": 8107 }, { "epoch": 0.23672301538641208, "grad_norm": 0.8698862296140905, "learning_rate": 4.240551500405516e-06, "loss": 0.7037, "step": 8108 }, { "epoch": 0.23675221161425944, "grad_norm": 0.811950504754915, "learning_rate": 4.240389294403893e-06, "loss": 0.7196, "step": 8109 }, { "epoch": 0.2367814078421068, "grad_norm": 0.6945833726792788, "learning_rate": 4.240227088402271e-06, "loss": 0.6015, "step": 8110 }, { "epoch": 0.23681060406995416, "grad_norm": 0.7432727999179157, "learning_rate": 4.240064882400649e-06, "loss": 0.6955, "step": 8111 }, { "epoch": 0.23683980029780152, "grad_norm": 0.8403247209465805, "learning_rate": 4.239902676399027e-06, "loss": 0.8333, "step": 8112 }, { "epoch": 0.23686899652564888, "grad_norm": 0.787219915788785, "learning_rate": 4.239740470397405e-06, "loss": 0.6926, "step": 8113 }, { "epoch": 0.23689819275349625, "grad_norm": 0.7366395508411187, "learning_rate": 4.239578264395783e-06, "loss": 0.6298, "step": 8114 }, { "epoch": 0.2369273889813436, "grad_norm": 0.7352627087079329, "learning_rate": 4.239416058394161e-06, "loss": 0.7192, "step": 8115 }, { "epoch": 0.23695658520919097, "grad_norm": 0.6808618010805063, "learning_rate": 4.239253852392539e-06, "loss": 0.6133, "step": 8116 }, { "epoch": 0.23698578143703833, "grad_norm": 0.7537137696258845, "learning_rate": 4.239091646390917e-06, "loss": 0.67, "step": 8117 }, { "epoch": 0.2370149776648857, "grad_norm": 0.700655139497497, "learning_rate": 4.238929440389295e-06, "loss": 0.6295, "step": 8118 }, { "epoch": 0.23704417389273305, "grad_norm": 0.7973174663356118, "learning_rate": 4.238767234387673e-06, "loss": 0.7224, "step": 8119 }, { "epoch": 0.2370733701205804, "grad_norm": 0.6608203703094204, "learning_rate": 4.238605028386051e-06, "loss": 0.5794, "step": 8120 }, { "epoch": 0.23710256634842777, "grad_norm": 0.6936429088721221, "learning_rate": 4.238442822384429e-06, "loss": 0.6475, "step": 8121 }, { "epoch": 0.23713176257627513, "grad_norm": 0.6449579913428316, "learning_rate": 4.238280616382807e-06, "loss": 0.5111, "step": 8122 }, { "epoch": 0.2371609588041225, "grad_norm": 0.7819583034614929, "learning_rate": 4.238118410381185e-06, "loss": 0.7583, "step": 8123 }, { "epoch": 0.23719015503196986, "grad_norm": 0.8213148359958444, "learning_rate": 4.237956204379562e-06, "loss": 0.7477, "step": 8124 }, { "epoch": 0.23721935125981722, "grad_norm": 0.6832025787099525, "learning_rate": 4.23779399837794e-06, "loss": 0.6131, "step": 8125 }, { "epoch": 0.2372485474876646, "grad_norm": 0.7458255469532737, "learning_rate": 4.237631792376318e-06, "loss": 0.7092, "step": 8126 }, { "epoch": 0.23727774371551197, "grad_norm": 0.82078421107101, "learning_rate": 4.237469586374696e-06, "loss": 0.7448, "step": 8127 }, { "epoch": 0.23730693994335933, "grad_norm": 0.7588159279677433, "learning_rate": 4.237307380373074e-06, "loss": 0.6478, "step": 8128 }, { "epoch": 0.2373361361712067, "grad_norm": 0.7350694238236268, "learning_rate": 4.237145174371452e-06, "loss": 0.6058, "step": 8129 }, { "epoch": 0.23736533239905405, "grad_norm": 0.7246100628572435, "learning_rate": 4.23698296836983e-06, "loss": 0.6402, "step": 8130 }, { "epoch": 0.2373945286269014, "grad_norm": 0.7593240462652755, "learning_rate": 4.236820762368208e-06, "loss": 0.686, "step": 8131 }, { "epoch": 0.23742372485474877, "grad_norm": 0.7405975913511615, "learning_rate": 4.236658556366586e-06, "loss": 0.703, "step": 8132 }, { "epoch": 0.23745292108259614, "grad_norm": 0.7404592541868107, "learning_rate": 4.236496350364963e-06, "loss": 0.7515, "step": 8133 }, { "epoch": 0.2374821173104435, "grad_norm": 0.7278808992921517, "learning_rate": 4.236334144363341e-06, "loss": 0.6639, "step": 8134 }, { "epoch": 0.23751131353829086, "grad_norm": 0.7698389527839634, "learning_rate": 4.236171938361719e-06, "loss": 0.6996, "step": 8135 }, { "epoch": 0.23754050976613822, "grad_norm": 0.681200714398424, "learning_rate": 4.236009732360098e-06, "loss": 0.5876, "step": 8136 }, { "epoch": 0.23756970599398558, "grad_norm": 0.7093745327234879, "learning_rate": 4.235847526358476e-06, "loss": 0.6543, "step": 8137 }, { "epoch": 0.23759890222183294, "grad_norm": 0.8124498585898305, "learning_rate": 4.235685320356853e-06, "loss": 0.716, "step": 8138 }, { "epoch": 0.2376280984496803, "grad_norm": 0.6902634795760167, "learning_rate": 4.235523114355231e-06, "loss": 0.5997, "step": 8139 }, { "epoch": 0.23765729467752766, "grad_norm": 0.7014043471877045, "learning_rate": 4.235360908353609e-06, "loss": 0.64, "step": 8140 }, { "epoch": 0.23768649090537503, "grad_norm": 0.7277619875710124, "learning_rate": 4.235198702351987e-06, "loss": 0.6785, "step": 8141 }, { "epoch": 0.2377156871332224, "grad_norm": 0.7126967677985515, "learning_rate": 4.235036496350365e-06, "loss": 0.5704, "step": 8142 }, { "epoch": 0.23774488336106975, "grad_norm": 0.7555045298417343, "learning_rate": 4.234874290348743e-06, "loss": 0.705, "step": 8143 }, { "epoch": 0.2377740795889171, "grad_norm": 0.6766769298078746, "learning_rate": 4.234712084347121e-06, "loss": 0.6052, "step": 8144 }, { "epoch": 0.23780327581676447, "grad_norm": 0.9072764165223973, "learning_rate": 4.234549878345499e-06, "loss": 0.6997, "step": 8145 }, { "epoch": 0.23783247204461183, "grad_norm": 0.723396926455625, "learning_rate": 4.234387672343877e-06, "loss": 0.6473, "step": 8146 }, { "epoch": 0.2378616682724592, "grad_norm": 0.7710927488362763, "learning_rate": 4.2342254663422546e-06, "loss": 0.7339, "step": 8147 }, { "epoch": 0.23789086450030655, "grad_norm": 0.7079518987845431, "learning_rate": 4.2340632603406326e-06, "loss": 0.6295, "step": 8148 }, { "epoch": 0.23792006072815391, "grad_norm": 0.7357018406130155, "learning_rate": 4.233901054339011e-06, "loss": 0.6536, "step": 8149 }, { "epoch": 0.23794925695600128, "grad_norm": 0.67750546242318, "learning_rate": 4.233738848337389e-06, "loss": 0.6181, "step": 8150 }, { "epoch": 0.23797845318384864, "grad_norm": 0.896214978917812, "learning_rate": 4.233576642335767e-06, "loss": 0.5954, "step": 8151 }, { "epoch": 0.238007649411696, "grad_norm": 0.7199008883072925, "learning_rate": 4.233414436334145e-06, "loss": 0.6963, "step": 8152 }, { "epoch": 0.23803684563954336, "grad_norm": 0.7141321286561328, "learning_rate": 4.233252230332523e-06, "loss": 0.6816, "step": 8153 }, { "epoch": 0.23806604186739072, "grad_norm": 0.8100378689581237, "learning_rate": 4.233090024330901e-06, "loss": 0.7638, "step": 8154 }, { "epoch": 0.23809523809523808, "grad_norm": 0.7305598525702068, "learning_rate": 4.232927818329279e-06, "loss": 0.6868, "step": 8155 }, { "epoch": 0.23812443432308547, "grad_norm": 0.7819566329822318, "learning_rate": 4.232765612327657e-06, "loss": 0.6983, "step": 8156 }, { "epoch": 0.23815363055093283, "grad_norm": 0.7728045497007388, "learning_rate": 4.232603406326035e-06, "loss": 0.6303, "step": 8157 }, { "epoch": 0.2381828267787802, "grad_norm": 0.7691461863319513, "learning_rate": 4.232441200324413e-06, "loss": 0.6675, "step": 8158 }, { "epoch": 0.23821202300662755, "grad_norm": 0.6993560948426739, "learning_rate": 4.232278994322791e-06, "loss": 0.6449, "step": 8159 }, { "epoch": 0.23824121923447492, "grad_norm": 0.772248929799964, "learning_rate": 4.232116788321169e-06, "loss": 0.6942, "step": 8160 }, { "epoch": 0.23827041546232228, "grad_norm": 0.7260426125752817, "learning_rate": 4.231954582319547e-06, "loss": 0.6179, "step": 8161 }, { "epoch": 0.23829961169016964, "grad_norm": 0.7568564242248926, "learning_rate": 4.231792376317924e-06, "loss": 0.6176, "step": 8162 }, { "epoch": 0.238328807918017, "grad_norm": 0.7260574585938527, "learning_rate": 4.231630170316302e-06, "loss": 0.6801, "step": 8163 }, { "epoch": 0.23835800414586436, "grad_norm": 0.7992724079206983, "learning_rate": 4.23146796431468e-06, "loss": 0.6993, "step": 8164 }, { "epoch": 0.23838720037371172, "grad_norm": 0.6939313992400513, "learning_rate": 4.231305758313058e-06, "loss": 0.6078, "step": 8165 }, { "epoch": 0.23841639660155908, "grad_norm": 0.7636026812611814, "learning_rate": 4.231143552311436e-06, "loss": 0.6425, "step": 8166 }, { "epoch": 0.23844559282940644, "grad_norm": 0.7246441045327684, "learning_rate": 4.230981346309814e-06, "loss": 0.6849, "step": 8167 }, { "epoch": 0.2384747890572538, "grad_norm": 0.6976093966167901, "learning_rate": 4.230819140308192e-06, "loss": 0.5827, "step": 8168 }, { "epoch": 0.23850398528510117, "grad_norm": 0.740575008008598, "learning_rate": 4.23065693430657e-06, "loss": 0.6373, "step": 8169 }, { "epoch": 0.23853318151294853, "grad_norm": 0.7557494108793141, "learning_rate": 4.230494728304948e-06, "loss": 0.725, "step": 8170 }, { "epoch": 0.2385623777407959, "grad_norm": 0.7353827081114508, "learning_rate": 4.230332522303325e-06, "loss": 0.6365, "step": 8171 }, { "epoch": 0.23859157396864325, "grad_norm": 0.7616536008595481, "learning_rate": 4.230170316301703e-06, "loss": 0.6707, "step": 8172 }, { "epoch": 0.2386207701964906, "grad_norm": 0.6802305587323093, "learning_rate": 4.230008110300081e-06, "loss": 0.5805, "step": 8173 }, { "epoch": 0.23864996642433797, "grad_norm": 0.7558375495186531, "learning_rate": 4.22984590429846e-06, "loss": 0.7066, "step": 8174 }, { "epoch": 0.23867916265218533, "grad_norm": 0.8092984776787411, "learning_rate": 4.229683698296838e-06, "loss": 0.7371, "step": 8175 }, { "epoch": 0.2387083588800327, "grad_norm": 0.7013825141446005, "learning_rate": 4.229521492295215e-06, "loss": 0.5637, "step": 8176 }, { "epoch": 0.23873755510788006, "grad_norm": 0.8152963111200094, "learning_rate": 4.229359286293593e-06, "loss": 0.7504, "step": 8177 }, { "epoch": 0.23876675133572742, "grad_norm": 0.7447070225664292, "learning_rate": 4.229197080291971e-06, "loss": 0.7151, "step": 8178 }, { "epoch": 0.23879594756357478, "grad_norm": 0.8066614473542056, "learning_rate": 4.229034874290349e-06, "loss": 0.7364, "step": 8179 }, { "epoch": 0.23882514379142214, "grad_norm": 0.7140106086963859, "learning_rate": 4.228872668288727e-06, "loss": 0.6099, "step": 8180 }, { "epoch": 0.2388543400192695, "grad_norm": 0.7345150604889018, "learning_rate": 4.228710462287105e-06, "loss": 0.6997, "step": 8181 }, { "epoch": 0.23888353624711686, "grad_norm": 0.7602466810483235, "learning_rate": 4.228548256285483e-06, "loss": 0.6868, "step": 8182 }, { "epoch": 0.23891273247496422, "grad_norm": 0.7724750453150884, "learning_rate": 4.228386050283861e-06, "loss": 0.6663, "step": 8183 }, { "epoch": 0.23894192870281158, "grad_norm": 0.7808215742885174, "learning_rate": 4.228223844282239e-06, "loss": 0.7519, "step": 8184 }, { "epoch": 0.23897112493065895, "grad_norm": 0.7355045338361869, "learning_rate": 4.228061638280616e-06, "loss": 0.6876, "step": 8185 }, { "epoch": 0.23900032115850633, "grad_norm": 0.9543420532091879, "learning_rate": 4.227899432278994e-06, "loss": 0.7812, "step": 8186 }, { "epoch": 0.2390295173863537, "grad_norm": 0.7208959275698671, "learning_rate": 4.227737226277372e-06, "loss": 0.6949, "step": 8187 }, { "epoch": 0.23905871361420106, "grad_norm": 0.7231392845098105, "learning_rate": 4.22757502027575e-06, "loss": 0.6694, "step": 8188 }, { "epoch": 0.23908790984204842, "grad_norm": 0.7507159888039144, "learning_rate": 4.227412814274128e-06, "loss": 0.7028, "step": 8189 }, { "epoch": 0.23911710606989578, "grad_norm": 0.7891032810603086, "learning_rate": 4.227250608272506e-06, "loss": 0.6637, "step": 8190 }, { "epoch": 0.23914630229774314, "grad_norm": 0.7059348980761669, "learning_rate": 4.227088402270884e-06, "loss": 0.6418, "step": 8191 }, { "epoch": 0.2391754985255905, "grad_norm": 0.7373810953136019, "learning_rate": 4.226926196269262e-06, "loss": 0.6641, "step": 8192 }, { "epoch": 0.23920469475343786, "grad_norm": 0.7476670390520134, "learning_rate": 4.22676399026764e-06, "loss": 0.6749, "step": 8193 }, { "epoch": 0.23923389098128522, "grad_norm": 0.6968722434443708, "learning_rate": 4.226601784266018e-06, "loss": 0.6324, "step": 8194 }, { "epoch": 0.23926308720913259, "grad_norm": 0.7971997049216296, "learning_rate": 4.226439578264396e-06, "loss": 0.7101, "step": 8195 }, { "epoch": 0.23929228343697995, "grad_norm": 0.7443103547243205, "learning_rate": 4.226277372262774e-06, "loss": 0.65, "step": 8196 }, { "epoch": 0.2393214796648273, "grad_norm": 0.7272680702517165, "learning_rate": 4.226115166261152e-06, "loss": 0.6905, "step": 8197 }, { "epoch": 0.23935067589267467, "grad_norm": 0.8766828193820349, "learning_rate": 4.22595296025953e-06, "loss": 0.7471, "step": 8198 }, { "epoch": 0.23937987212052203, "grad_norm": 0.7204406394008348, "learning_rate": 4.225790754257908e-06, "loss": 0.6529, "step": 8199 }, { "epoch": 0.2394090683483694, "grad_norm": 0.7373932224263237, "learning_rate": 4.2256285482562855e-06, "loss": 0.6689, "step": 8200 }, { "epoch": 0.23943826457621675, "grad_norm": 0.8116414013927883, "learning_rate": 4.2254663422546635e-06, "loss": 0.7664, "step": 8201 }, { "epoch": 0.23946746080406411, "grad_norm": 0.7607881881491627, "learning_rate": 4.2253041362530415e-06, "loss": 0.6369, "step": 8202 }, { "epoch": 0.23949665703191148, "grad_norm": 0.7308338189162178, "learning_rate": 4.2251419302514196e-06, "loss": 0.6442, "step": 8203 }, { "epoch": 0.23952585325975884, "grad_norm": 0.8010663545230788, "learning_rate": 4.2249797242497976e-06, "loss": 0.7415, "step": 8204 }, { "epoch": 0.2395550494876062, "grad_norm": 0.7610311854244539, "learning_rate": 4.2248175182481756e-06, "loss": 0.6506, "step": 8205 }, { "epoch": 0.23958424571545356, "grad_norm": 0.7130803890997478, "learning_rate": 4.2246553122465536e-06, "loss": 0.5624, "step": 8206 }, { "epoch": 0.23961344194330092, "grad_norm": 0.780851611466096, "learning_rate": 4.224493106244932e-06, "loss": 0.6826, "step": 8207 }, { "epoch": 0.23964263817114828, "grad_norm": 0.7749093689570095, "learning_rate": 4.224330900243309e-06, "loss": 0.6734, "step": 8208 }, { "epoch": 0.23967183439899564, "grad_norm": 0.757314419778925, "learning_rate": 4.224168694241687e-06, "loss": 0.6535, "step": 8209 }, { "epoch": 0.239701030626843, "grad_norm": 0.7908625434046763, "learning_rate": 4.224006488240065e-06, "loss": 0.6863, "step": 8210 }, { "epoch": 0.23973022685469036, "grad_norm": 1.0209821109484305, "learning_rate": 4.223844282238443e-06, "loss": 0.6572, "step": 8211 }, { "epoch": 0.23975942308253773, "grad_norm": 0.7418114780340576, "learning_rate": 4.223682076236822e-06, "loss": 0.6502, "step": 8212 }, { "epoch": 0.2397886193103851, "grad_norm": 0.7135933058619818, "learning_rate": 4.2235198702352e-06, "loss": 0.6264, "step": 8213 }, { "epoch": 0.23981781553823245, "grad_norm": 0.7973950635752839, "learning_rate": 4.223357664233577e-06, "loss": 0.7094, "step": 8214 }, { "epoch": 0.2398470117660798, "grad_norm": 0.6935218750682484, "learning_rate": 4.223195458231955e-06, "loss": 0.6465, "step": 8215 }, { "epoch": 0.23987620799392717, "grad_norm": 0.7551544679722725, "learning_rate": 4.223033252230333e-06, "loss": 0.6755, "step": 8216 }, { "epoch": 0.23990540422177456, "grad_norm": 0.7101197857136036, "learning_rate": 4.222871046228711e-06, "loss": 0.659, "step": 8217 }, { "epoch": 0.23993460044962192, "grad_norm": 0.7254876664883393, "learning_rate": 4.222708840227089e-06, "loss": 0.6452, "step": 8218 }, { "epoch": 0.23996379667746928, "grad_norm": 0.773139243095494, "learning_rate": 4.222546634225467e-06, "loss": 0.7239, "step": 8219 }, { "epoch": 0.23999299290531664, "grad_norm": 0.7653262484776957, "learning_rate": 4.222384428223845e-06, "loss": 0.6816, "step": 8220 }, { "epoch": 0.240022189133164, "grad_norm": 0.7575436468681372, "learning_rate": 4.222222222222223e-06, "loss": 0.7057, "step": 8221 }, { "epoch": 0.24005138536101137, "grad_norm": 0.6904538206708104, "learning_rate": 4.222060016220601e-06, "loss": 0.6269, "step": 8222 }, { "epoch": 0.24008058158885873, "grad_norm": 0.8768501086655538, "learning_rate": 4.221897810218978e-06, "loss": 0.8103, "step": 8223 }, { "epoch": 0.2401097778167061, "grad_norm": 0.7326958909263065, "learning_rate": 4.221735604217356e-06, "loss": 0.6841, "step": 8224 }, { "epoch": 0.24013897404455345, "grad_norm": 0.7994833855785834, "learning_rate": 4.221573398215734e-06, "loss": 0.7638, "step": 8225 }, { "epoch": 0.2401681702724008, "grad_norm": 0.7660568782205746, "learning_rate": 4.221411192214112e-06, "loss": 0.6719, "step": 8226 }, { "epoch": 0.24019736650024817, "grad_norm": 0.7165095738435148, "learning_rate": 4.22124898621249e-06, "loss": 0.6183, "step": 8227 }, { "epoch": 0.24022656272809553, "grad_norm": 0.7430742547008711, "learning_rate": 4.221086780210868e-06, "loss": 0.6944, "step": 8228 }, { "epoch": 0.2402557589559429, "grad_norm": 0.7582887182095055, "learning_rate": 4.220924574209246e-06, "loss": 0.7157, "step": 8229 }, { "epoch": 0.24028495518379026, "grad_norm": 0.7119400292825208, "learning_rate": 4.220762368207624e-06, "loss": 0.649, "step": 8230 }, { "epoch": 0.24031415141163762, "grad_norm": 0.7823133793674248, "learning_rate": 4.220600162206002e-06, "loss": 0.7569, "step": 8231 }, { "epoch": 0.24034334763948498, "grad_norm": 0.7377843036996102, "learning_rate": 4.22043795620438e-06, "loss": 0.6592, "step": 8232 }, { "epoch": 0.24037254386733234, "grad_norm": 0.6937679573245065, "learning_rate": 4.220275750202758e-06, "loss": 0.5774, "step": 8233 }, { "epoch": 0.2404017400951797, "grad_norm": 0.6828283284626594, "learning_rate": 4.220113544201136e-06, "loss": 0.5537, "step": 8234 }, { "epoch": 0.24043093632302706, "grad_norm": 0.7494863901726243, "learning_rate": 4.219951338199514e-06, "loss": 0.7367, "step": 8235 }, { "epoch": 0.24046013255087442, "grad_norm": 1.0380416907253684, "learning_rate": 4.219789132197892e-06, "loss": 0.7918, "step": 8236 }, { "epoch": 0.24048932877872178, "grad_norm": 0.7252029712868864, "learning_rate": 4.21962692619627e-06, "loss": 0.6497, "step": 8237 }, { "epoch": 0.24051852500656914, "grad_norm": 0.809859303216746, "learning_rate": 4.219464720194647e-06, "loss": 0.7754, "step": 8238 }, { "epoch": 0.2405477212344165, "grad_norm": 0.7953167244747251, "learning_rate": 4.219302514193025e-06, "loss": 0.728, "step": 8239 }, { "epoch": 0.24057691746226387, "grad_norm": 0.7294990230449441, "learning_rate": 4.219140308191403e-06, "loss": 0.6442, "step": 8240 }, { "epoch": 0.24060611369011123, "grad_norm": 0.7203530814788879, "learning_rate": 4.218978102189781e-06, "loss": 0.629, "step": 8241 }, { "epoch": 0.2406353099179586, "grad_norm": 0.7910086338115144, "learning_rate": 4.218815896188159e-06, "loss": 0.7523, "step": 8242 }, { "epoch": 0.24066450614580595, "grad_norm": 0.7450013061615803, "learning_rate": 4.218653690186537e-06, "loss": 0.6266, "step": 8243 }, { "epoch": 0.2406937023736533, "grad_norm": 0.734593989701299, "learning_rate": 4.218491484184915e-06, "loss": 0.6825, "step": 8244 }, { "epoch": 0.24072289860150067, "grad_norm": 0.7466145716221324, "learning_rate": 4.218329278183293e-06, "loss": 0.6642, "step": 8245 }, { "epoch": 0.24075209482934803, "grad_norm": 0.7428484751983242, "learning_rate": 4.2181670721816705e-06, "loss": 0.6797, "step": 8246 }, { "epoch": 0.24078129105719542, "grad_norm": 0.8409568766290081, "learning_rate": 4.2180048661800485e-06, "loss": 0.7506, "step": 8247 }, { "epoch": 0.24081048728504278, "grad_norm": 0.8060956026806382, "learning_rate": 4.2178426601784265e-06, "loss": 0.709, "step": 8248 }, { "epoch": 0.24083968351289015, "grad_norm": 0.7713446475682074, "learning_rate": 4.217680454176805e-06, "loss": 0.6864, "step": 8249 }, { "epoch": 0.2408688797407375, "grad_norm": 0.7255788148872517, "learning_rate": 4.217518248175183e-06, "loss": 0.6538, "step": 8250 }, { "epoch": 0.24089807596858487, "grad_norm": 0.7722388696188561, "learning_rate": 4.217356042173561e-06, "loss": 0.7436, "step": 8251 }, { "epoch": 0.24092727219643223, "grad_norm": 0.7313991709280058, "learning_rate": 4.2171938361719385e-06, "loss": 0.615, "step": 8252 }, { "epoch": 0.2409564684242796, "grad_norm": 0.7205364354098729, "learning_rate": 4.2170316301703165e-06, "loss": 0.6115, "step": 8253 }, { "epoch": 0.24098566465212695, "grad_norm": 0.9619262514711069, "learning_rate": 4.2168694241686945e-06, "loss": 0.6566, "step": 8254 }, { "epoch": 0.2410148608799743, "grad_norm": 0.8124596064106069, "learning_rate": 4.2167072181670725e-06, "loss": 0.7079, "step": 8255 }, { "epoch": 0.24104405710782167, "grad_norm": 0.7494908397127104, "learning_rate": 4.2165450121654505e-06, "loss": 0.6296, "step": 8256 }, { "epoch": 0.24107325333566904, "grad_norm": 0.8180996465204794, "learning_rate": 4.2163828061638285e-06, "loss": 0.6856, "step": 8257 }, { "epoch": 0.2411024495635164, "grad_norm": 0.7709449447206864, "learning_rate": 4.2162206001622065e-06, "loss": 0.723, "step": 8258 }, { "epoch": 0.24113164579136376, "grad_norm": 0.832303193800163, "learning_rate": 4.2160583941605845e-06, "loss": 0.6158, "step": 8259 }, { "epoch": 0.24116084201921112, "grad_norm": 0.751568104935914, "learning_rate": 4.2158961881589626e-06, "loss": 0.6376, "step": 8260 }, { "epoch": 0.24119003824705848, "grad_norm": 0.8221634453522214, "learning_rate": 4.21573398215734e-06, "loss": 0.7922, "step": 8261 }, { "epoch": 0.24121923447490584, "grad_norm": 0.7534684059331908, "learning_rate": 4.215571776155718e-06, "loss": 0.712, "step": 8262 }, { "epoch": 0.2412484307027532, "grad_norm": 0.749346313597281, "learning_rate": 4.215409570154096e-06, "loss": 0.6749, "step": 8263 }, { "epoch": 0.24127762693060056, "grad_norm": 0.7627219326432778, "learning_rate": 4.215247364152474e-06, "loss": 0.7226, "step": 8264 }, { "epoch": 0.24130682315844793, "grad_norm": 0.7370740813424094, "learning_rate": 4.215085158150852e-06, "loss": 0.6959, "step": 8265 }, { "epoch": 0.2413360193862953, "grad_norm": 0.8418774807658462, "learning_rate": 4.21492295214923e-06, "loss": 0.7402, "step": 8266 }, { "epoch": 0.24136521561414265, "grad_norm": 0.7680971934267316, "learning_rate": 4.214760746147608e-06, "loss": 0.7259, "step": 8267 }, { "epoch": 0.24139441184199, "grad_norm": 0.7258087771132453, "learning_rate": 4.214598540145986e-06, "loss": 0.6529, "step": 8268 }, { "epoch": 0.24142360806983737, "grad_norm": 0.7475427531808702, "learning_rate": 4.214436334144364e-06, "loss": 0.6711, "step": 8269 }, { "epoch": 0.24145280429768473, "grad_norm": 0.7627943448874546, "learning_rate": 4.214274128142742e-06, "loss": 0.701, "step": 8270 }, { "epoch": 0.2414820005255321, "grad_norm": 0.7025200905712415, "learning_rate": 4.21411192214112e-06, "loss": 0.6412, "step": 8271 }, { "epoch": 0.24151119675337945, "grad_norm": 0.7939130853847397, "learning_rate": 4.213949716139498e-06, "loss": 0.7056, "step": 8272 }, { "epoch": 0.24154039298122681, "grad_norm": 0.7011738452844182, "learning_rate": 4.213787510137876e-06, "loss": 0.6211, "step": 8273 }, { "epoch": 0.24156958920907418, "grad_norm": 0.8415478948287918, "learning_rate": 4.213625304136254e-06, "loss": 0.7157, "step": 8274 }, { "epoch": 0.24159878543692154, "grad_norm": 0.7588182079140732, "learning_rate": 4.213463098134632e-06, "loss": 0.7434, "step": 8275 }, { "epoch": 0.2416279816647689, "grad_norm": 0.7495459499756743, "learning_rate": 4.213300892133009e-06, "loss": 0.667, "step": 8276 }, { "epoch": 0.2416571778926163, "grad_norm": 0.8122882271053906, "learning_rate": 4.213138686131387e-06, "loss": 0.6372, "step": 8277 }, { "epoch": 0.24168637412046365, "grad_norm": 0.7397058378108673, "learning_rate": 4.212976480129765e-06, "loss": 0.7151, "step": 8278 }, { "epoch": 0.241715570348311, "grad_norm": 0.7901009431120307, "learning_rate": 4.212814274128143e-06, "loss": 0.7608, "step": 8279 }, { "epoch": 0.24174476657615837, "grad_norm": 0.8015391015875863, "learning_rate": 4.212652068126521e-06, "loss": 0.7447, "step": 8280 }, { "epoch": 0.24177396280400573, "grad_norm": 0.763391230140344, "learning_rate": 4.212489862124899e-06, "loss": 0.6708, "step": 8281 }, { "epoch": 0.2418031590318531, "grad_norm": 0.7434493074989571, "learning_rate": 4.212327656123277e-06, "loss": 0.6748, "step": 8282 }, { "epoch": 0.24183235525970045, "grad_norm": 0.7337609071457959, "learning_rate": 4.212165450121655e-06, "loss": 0.6608, "step": 8283 }, { "epoch": 0.24186155148754782, "grad_norm": 0.7359712343062192, "learning_rate": 4.212003244120032e-06, "loss": 0.6648, "step": 8284 }, { "epoch": 0.24189074771539518, "grad_norm": 0.7721196726946543, "learning_rate": 4.21184103811841e-06, "loss": 0.6932, "step": 8285 }, { "epoch": 0.24191994394324254, "grad_norm": 0.712145230083024, "learning_rate": 4.211678832116788e-06, "loss": 0.6294, "step": 8286 }, { "epoch": 0.2419491401710899, "grad_norm": 0.7459261693931181, "learning_rate": 4.211516626115167e-06, "loss": 0.6533, "step": 8287 }, { "epoch": 0.24197833639893726, "grad_norm": 0.7026312043821712, "learning_rate": 4.211354420113545e-06, "loss": 0.671, "step": 8288 }, { "epoch": 0.24200753262678462, "grad_norm": 0.7228960439686865, "learning_rate": 4.211192214111923e-06, "loss": 0.6522, "step": 8289 }, { "epoch": 0.24203672885463198, "grad_norm": 0.8024781442289467, "learning_rate": 4.2110300081103e-06, "loss": 0.8043, "step": 8290 }, { "epoch": 0.24206592508247934, "grad_norm": 0.7186052194788217, "learning_rate": 4.210867802108678e-06, "loss": 0.6254, "step": 8291 }, { "epoch": 0.2420951213103267, "grad_norm": 0.7491871604592942, "learning_rate": 4.210705596107056e-06, "loss": 0.6499, "step": 8292 }, { "epoch": 0.24212431753817407, "grad_norm": 0.8535108540753054, "learning_rate": 4.210543390105434e-06, "loss": 0.7764, "step": 8293 }, { "epoch": 0.24215351376602143, "grad_norm": 0.8836966601773877, "learning_rate": 4.210381184103812e-06, "loss": 0.7657, "step": 8294 }, { "epoch": 0.2421827099938688, "grad_norm": 0.7377580351860291, "learning_rate": 4.21021897810219e-06, "loss": 0.6108, "step": 8295 }, { "epoch": 0.24221190622171615, "grad_norm": 0.6758187476463383, "learning_rate": 4.210056772100568e-06, "loss": 0.5925, "step": 8296 }, { "epoch": 0.2422411024495635, "grad_norm": 0.6936283208123201, "learning_rate": 4.209894566098946e-06, "loss": 0.6183, "step": 8297 }, { "epoch": 0.24227029867741087, "grad_norm": 1.1456709658507906, "learning_rate": 4.209732360097324e-06, "loss": 0.7194, "step": 8298 }, { "epoch": 0.24229949490525823, "grad_norm": 0.7414447839998918, "learning_rate": 4.2095701540957014e-06, "loss": 0.6938, "step": 8299 }, { "epoch": 0.2423286911331056, "grad_norm": 0.7285687688256807, "learning_rate": 4.2094079480940794e-06, "loss": 0.6967, "step": 8300 }, { "epoch": 0.24235788736095296, "grad_norm": 0.8185055672696298, "learning_rate": 4.2092457420924574e-06, "loss": 0.7654, "step": 8301 }, { "epoch": 0.24238708358880032, "grad_norm": 0.7490000797892619, "learning_rate": 4.2090835360908355e-06, "loss": 0.7343, "step": 8302 }, { "epoch": 0.24241627981664768, "grad_norm": 0.7377767897054366, "learning_rate": 4.2089213300892135e-06, "loss": 0.6407, "step": 8303 }, { "epoch": 0.24244547604449504, "grad_norm": 0.7608199181829711, "learning_rate": 4.2087591240875915e-06, "loss": 0.6736, "step": 8304 }, { "epoch": 0.2424746722723424, "grad_norm": 0.7763625184267325, "learning_rate": 4.2085969180859695e-06, "loss": 0.7151, "step": 8305 }, { "epoch": 0.24250386850018976, "grad_norm": 0.8274055855013701, "learning_rate": 4.2084347120843475e-06, "loss": 0.7592, "step": 8306 }, { "epoch": 0.24253306472803715, "grad_norm": 0.7614489606334932, "learning_rate": 4.2082725060827255e-06, "loss": 0.6963, "step": 8307 }, { "epoch": 0.2425622609558845, "grad_norm": 0.709832593748193, "learning_rate": 4.2081103000811035e-06, "loss": 0.6436, "step": 8308 }, { "epoch": 0.24259145718373187, "grad_norm": 0.7141333510388588, "learning_rate": 4.2079480940794815e-06, "loss": 0.6441, "step": 8309 }, { "epoch": 0.24262065341157923, "grad_norm": 0.7467853561929342, "learning_rate": 4.2077858880778595e-06, "loss": 0.7195, "step": 8310 }, { "epoch": 0.2426498496394266, "grad_norm": 0.6801148902768421, "learning_rate": 4.2076236820762375e-06, "loss": 0.5853, "step": 8311 }, { "epoch": 0.24267904586727396, "grad_norm": 0.8275605340504117, "learning_rate": 4.2074614760746155e-06, "loss": 0.816, "step": 8312 }, { "epoch": 0.24270824209512132, "grad_norm": 0.705272844860288, "learning_rate": 4.207299270072993e-06, "loss": 0.6364, "step": 8313 }, { "epoch": 0.24273743832296868, "grad_norm": 0.8530854766543321, "learning_rate": 4.207137064071371e-06, "loss": 0.8154, "step": 8314 }, { "epoch": 0.24276663455081604, "grad_norm": 0.7758634800303366, "learning_rate": 4.206974858069749e-06, "loss": 0.7164, "step": 8315 }, { "epoch": 0.2427958307786634, "grad_norm": 0.9919353043298502, "learning_rate": 4.206812652068127e-06, "loss": 0.674, "step": 8316 }, { "epoch": 0.24282502700651076, "grad_norm": 0.7385626745386457, "learning_rate": 4.206650446066505e-06, "loss": 0.6615, "step": 8317 }, { "epoch": 0.24285422323435812, "grad_norm": 0.7626999286259151, "learning_rate": 4.206488240064883e-06, "loss": 0.6937, "step": 8318 }, { "epoch": 0.24288341946220549, "grad_norm": 0.8313608459656396, "learning_rate": 4.206326034063261e-06, "loss": 0.7172, "step": 8319 }, { "epoch": 0.24291261569005285, "grad_norm": 0.7525446996338465, "learning_rate": 4.206163828061639e-06, "loss": 0.6783, "step": 8320 }, { "epoch": 0.2429418119179002, "grad_norm": 0.7332090136500585, "learning_rate": 4.206001622060017e-06, "loss": 0.64, "step": 8321 }, { "epoch": 0.24297100814574757, "grad_norm": 0.7195495681397562, "learning_rate": 4.205839416058394e-06, "loss": 0.5892, "step": 8322 }, { "epoch": 0.24300020437359493, "grad_norm": 0.7683970939249609, "learning_rate": 4.205677210056772e-06, "loss": 0.6455, "step": 8323 }, { "epoch": 0.2430294006014423, "grad_norm": 0.8629068690384596, "learning_rate": 4.20551500405515e-06, "loss": 0.7675, "step": 8324 }, { "epoch": 0.24305859682928965, "grad_norm": 0.7274682965450537, "learning_rate": 4.205352798053529e-06, "loss": 0.6272, "step": 8325 }, { "epoch": 0.243087793057137, "grad_norm": 0.8002830688779069, "learning_rate": 4.205190592051907e-06, "loss": 0.7792, "step": 8326 }, { "epoch": 0.24311698928498437, "grad_norm": 0.7349316581950803, "learning_rate": 4.205028386050285e-06, "loss": 0.6496, "step": 8327 }, { "epoch": 0.24314618551283174, "grad_norm": 0.7443111877106009, "learning_rate": 4.204866180048662e-06, "loss": 0.7111, "step": 8328 }, { "epoch": 0.2431753817406791, "grad_norm": 0.7107330990031689, "learning_rate": 4.20470397404704e-06, "loss": 0.6197, "step": 8329 }, { "epoch": 0.24320457796852646, "grad_norm": 0.7114944508936495, "learning_rate": 4.204541768045418e-06, "loss": 0.6432, "step": 8330 }, { "epoch": 0.24323377419637382, "grad_norm": 0.7838646352710368, "learning_rate": 4.204379562043796e-06, "loss": 0.6952, "step": 8331 }, { "epoch": 0.24326297042422118, "grad_norm": 0.67170485053019, "learning_rate": 4.204217356042174e-06, "loss": 0.5888, "step": 8332 }, { "epoch": 0.24329216665206854, "grad_norm": 0.7254635831158548, "learning_rate": 4.204055150040552e-06, "loss": 0.7151, "step": 8333 }, { "epoch": 0.2433213628799159, "grad_norm": 0.7553676962510708, "learning_rate": 4.20389294403893e-06, "loss": 0.6529, "step": 8334 }, { "epoch": 0.24335055910776326, "grad_norm": 0.8088528811081512, "learning_rate": 4.203730738037308e-06, "loss": 0.7423, "step": 8335 }, { "epoch": 0.24337975533561063, "grad_norm": 0.7853059735651531, "learning_rate": 4.203568532035686e-06, "loss": 0.7574, "step": 8336 }, { "epoch": 0.24340895156345801, "grad_norm": 0.7268025883622986, "learning_rate": 4.203406326034063e-06, "loss": 0.6278, "step": 8337 }, { "epoch": 0.24343814779130538, "grad_norm": 0.7671184596433147, "learning_rate": 4.203244120032441e-06, "loss": 0.6893, "step": 8338 }, { "epoch": 0.24346734401915274, "grad_norm": 0.7154255198587534, "learning_rate": 4.203081914030819e-06, "loss": 0.6412, "step": 8339 }, { "epoch": 0.2434965402470001, "grad_norm": 0.7739941003538654, "learning_rate": 4.202919708029197e-06, "loss": 0.6962, "step": 8340 }, { "epoch": 0.24352573647484746, "grad_norm": 0.8094557558373832, "learning_rate": 4.202757502027575e-06, "loss": 0.7696, "step": 8341 }, { "epoch": 0.24355493270269482, "grad_norm": 0.7374230128378996, "learning_rate": 4.202595296025953e-06, "loss": 0.7284, "step": 8342 }, { "epoch": 0.24358412893054218, "grad_norm": 0.6991384643402684, "learning_rate": 4.202433090024331e-06, "loss": 0.5955, "step": 8343 }, { "epoch": 0.24361332515838954, "grad_norm": 0.8326299591789649, "learning_rate": 4.202270884022709e-06, "loss": 0.7285, "step": 8344 }, { "epoch": 0.2436425213862369, "grad_norm": 0.790320191529444, "learning_rate": 4.202108678021087e-06, "loss": 0.7615, "step": 8345 }, { "epoch": 0.24367171761408427, "grad_norm": 0.6829312440831933, "learning_rate": 4.201946472019465e-06, "loss": 0.5847, "step": 8346 }, { "epoch": 0.24370091384193163, "grad_norm": 0.6942238668161198, "learning_rate": 4.201784266017843e-06, "loss": 0.632, "step": 8347 }, { "epoch": 0.243730110069779, "grad_norm": 0.8064402950103717, "learning_rate": 4.201622060016221e-06, "loss": 0.808, "step": 8348 }, { "epoch": 0.24375930629762635, "grad_norm": 0.7464206892154442, "learning_rate": 4.201459854014599e-06, "loss": 0.6666, "step": 8349 }, { "epoch": 0.2437885025254737, "grad_norm": 0.6984038118009792, "learning_rate": 4.201297648012977e-06, "loss": 0.6104, "step": 8350 }, { "epoch": 0.24381769875332107, "grad_norm": 0.7264627063727312, "learning_rate": 4.201135442011354e-06, "loss": 0.6885, "step": 8351 }, { "epoch": 0.24384689498116843, "grad_norm": 0.7024952480545508, "learning_rate": 4.200973236009732e-06, "loss": 0.6341, "step": 8352 }, { "epoch": 0.2438760912090158, "grad_norm": 0.7305614814101711, "learning_rate": 4.20081103000811e-06, "loss": 0.6435, "step": 8353 }, { "epoch": 0.24390528743686316, "grad_norm": 0.8233175740891926, "learning_rate": 4.200648824006488e-06, "loss": 0.6098, "step": 8354 }, { "epoch": 0.24393448366471052, "grad_norm": 0.7509837058161462, "learning_rate": 4.2004866180048664e-06, "loss": 0.7033, "step": 8355 }, { "epoch": 0.24396367989255788, "grad_norm": 0.7538447896939237, "learning_rate": 4.2003244120032444e-06, "loss": 0.6565, "step": 8356 }, { "epoch": 0.24399287612040524, "grad_norm": 0.7509093480866239, "learning_rate": 4.2001622060016224e-06, "loss": 0.6955, "step": 8357 }, { "epoch": 0.2440220723482526, "grad_norm": 0.7369640430565275, "learning_rate": 4.2000000000000004e-06, "loss": 0.7069, "step": 8358 }, { "epoch": 0.24405126857609996, "grad_norm": 0.7736866300392427, "learning_rate": 4.1998377939983785e-06, "loss": 0.6914, "step": 8359 }, { "epoch": 0.24408046480394732, "grad_norm": 0.7508331109102659, "learning_rate": 4.199675587996756e-06, "loss": 0.723, "step": 8360 }, { "epoch": 0.24410966103179468, "grad_norm": 0.7613752283414416, "learning_rate": 4.199513381995134e-06, "loss": 0.6438, "step": 8361 }, { "epoch": 0.24413885725964204, "grad_norm": 0.7513954058572405, "learning_rate": 4.199351175993512e-06, "loss": 0.6926, "step": 8362 }, { "epoch": 0.2441680534874894, "grad_norm": 0.7291041902742399, "learning_rate": 4.1991889699918905e-06, "loss": 0.6175, "step": 8363 }, { "epoch": 0.24419724971533677, "grad_norm": 0.7123173305147865, "learning_rate": 4.1990267639902685e-06, "loss": 0.6204, "step": 8364 }, { "epoch": 0.24422644594318413, "grad_norm": 0.7551285840721138, "learning_rate": 4.1988645579886465e-06, "loss": 0.6412, "step": 8365 }, { "epoch": 0.2442556421710315, "grad_norm": 0.7738889389955634, "learning_rate": 4.198702351987024e-06, "loss": 0.712, "step": 8366 }, { "epoch": 0.24428483839887888, "grad_norm": 0.7273840941783681, "learning_rate": 4.198540145985402e-06, "loss": 0.6751, "step": 8367 }, { "epoch": 0.24431403462672624, "grad_norm": 0.7484920542181588, "learning_rate": 4.19837793998378e-06, "loss": 0.6577, "step": 8368 }, { "epoch": 0.2443432308545736, "grad_norm": 0.707507474611642, "learning_rate": 4.198215733982158e-06, "loss": 0.6053, "step": 8369 }, { "epoch": 0.24437242708242096, "grad_norm": 0.7681892832536348, "learning_rate": 4.198053527980536e-06, "loss": 0.714, "step": 8370 }, { "epoch": 0.24440162331026832, "grad_norm": 0.7166872822266775, "learning_rate": 4.197891321978914e-06, "loss": 0.6469, "step": 8371 }, { "epoch": 0.24443081953811568, "grad_norm": 0.7405557303556728, "learning_rate": 4.197729115977292e-06, "loss": 0.6616, "step": 8372 }, { "epoch": 0.24446001576596305, "grad_norm": 1.056664787548725, "learning_rate": 4.19756690997567e-06, "loss": 0.674, "step": 8373 }, { "epoch": 0.2444892119938104, "grad_norm": 0.7305113610300998, "learning_rate": 4.197404703974048e-06, "loss": 0.6884, "step": 8374 }, { "epoch": 0.24451840822165777, "grad_norm": 0.7647488761127808, "learning_rate": 4.197242497972425e-06, "loss": 0.6778, "step": 8375 }, { "epoch": 0.24454760444950513, "grad_norm": 0.7592821526477461, "learning_rate": 4.197080291970803e-06, "loss": 0.7212, "step": 8376 }, { "epoch": 0.2445768006773525, "grad_norm": 0.8045245804360056, "learning_rate": 4.196918085969181e-06, "loss": 0.6661, "step": 8377 }, { "epoch": 0.24460599690519985, "grad_norm": 0.79633909594303, "learning_rate": 4.196755879967559e-06, "loss": 0.5247, "step": 8378 }, { "epoch": 0.2446351931330472, "grad_norm": 0.7442130415722491, "learning_rate": 4.196593673965937e-06, "loss": 0.6992, "step": 8379 }, { "epoch": 0.24466438936089457, "grad_norm": 0.7891492319706707, "learning_rate": 4.196431467964315e-06, "loss": 0.7376, "step": 8380 }, { "epoch": 0.24469358558874194, "grad_norm": 0.7916141067750609, "learning_rate": 4.196269261962693e-06, "loss": 0.6839, "step": 8381 }, { "epoch": 0.2447227818165893, "grad_norm": 0.7617331603422587, "learning_rate": 4.196107055961071e-06, "loss": 0.6842, "step": 8382 }, { "epoch": 0.24475197804443666, "grad_norm": 0.7236736369933225, "learning_rate": 4.195944849959449e-06, "loss": 0.6413, "step": 8383 }, { "epoch": 0.24478117427228402, "grad_norm": 0.8168748283202271, "learning_rate": 4.195782643957827e-06, "loss": 0.7582, "step": 8384 }, { "epoch": 0.24481037050013138, "grad_norm": 0.7793320361903422, "learning_rate": 4.195620437956205e-06, "loss": 0.7218, "step": 8385 }, { "epoch": 0.24483956672797874, "grad_norm": 0.7472520060518731, "learning_rate": 4.195458231954583e-06, "loss": 0.6077, "step": 8386 }, { "epoch": 0.2448687629558261, "grad_norm": 0.7402131385354264, "learning_rate": 4.195296025952961e-06, "loss": 0.6318, "step": 8387 }, { "epoch": 0.24489795918367346, "grad_norm": 0.7546571381158613, "learning_rate": 4.195133819951339e-06, "loss": 0.7035, "step": 8388 }, { "epoch": 0.24492715541152082, "grad_norm": 0.7637816639767835, "learning_rate": 4.194971613949716e-06, "loss": 0.6477, "step": 8389 }, { "epoch": 0.2449563516393682, "grad_norm": 0.7523446516891755, "learning_rate": 4.194809407948094e-06, "loss": 0.7277, "step": 8390 }, { "epoch": 0.24498554786721555, "grad_norm": 0.733666215326306, "learning_rate": 4.194647201946472e-06, "loss": 0.608, "step": 8391 }, { "epoch": 0.2450147440950629, "grad_norm": 0.7417309738729525, "learning_rate": 4.19448499594485e-06, "loss": 0.6599, "step": 8392 }, { "epoch": 0.24504394032291027, "grad_norm": 0.7945264711191105, "learning_rate": 4.194322789943228e-06, "loss": 0.6916, "step": 8393 }, { "epoch": 0.24507313655075763, "grad_norm": 0.7466518952252273, "learning_rate": 4.194160583941606e-06, "loss": 0.6352, "step": 8394 }, { "epoch": 0.245102332778605, "grad_norm": 0.7194508565364142, "learning_rate": 4.193998377939984e-06, "loss": 0.6337, "step": 8395 }, { "epoch": 0.24513152900645235, "grad_norm": 0.8760363352561583, "learning_rate": 4.193836171938362e-06, "loss": 0.6841, "step": 8396 }, { "epoch": 0.24516072523429974, "grad_norm": 0.7340597576474789, "learning_rate": 4.19367396593674e-06, "loss": 0.6912, "step": 8397 }, { "epoch": 0.2451899214621471, "grad_norm": 0.6979737025860582, "learning_rate": 4.193511759935117e-06, "loss": 0.5902, "step": 8398 }, { "epoch": 0.24521911768999446, "grad_norm": 0.7073560139699006, "learning_rate": 4.193349553933495e-06, "loss": 0.5979, "step": 8399 }, { "epoch": 0.24524831391784183, "grad_norm": 0.6838973024047246, "learning_rate": 4.193187347931874e-06, "loss": 0.612, "step": 8400 }, { "epoch": 0.2452775101456892, "grad_norm": 0.7576167461583319, "learning_rate": 4.193025141930252e-06, "loss": 0.7066, "step": 8401 }, { "epoch": 0.24530670637353655, "grad_norm": 0.714179529759759, "learning_rate": 4.19286293592863e-06, "loss": 0.6263, "step": 8402 }, { "epoch": 0.2453359026013839, "grad_norm": 0.780764312478899, "learning_rate": 4.192700729927008e-06, "loss": 0.7075, "step": 8403 }, { "epoch": 0.24536509882923127, "grad_norm": 0.7553306048914475, "learning_rate": 4.192538523925385e-06, "loss": 0.6674, "step": 8404 }, { "epoch": 0.24539429505707863, "grad_norm": 0.7443839922209122, "learning_rate": 4.192376317923763e-06, "loss": 0.5819, "step": 8405 }, { "epoch": 0.245423491284926, "grad_norm": 0.6995337640960588, "learning_rate": 4.192214111922141e-06, "loss": 0.6372, "step": 8406 }, { "epoch": 0.24545268751277335, "grad_norm": 0.7926743992857018, "learning_rate": 4.192051905920519e-06, "loss": 0.7274, "step": 8407 }, { "epoch": 0.24548188374062072, "grad_norm": 0.7275002283425689, "learning_rate": 4.191889699918897e-06, "loss": 0.6816, "step": 8408 }, { "epoch": 0.24551107996846808, "grad_norm": 0.676656073005067, "learning_rate": 4.191727493917275e-06, "loss": 0.5897, "step": 8409 }, { "epoch": 0.24554027619631544, "grad_norm": 0.7169420009439351, "learning_rate": 4.191565287915653e-06, "loss": 0.6174, "step": 8410 }, { "epoch": 0.2455694724241628, "grad_norm": 0.799178740772683, "learning_rate": 4.191403081914031e-06, "loss": 0.7012, "step": 8411 }, { "epoch": 0.24559866865201016, "grad_norm": 0.721530890505103, "learning_rate": 4.1912408759124094e-06, "loss": 0.6448, "step": 8412 }, { "epoch": 0.24562786487985752, "grad_norm": 0.698355527690907, "learning_rate": 4.191078669910787e-06, "loss": 0.6076, "step": 8413 }, { "epoch": 0.24565706110770488, "grad_norm": 0.7525447330401321, "learning_rate": 4.190916463909165e-06, "loss": 0.6542, "step": 8414 }, { "epoch": 0.24568625733555224, "grad_norm": 0.6899119976442318, "learning_rate": 4.190754257907543e-06, "loss": 0.576, "step": 8415 }, { "epoch": 0.2457154535633996, "grad_norm": 0.8463464785296088, "learning_rate": 4.190592051905921e-06, "loss": 0.6787, "step": 8416 }, { "epoch": 0.24574464979124697, "grad_norm": 0.8072364571870095, "learning_rate": 4.190429845904299e-06, "loss": 0.8266, "step": 8417 }, { "epoch": 0.24577384601909433, "grad_norm": 0.6893547707787621, "learning_rate": 4.190267639902677e-06, "loss": 0.6315, "step": 8418 }, { "epoch": 0.2458030422469417, "grad_norm": 0.7953894366774666, "learning_rate": 4.190105433901055e-06, "loss": 0.7329, "step": 8419 }, { "epoch": 0.24583223847478905, "grad_norm": 0.7370672339959258, "learning_rate": 4.189943227899433e-06, "loss": 0.6524, "step": 8420 }, { "epoch": 0.2458614347026364, "grad_norm": 0.757840077344436, "learning_rate": 4.189781021897811e-06, "loss": 0.7385, "step": 8421 }, { "epoch": 0.24589063093048377, "grad_norm": 0.7242275763876981, "learning_rate": 4.189618815896189e-06, "loss": 0.6975, "step": 8422 }, { "epoch": 0.24591982715833113, "grad_norm": 0.7391208754474217, "learning_rate": 4.189456609894567e-06, "loss": 0.6827, "step": 8423 }, { "epoch": 0.2459490233861785, "grad_norm": 1.1212200323292631, "learning_rate": 4.189294403892945e-06, "loss": 0.676, "step": 8424 }, { "epoch": 0.24597821961402586, "grad_norm": 0.7716859165569, "learning_rate": 4.189132197891323e-06, "loss": 0.7039, "step": 8425 }, { "epoch": 0.24600741584187322, "grad_norm": 0.8109326829981903, "learning_rate": 4.188969991889701e-06, "loss": 0.6335, "step": 8426 }, { "epoch": 0.24603661206972058, "grad_norm": 0.7689022394179548, "learning_rate": 4.188807785888078e-06, "loss": 0.72, "step": 8427 }, { "epoch": 0.24606580829756797, "grad_norm": 0.7847694339420008, "learning_rate": 4.188645579886456e-06, "loss": 0.7235, "step": 8428 }, { "epoch": 0.24609500452541533, "grad_norm": 0.7708653446417699, "learning_rate": 4.188483373884834e-06, "loss": 0.7155, "step": 8429 }, { "epoch": 0.2461242007532627, "grad_norm": 0.920782259233925, "learning_rate": 4.188321167883212e-06, "loss": 0.7798, "step": 8430 }, { "epoch": 0.24615339698111005, "grad_norm": 0.7727920100738814, "learning_rate": 4.18815896188159e-06, "loss": 0.7065, "step": 8431 }, { "epoch": 0.2461825932089574, "grad_norm": 0.8040726905116204, "learning_rate": 4.187996755879968e-06, "loss": 0.6255, "step": 8432 }, { "epoch": 0.24621178943680477, "grad_norm": 0.7287152414043053, "learning_rate": 4.187834549878346e-06, "loss": 0.6398, "step": 8433 }, { "epoch": 0.24624098566465213, "grad_norm": 0.7226299132709826, "learning_rate": 4.187672343876724e-06, "loss": 0.7007, "step": 8434 }, { "epoch": 0.2462701818924995, "grad_norm": 0.73244364237576, "learning_rate": 4.187510137875102e-06, "loss": 0.6733, "step": 8435 }, { "epoch": 0.24629937812034686, "grad_norm": 0.7417948612289093, "learning_rate": 4.187347931873479e-06, "loss": 0.7019, "step": 8436 }, { "epoch": 0.24632857434819422, "grad_norm": 0.747542418209102, "learning_rate": 4.187185725871857e-06, "loss": 0.6862, "step": 8437 }, { "epoch": 0.24635777057604158, "grad_norm": 0.7710118564790458, "learning_rate": 4.187023519870236e-06, "loss": 0.7148, "step": 8438 }, { "epoch": 0.24638696680388894, "grad_norm": 0.82728085357023, "learning_rate": 4.186861313868614e-06, "loss": 0.7918, "step": 8439 }, { "epoch": 0.2464161630317363, "grad_norm": 0.7145462027684009, "learning_rate": 4.186699107866992e-06, "loss": 0.7146, "step": 8440 }, { "epoch": 0.24644535925958366, "grad_norm": 0.7853369112390687, "learning_rate": 4.18653690186537e-06, "loss": 0.6587, "step": 8441 }, { "epoch": 0.24647455548743102, "grad_norm": 0.6845827563644774, "learning_rate": 4.186374695863747e-06, "loss": 0.5782, "step": 8442 }, { "epoch": 0.24650375171527839, "grad_norm": 0.7047729509109618, "learning_rate": 4.186212489862125e-06, "loss": 0.6005, "step": 8443 }, { "epoch": 0.24653294794312575, "grad_norm": 0.7735069584011819, "learning_rate": 4.186050283860503e-06, "loss": 0.6973, "step": 8444 }, { "epoch": 0.2465621441709731, "grad_norm": 0.7890311789827864, "learning_rate": 4.185888077858881e-06, "loss": 0.6722, "step": 8445 }, { "epoch": 0.24659134039882047, "grad_norm": 0.7357439992952642, "learning_rate": 4.185725871857259e-06, "loss": 0.6749, "step": 8446 }, { "epoch": 0.24662053662666783, "grad_norm": 0.7704060613598437, "learning_rate": 4.185563665855637e-06, "loss": 0.6567, "step": 8447 }, { "epoch": 0.2466497328545152, "grad_norm": 0.7790044168766023, "learning_rate": 4.185401459854015e-06, "loss": 0.6938, "step": 8448 }, { "epoch": 0.24667892908236255, "grad_norm": 0.7644285000978611, "learning_rate": 4.185239253852393e-06, "loss": 0.6734, "step": 8449 }, { "epoch": 0.2467081253102099, "grad_norm": 0.7263084547515777, "learning_rate": 4.185077047850771e-06, "loss": 0.6288, "step": 8450 }, { "epoch": 0.24673732153805727, "grad_norm": 0.7254834823368101, "learning_rate": 4.184914841849148e-06, "loss": 0.6376, "step": 8451 }, { "epoch": 0.24676651776590464, "grad_norm": 0.6719187252260382, "learning_rate": 4.184752635847526e-06, "loss": 0.5531, "step": 8452 }, { "epoch": 0.246795713993752, "grad_norm": 0.7075102047570327, "learning_rate": 4.184590429845904e-06, "loss": 0.6679, "step": 8453 }, { "epoch": 0.24682491022159936, "grad_norm": 0.7422309613111167, "learning_rate": 4.184428223844282e-06, "loss": 0.6779, "step": 8454 }, { "epoch": 0.24685410644944672, "grad_norm": 0.8645259905386244, "learning_rate": 4.18426601784266e-06, "loss": 0.6108, "step": 8455 }, { "epoch": 0.24688330267729408, "grad_norm": 0.734482955356571, "learning_rate": 4.184103811841038e-06, "loss": 0.6753, "step": 8456 }, { "epoch": 0.24691249890514144, "grad_norm": 0.6615506714804794, "learning_rate": 4.183941605839416e-06, "loss": 0.5703, "step": 8457 }, { "epoch": 0.24694169513298883, "grad_norm": 0.8071565659538491, "learning_rate": 4.183779399837794e-06, "loss": 0.721, "step": 8458 }, { "epoch": 0.2469708913608362, "grad_norm": 0.6964390762874871, "learning_rate": 4.183617193836172e-06, "loss": 0.5794, "step": 8459 }, { "epoch": 0.24700008758868355, "grad_norm": 0.8604394547314435, "learning_rate": 4.18345498783455e-06, "loss": 0.6488, "step": 8460 }, { "epoch": 0.24702928381653091, "grad_norm": 0.7843415799279833, "learning_rate": 4.183292781832928e-06, "loss": 0.69, "step": 8461 }, { "epoch": 0.24705848004437828, "grad_norm": 0.7233738772279941, "learning_rate": 4.183130575831306e-06, "loss": 0.6535, "step": 8462 }, { "epoch": 0.24708767627222564, "grad_norm": 0.7013887804075213, "learning_rate": 4.182968369829684e-06, "loss": 0.5935, "step": 8463 }, { "epoch": 0.247116872500073, "grad_norm": 0.7804135315987353, "learning_rate": 4.182806163828062e-06, "loss": 0.7047, "step": 8464 }, { "epoch": 0.24714606872792036, "grad_norm": 0.7719676555631171, "learning_rate": 4.1826439578264395e-06, "loss": 0.6793, "step": 8465 }, { "epoch": 0.24717526495576772, "grad_norm": 0.7504456892551091, "learning_rate": 4.1824817518248176e-06, "loss": 0.684, "step": 8466 }, { "epoch": 0.24720446118361508, "grad_norm": 0.7964790534643418, "learning_rate": 4.1823195458231956e-06, "loss": 0.7291, "step": 8467 }, { "epoch": 0.24723365741146244, "grad_norm": 0.6969650609040924, "learning_rate": 4.1821573398215736e-06, "loss": 0.6528, "step": 8468 }, { "epoch": 0.2472628536393098, "grad_norm": 0.7781035025250683, "learning_rate": 4.181995133819952e-06, "loss": 0.7001, "step": 8469 }, { "epoch": 0.24729204986715717, "grad_norm": 0.8282661087897426, "learning_rate": 4.18183292781833e-06, "loss": 0.7578, "step": 8470 }, { "epoch": 0.24732124609500453, "grad_norm": 0.7577665187462785, "learning_rate": 4.181670721816708e-06, "loss": 0.6801, "step": 8471 }, { "epoch": 0.2473504423228519, "grad_norm": 0.7342666398501311, "learning_rate": 4.181508515815086e-06, "loss": 0.7127, "step": 8472 }, { "epoch": 0.24737963855069925, "grad_norm": 1.0141806433868774, "learning_rate": 4.181346309813464e-06, "loss": 0.7374, "step": 8473 }, { "epoch": 0.2474088347785466, "grad_norm": 0.6918868752606073, "learning_rate": 4.181184103811841e-06, "loss": 0.573, "step": 8474 }, { "epoch": 0.24743803100639397, "grad_norm": 0.8427979103117742, "learning_rate": 4.181021897810219e-06, "loss": 0.668, "step": 8475 }, { "epoch": 0.24746722723424133, "grad_norm": 0.7332295617789765, "learning_rate": 4.180859691808598e-06, "loss": 0.7529, "step": 8476 }, { "epoch": 0.2474964234620887, "grad_norm": 0.7266879392503519, "learning_rate": 4.180697485806976e-06, "loss": 0.6541, "step": 8477 }, { "epoch": 0.24752561968993606, "grad_norm": 0.7429176425302804, "learning_rate": 4.180535279805354e-06, "loss": 0.6432, "step": 8478 }, { "epoch": 0.24755481591778342, "grad_norm": 0.7666280139483904, "learning_rate": 4.180373073803732e-06, "loss": 0.7123, "step": 8479 }, { "epoch": 0.24758401214563078, "grad_norm": 0.7763652083081559, "learning_rate": 4.180210867802109e-06, "loss": 0.6908, "step": 8480 }, { "epoch": 0.24761320837347814, "grad_norm": 0.7320930451774355, "learning_rate": 4.180048661800487e-06, "loss": 0.7117, "step": 8481 }, { "epoch": 0.2476424046013255, "grad_norm": 0.736856156148421, "learning_rate": 4.179886455798865e-06, "loss": 0.657, "step": 8482 }, { "epoch": 0.24767160082917286, "grad_norm": 0.7681932112785728, "learning_rate": 4.179724249797243e-06, "loss": 0.7089, "step": 8483 }, { "epoch": 0.24770079705702022, "grad_norm": 0.7019412607777041, "learning_rate": 4.179562043795621e-06, "loss": 0.6117, "step": 8484 }, { "epoch": 0.24772999328486758, "grad_norm": 0.7330210973591718, "learning_rate": 4.179399837793999e-06, "loss": 0.6537, "step": 8485 }, { "epoch": 0.24775918951271494, "grad_norm": 0.8919642777962898, "learning_rate": 4.179237631792377e-06, "loss": 0.7886, "step": 8486 }, { "epoch": 0.2477883857405623, "grad_norm": 0.6856643675369318, "learning_rate": 4.179075425790755e-06, "loss": 0.5858, "step": 8487 }, { "epoch": 0.2478175819684097, "grad_norm": 0.7575092957083193, "learning_rate": 4.178913219789133e-06, "loss": 0.6892, "step": 8488 }, { "epoch": 0.24784677819625706, "grad_norm": 0.6833900938316722, "learning_rate": 4.17875101378751e-06, "loss": 0.5921, "step": 8489 }, { "epoch": 0.24787597442410442, "grad_norm": 0.7331244863073972, "learning_rate": 4.178588807785888e-06, "loss": 0.6717, "step": 8490 }, { "epoch": 0.24790517065195178, "grad_norm": 0.7400596348175328, "learning_rate": 4.178426601784266e-06, "loss": 0.6437, "step": 8491 }, { "epoch": 0.24793436687979914, "grad_norm": 0.6893416120364592, "learning_rate": 4.178264395782644e-06, "loss": 0.6245, "step": 8492 }, { "epoch": 0.2479635631076465, "grad_norm": 0.8109943435272697, "learning_rate": 4.178102189781022e-06, "loss": 0.6926, "step": 8493 }, { "epoch": 0.24799275933549386, "grad_norm": 0.7597123608975261, "learning_rate": 4.1779399837794e-06, "loss": 0.7568, "step": 8494 }, { "epoch": 0.24802195556334122, "grad_norm": 0.7003313213050093, "learning_rate": 4.177777777777778e-06, "loss": 0.5799, "step": 8495 }, { "epoch": 0.24805115179118858, "grad_norm": 0.8039161483460974, "learning_rate": 4.177615571776156e-06, "loss": 0.6626, "step": 8496 }, { "epoch": 0.24808034801903595, "grad_norm": 0.7708215358106628, "learning_rate": 4.177453365774534e-06, "loss": 0.7014, "step": 8497 }, { "epoch": 0.2481095442468833, "grad_norm": 0.7581118137631652, "learning_rate": 4.177291159772912e-06, "loss": 0.6681, "step": 8498 }, { "epoch": 0.24813874047473067, "grad_norm": 0.7617779299219091, "learning_rate": 4.17712895377129e-06, "loss": 0.7384, "step": 8499 }, { "epoch": 0.24816793670257803, "grad_norm": 0.8703722245264197, "learning_rate": 4.176966747769668e-06, "loss": 0.6891, "step": 8500 }, { "epoch": 0.2481971329304254, "grad_norm": 0.7387660752475315, "learning_rate": 4.176804541768046e-06, "loss": 0.6914, "step": 8501 }, { "epoch": 0.24822632915827275, "grad_norm": 0.7388352802130268, "learning_rate": 4.176642335766424e-06, "loss": 0.6637, "step": 8502 }, { "epoch": 0.2482555253861201, "grad_norm": 0.7796490592389436, "learning_rate": 4.176480129764801e-06, "loss": 0.7723, "step": 8503 }, { "epoch": 0.24828472161396747, "grad_norm": 0.7016865479923207, "learning_rate": 4.176317923763179e-06, "loss": 0.601, "step": 8504 }, { "epoch": 0.24831391784181484, "grad_norm": 0.7137986287823317, "learning_rate": 4.176155717761557e-06, "loss": 0.6054, "step": 8505 }, { "epoch": 0.2483431140696622, "grad_norm": 0.7356863016242443, "learning_rate": 4.175993511759935e-06, "loss": 0.7006, "step": 8506 }, { "epoch": 0.24837231029750956, "grad_norm": 0.7040690996784451, "learning_rate": 4.175831305758313e-06, "loss": 0.6314, "step": 8507 }, { "epoch": 0.24840150652535692, "grad_norm": 0.7565504110170319, "learning_rate": 4.175669099756691e-06, "loss": 0.7012, "step": 8508 }, { "epoch": 0.24843070275320428, "grad_norm": 0.7300019930717234, "learning_rate": 4.175506893755069e-06, "loss": 0.5867, "step": 8509 }, { "epoch": 0.24845989898105164, "grad_norm": 0.7236583193702463, "learning_rate": 4.175344687753447e-06, "loss": 0.6652, "step": 8510 }, { "epoch": 0.248489095208899, "grad_norm": 0.7240839409169664, "learning_rate": 4.175182481751825e-06, "loss": 0.6779, "step": 8511 }, { "epoch": 0.24851829143674636, "grad_norm": 0.7823854857938903, "learning_rate": 4.1750202757502025e-06, "loss": 0.717, "step": 8512 }, { "epoch": 0.24854748766459372, "grad_norm": 0.7408668523079002, "learning_rate": 4.1748580697485805e-06, "loss": 0.6513, "step": 8513 }, { "epoch": 0.24857668389244109, "grad_norm": 0.7248357305985538, "learning_rate": 4.174695863746959e-06, "loss": 0.6552, "step": 8514 }, { "epoch": 0.24860588012028845, "grad_norm": 0.7813012651638124, "learning_rate": 4.174533657745337e-06, "loss": 0.678, "step": 8515 }, { "epoch": 0.2486350763481358, "grad_norm": 0.7843367510419568, "learning_rate": 4.174371451743715e-06, "loss": 0.7031, "step": 8516 }, { "epoch": 0.24866427257598317, "grad_norm": 0.6890417266449347, "learning_rate": 4.174209245742093e-06, "loss": 0.5922, "step": 8517 }, { "epoch": 0.24869346880383056, "grad_norm": 0.8875616369501386, "learning_rate": 4.1740470397404705e-06, "loss": 0.6703, "step": 8518 }, { "epoch": 0.24872266503167792, "grad_norm": 0.7186340509222484, "learning_rate": 4.1738848337388485e-06, "loss": 0.6169, "step": 8519 }, { "epoch": 0.24875186125952528, "grad_norm": 0.7249658056499806, "learning_rate": 4.1737226277372265e-06, "loss": 0.6491, "step": 8520 }, { "epoch": 0.24878105748737264, "grad_norm": 0.7332935545598673, "learning_rate": 4.1735604217356045e-06, "loss": 0.6382, "step": 8521 }, { "epoch": 0.24881025371522, "grad_norm": 0.7455998140041946, "learning_rate": 4.1733982157339825e-06, "loss": 0.6523, "step": 8522 }, { "epoch": 0.24883944994306736, "grad_norm": 0.7075401020800399, "learning_rate": 4.1732360097323606e-06, "loss": 0.6406, "step": 8523 }, { "epoch": 0.24886864617091473, "grad_norm": 0.7100271082605941, "learning_rate": 4.1730738037307386e-06, "loss": 0.6474, "step": 8524 }, { "epoch": 0.2488978423987621, "grad_norm": 0.6622411010361083, "learning_rate": 4.1729115977291166e-06, "loss": 0.5702, "step": 8525 }, { "epoch": 0.24892703862660945, "grad_norm": 0.8057488575210894, "learning_rate": 4.1727493917274946e-06, "loss": 0.6959, "step": 8526 }, { "epoch": 0.2489562348544568, "grad_norm": 0.7681366361501925, "learning_rate": 4.172587185725872e-06, "loss": 0.7218, "step": 8527 }, { "epoch": 0.24898543108230417, "grad_norm": 0.8484690033163771, "learning_rate": 4.17242497972425e-06, "loss": 0.7375, "step": 8528 }, { "epoch": 0.24901462731015153, "grad_norm": 0.9790216208547817, "learning_rate": 4.172262773722628e-06, "loss": 0.7351, "step": 8529 }, { "epoch": 0.2490438235379989, "grad_norm": 0.6978300698790092, "learning_rate": 4.172100567721006e-06, "loss": 0.6178, "step": 8530 }, { "epoch": 0.24907301976584625, "grad_norm": 0.8206888539177267, "learning_rate": 4.171938361719384e-06, "loss": 0.7648, "step": 8531 }, { "epoch": 0.24910221599369362, "grad_norm": 0.7209748916535286, "learning_rate": 4.171776155717762e-06, "loss": 0.605, "step": 8532 }, { "epoch": 0.24913141222154098, "grad_norm": 0.68682506217388, "learning_rate": 4.17161394971614e-06, "loss": 0.6075, "step": 8533 }, { "epoch": 0.24916060844938834, "grad_norm": 0.7671146674366768, "learning_rate": 4.171451743714518e-06, "loss": 0.6969, "step": 8534 }, { "epoch": 0.2491898046772357, "grad_norm": 0.7696964438723467, "learning_rate": 4.171289537712896e-06, "loss": 0.7227, "step": 8535 }, { "epoch": 0.24921900090508306, "grad_norm": 0.7619317977078479, "learning_rate": 4.171127331711274e-06, "loss": 0.6871, "step": 8536 }, { "epoch": 0.24924819713293042, "grad_norm": 0.8398363583445085, "learning_rate": 4.170965125709652e-06, "loss": 0.65, "step": 8537 }, { "epoch": 0.24927739336077778, "grad_norm": 0.7557784558228513, "learning_rate": 4.17080291970803e-06, "loss": 0.6719, "step": 8538 }, { "epoch": 0.24930658958862514, "grad_norm": 0.7724181372558092, "learning_rate": 4.170640713706408e-06, "loss": 0.7259, "step": 8539 }, { "epoch": 0.2493357858164725, "grad_norm": 0.743127699815333, "learning_rate": 4.170478507704786e-06, "loss": 0.6663, "step": 8540 }, { "epoch": 0.24936498204431987, "grad_norm": 0.7120795219418983, "learning_rate": 4.170316301703163e-06, "loss": 0.6396, "step": 8541 }, { "epoch": 0.24939417827216723, "grad_norm": 0.679959270266698, "learning_rate": 4.170154095701541e-06, "loss": 0.5541, "step": 8542 }, { "epoch": 0.2494233745000146, "grad_norm": 0.7823399356735506, "learning_rate": 4.169991889699919e-06, "loss": 0.6515, "step": 8543 }, { "epoch": 0.24945257072786195, "grad_norm": 0.7578341807478921, "learning_rate": 4.169829683698297e-06, "loss": 0.6913, "step": 8544 }, { "epoch": 0.2494817669557093, "grad_norm": 1.1289232534690814, "learning_rate": 4.169667477696675e-06, "loss": 0.7139, "step": 8545 }, { "epoch": 0.24951096318355667, "grad_norm": 0.7260275362884762, "learning_rate": 4.169505271695053e-06, "loss": 0.6187, "step": 8546 }, { "epoch": 0.24954015941140403, "grad_norm": 0.7242146613552353, "learning_rate": 4.169343065693431e-06, "loss": 0.6687, "step": 8547 }, { "epoch": 0.24956935563925142, "grad_norm": 0.7329027709130297, "learning_rate": 4.169180859691809e-06, "loss": 0.643, "step": 8548 }, { "epoch": 0.24959855186709878, "grad_norm": 0.7794883614910605, "learning_rate": 4.169018653690187e-06, "loss": 0.6984, "step": 8549 }, { "epoch": 0.24962774809494614, "grad_norm": 0.755336007435546, "learning_rate": 4.168856447688564e-06, "loss": 0.6243, "step": 8550 }, { "epoch": 0.2496569443227935, "grad_norm": 0.7924814733511599, "learning_rate": 4.168694241686943e-06, "loss": 0.7438, "step": 8551 }, { "epoch": 0.24968614055064087, "grad_norm": 0.8048665042551065, "learning_rate": 4.168532035685321e-06, "loss": 0.7169, "step": 8552 }, { "epoch": 0.24971533677848823, "grad_norm": 0.6788306015967988, "learning_rate": 4.168369829683699e-06, "loss": 0.6017, "step": 8553 }, { "epoch": 0.2497445330063356, "grad_norm": 0.7104472443111933, "learning_rate": 4.168207623682077e-06, "loss": 0.6539, "step": 8554 }, { "epoch": 0.24977372923418295, "grad_norm": 1.4352524357530705, "learning_rate": 4.168045417680455e-06, "loss": 0.7209, "step": 8555 }, { "epoch": 0.2498029254620303, "grad_norm": 0.8431777755583904, "learning_rate": 4.167883211678832e-06, "loss": 0.6827, "step": 8556 }, { "epoch": 0.24983212168987767, "grad_norm": 0.817821965760063, "learning_rate": 4.16772100567721e-06, "loss": 0.7701, "step": 8557 }, { "epoch": 0.24986131791772503, "grad_norm": 0.8011983278427123, "learning_rate": 4.167558799675588e-06, "loss": 0.7689, "step": 8558 }, { "epoch": 0.2498905141455724, "grad_norm": 0.7807164805157548, "learning_rate": 4.167396593673966e-06, "loss": 0.6737, "step": 8559 }, { "epoch": 0.24991971037341976, "grad_norm": 0.7350052812404455, "learning_rate": 4.167234387672344e-06, "loss": 0.6961, "step": 8560 }, { "epoch": 0.24994890660126712, "grad_norm": 0.7694850362879638, "learning_rate": 4.167072181670722e-06, "loss": 0.7259, "step": 8561 }, { "epoch": 0.24997810282911448, "grad_norm": 0.7347990701697419, "learning_rate": 4.1669099756691e-06, "loss": 0.7349, "step": 8562 }, { "epoch": 0.25000729905696184, "grad_norm": 0.7219969477442423, "learning_rate": 4.166747769667478e-06, "loss": 0.6905, "step": 8563 }, { "epoch": 0.2500364952848092, "grad_norm": 0.8478694218351124, "learning_rate": 4.166585563665856e-06, "loss": 0.8327, "step": 8564 }, { "epoch": 0.25006569151265656, "grad_norm": 0.726732715930879, "learning_rate": 4.1664233576642335e-06, "loss": 0.6663, "step": 8565 }, { "epoch": 0.2500948877405039, "grad_norm": 0.7808585520156892, "learning_rate": 4.1662611516626115e-06, "loss": 0.6672, "step": 8566 }, { "epoch": 0.2501240839683513, "grad_norm": 0.7207820362240409, "learning_rate": 4.1660989456609895e-06, "loss": 0.6527, "step": 8567 }, { "epoch": 0.25015328019619865, "grad_norm": 0.8150446170951636, "learning_rate": 4.1659367396593675e-06, "loss": 0.6491, "step": 8568 }, { "epoch": 0.250182476424046, "grad_norm": 0.8008849053948753, "learning_rate": 4.1657745336577455e-06, "loss": 0.706, "step": 8569 }, { "epoch": 0.25021167265189337, "grad_norm": 0.7039696597397892, "learning_rate": 4.1656123276561235e-06, "loss": 0.6406, "step": 8570 }, { "epoch": 0.25024086887974073, "grad_norm": 0.7017476156320006, "learning_rate": 4.1654501216545015e-06, "loss": 0.6547, "step": 8571 }, { "epoch": 0.2502700651075881, "grad_norm": 0.7028946930315563, "learning_rate": 4.1652879156528795e-06, "loss": 0.608, "step": 8572 }, { "epoch": 0.25029926133543545, "grad_norm": 0.7284056613460521, "learning_rate": 4.1651257096512575e-06, "loss": 0.6421, "step": 8573 }, { "epoch": 0.2503284575632828, "grad_norm": 0.694749023157709, "learning_rate": 4.1649635036496355e-06, "loss": 0.624, "step": 8574 }, { "epoch": 0.2503576537911302, "grad_norm": 0.7122679497298794, "learning_rate": 4.1648012976480135e-06, "loss": 0.6087, "step": 8575 }, { "epoch": 0.25038685001897754, "grad_norm": 0.6963273588165269, "learning_rate": 4.1646390916463915e-06, "loss": 0.5585, "step": 8576 }, { "epoch": 0.2504160462468249, "grad_norm": 0.8104482738207126, "learning_rate": 4.1644768856447695e-06, "loss": 0.745, "step": 8577 }, { "epoch": 0.25044524247467226, "grad_norm": 1.3185315945268061, "learning_rate": 4.1643146796431475e-06, "loss": 0.6627, "step": 8578 }, { "epoch": 0.2504744387025196, "grad_norm": 0.7016208523067577, "learning_rate": 4.164152473641525e-06, "loss": 0.5752, "step": 8579 }, { "epoch": 0.250503634930367, "grad_norm": 0.7696959350735896, "learning_rate": 4.163990267639903e-06, "loss": 0.5672, "step": 8580 }, { "epoch": 0.25053283115821434, "grad_norm": 0.7904861720520027, "learning_rate": 4.163828061638281e-06, "loss": 0.6845, "step": 8581 }, { "epoch": 0.2505620273860617, "grad_norm": 0.7142542774310247, "learning_rate": 4.163665855636659e-06, "loss": 0.6059, "step": 8582 }, { "epoch": 0.25059122361390906, "grad_norm": 0.7552175506203738, "learning_rate": 4.163503649635037e-06, "loss": 0.6926, "step": 8583 }, { "epoch": 0.2506204198417564, "grad_norm": 0.7828633020330651, "learning_rate": 4.163341443633415e-06, "loss": 0.7048, "step": 8584 }, { "epoch": 0.2506496160696038, "grad_norm": 0.7312897219862177, "learning_rate": 4.163179237631793e-06, "loss": 0.6363, "step": 8585 }, { "epoch": 0.25067881229745115, "grad_norm": 0.7632799182198463, "learning_rate": 4.163017031630171e-06, "loss": 0.6927, "step": 8586 }, { "epoch": 0.2507080085252985, "grad_norm": 0.7487222651034194, "learning_rate": 4.162854825628549e-06, "loss": 0.6835, "step": 8587 }, { "epoch": 0.25073720475314587, "grad_norm": 0.7410637038942652, "learning_rate": 4.162692619626926e-06, "loss": 0.6718, "step": 8588 }, { "epoch": 0.25076640098099323, "grad_norm": 0.7849963196302181, "learning_rate": 4.162530413625305e-06, "loss": 0.6866, "step": 8589 }, { "epoch": 0.2507955972088406, "grad_norm": 0.7550360039589524, "learning_rate": 4.162368207623683e-06, "loss": 0.6361, "step": 8590 }, { "epoch": 0.25082479343668795, "grad_norm": 0.8506685376599686, "learning_rate": 4.162206001622061e-06, "loss": 0.753, "step": 8591 }, { "epoch": 0.2508539896645353, "grad_norm": 0.7504584974103359, "learning_rate": 4.162043795620439e-06, "loss": 0.6745, "step": 8592 }, { "epoch": 0.25088318589238273, "grad_norm": 1.020343915804816, "learning_rate": 4.161881589618817e-06, "loss": 0.7132, "step": 8593 }, { "epoch": 0.2509123821202301, "grad_norm": 0.730290886369022, "learning_rate": 4.161719383617194e-06, "loss": 0.6868, "step": 8594 }, { "epoch": 0.25094157834807745, "grad_norm": 0.7274081661541898, "learning_rate": 4.161557177615572e-06, "loss": 0.6116, "step": 8595 }, { "epoch": 0.2509707745759248, "grad_norm": 0.7451942927263712, "learning_rate": 4.16139497161395e-06, "loss": 0.6705, "step": 8596 }, { "epoch": 0.2509999708037722, "grad_norm": 0.7599186654717105, "learning_rate": 4.161232765612328e-06, "loss": 0.6928, "step": 8597 }, { "epoch": 0.25102916703161954, "grad_norm": 1.1496402280206326, "learning_rate": 4.161070559610706e-06, "loss": 0.6467, "step": 8598 }, { "epoch": 0.2510583632594669, "grad_norm": 0.7688721459538934, "learning_rate": 4.160908353609084e-06, "loss": 0.7107, "step": 8599 }, { "epoch": 0.25108755948731426, "grad_norm": 0.7411933986271759, "learning_rate": 4.160746147607462e-06, "loss": 0.6102, "step": 8600 }, { "epoch": 0.2511167557151616, "grad_norm": 0.7850279550954445, "learning_rate": 4.16058394160584e-06, "loss": 0.673, "step": 8601 }, { "epoch": 0.251145951943009, "grad_norm": 0.7886499378645745, "learning_rate": 4.160421735604217e-06, "loss": 0.6221, "step": 8602 }, { "epoch": 0.25117514817085634, "grad_norm": 0.8179953068158448, "learning_rate": 4.160259529602595e-06, "loss": 0.7391, "step": 8603 }, { "epoch": 0.2512043443987037, "grad_norm": 0.7586083721743266, "learning_rate": 4.160097323600973e-06, "loss": 0.6907, "step": 8604 }, { "epoch": 0.25123354062655107, "grad_norm": 0.7364128194245773, "learning_rate": 4.159935117599351e-06, "loss": 0.672, "step": 8605 }, { "epoch": 0.2512627368543984, "grad_norm": 0.801283463918581, "learning_rate": 4.159772911597729e-06, "loss": 0.683, "step": 8606 }, { "epoch": 0.2512919330822458, "grad_norm": 0.7798032027424846, "learning_rate": 4.159610705596107e-06, "loss": 0.7626, "step": 8607 }, { "epoch": 0.25132112931009315, "grad_norm": 0.7150997248742835, "learning_rate": 4.159448499594485e-06, "loss": 0.6138, "step": 8608 }, { "epoch": 0.2513503255379405, "grad_norm": 0.7324393769794912, "learning_rate": 4.159286293592863e-06, "loss": 0.6707, "step": 8609 }, { "epoch": 0.25137952176578787, "grad_norm": 0.8274061769011914, "learning_rate": 4.159124087591241e-06, "loss": 0.759, "step": 8610 }, { "epoch": 0.25140871799363523, "grad_norm": 0.7032840657083026, "learning_rate": 4.158961881589619e-06, "loss": 0.6047, "step": 8611 }, { "epoch": 0.2514379142214826, "grad_norm": 0.7561991644219932, "learning_rate": 4.158799675587997e-06, "loss": 0.6596, "step": 8612 }, { "epoch": 0.25146711044932996, "grad_norm": 0.8138522347602621, "learning_rate": 4.158637469586375e-06, "loss": 0.7765, "step": 8613 }, { "epoch": 0.2514963066771773, "grad_norm": 0.7330079211106311, "learning_rate": 4.158475263584753e-06, "loss": 0.6282, "step": 8614 }, { "epoch": 0.2515255029050247, "grad_norm": 0.7604578045437506, "learning_rate": 4.158313057583131e-06, "loss": 0.7036, "step": 8615 }, { "epoch": 0.25155469913287204, "grad_norm": 0.7090274932777427, "learning_rate": 4.158150851581509e-06, "loss": 0.6715, "step": 8616 }, { "epoch": 0.2515838953607194, "grad_norm": 0.7429153850991189, "learning_rate": 4.157988645579886e-06, "loss": 0.6792, "step": 8617 }, { "epoch": 0.25161309158856676, "grad_norm": 0.7577294221833386, "learning_rate": 4.1578264395782644e-06, "loss": 0.7085, "step": 8618 }, { "epoch": 0.2516422878164141, "grad_norm": 0.6908964273744382, "learning_rate": 4.1576642335766424e-06, "loss": 0.621, "step": 8619 }, { "epoch": 0.2516714840442615, "grad_norm": 0.6955590003349792, "learning_rate": 4.1575020275750204e-06, "loss": 0.6099, "step": 8620 }, { "epoch": 0.25170068027210885, "grad_norm": 0.7374525415541253, "learning_rate": 4.1573398215733984e-06, "loss": 0.6651, "step": 8621 }, { "epoch": 0.2517298764999562, "grad_norm": 0.7335605575378928, "learning_rate": 4.1571776155717765e-06, "loss": 0.6531, "step": 8622 }, { "epoch": 0.25175907272780357, "grad_norm": 0.7868327313509533, "learning_rate": 4.1570154095701545e-06, "loss": 0.7335, "step": 8623 }, { "epoch": 0.25178826895565093, "grad_norm": 0.7921702813119101, "learning_rate": 4.1568532035685325e-06, "loss": 0.7023, "step": 8624 }, { "epoch": 0.2518174651834983, "grad_norm": 0.6939472009456753, "learning_rate": 4.1566909975669105e-06, "loss": 0.6863, "step": 8625 }, { "epoch": 0.25184666141134565, "grad_norm": 0.7231362771079384, "learning_rate": 4.156528791565288e-06, "loss": 0.6619, "step": 8626 }, { "epoch": 0.251875857639193, "grad_norm": 0.886007995051617, "learning_rate": 4.1563665855636665e-06, "loss": 0.7564, "step": 8627 }, { "epoch": 0.2519050538670404, "grad_norm": 0.7131552058296365, "learning_rate": 4.1562043795620445e-06, "loss": 0.6331, "step": 8628 }, { "epoch": 0.25193425009488774, "grad_norm": 0.8036449393247035, "learning_rate": 4.1560421735604225e-06, "loss": 0.7844, "step": 8629 }, { "epoch": 0.2519634463227351, "grad_norm": 0.7017549336493178, "learning_rate": 4.1558799675588005e-06, "loss": 0.6252, "step": 8630 }, { "epoch": 0.25199264255058246, "grad_norm": 0.761250530571341, "learning_rate": 4.1557177615571785e-06, "loss": 0.6484, "step": 8631 }, { "epoch": 0.2520218387784298, "grad_norm": 0.7127213698350203, "learning_rate": 4.155555555555556e-06, "loss": 0.6543, "step": 8632 }, { "epoch": 0.2520510350062772, "grad_norm": 0.743910362606585, "learning_rate": 4.155393349553934e-06, "loss": 0.7862, "step": 8633 }, { "epoch": 0.25208023123412454, "grad_norm": 0.7161870697022372, "learning_rate": 4.155231143552312e-06, "loss": 0.6556, "step": 8634 }, { "epoch": 0.2521094274619719, "grad_norm": 0.7167053016495434, "learning_rate": 4.15506893755069e-06, "loss": 0.6394, "step": 8635 }, { "epoch": 0.25213862368981926, "grad_norm": 0.7193961270894058, "learning_rate": 4.154906731549068e-06, "loss": 0.638, "step": 8636 }, { "epoch": 0.2521678199176666, "grad_norm": 0.7938904581967194, "learning_rate": 4.154744525547446e-06, "loss": 0.6392, "step": 8637 }, { "epoch": 0.252197016145514, "grad_norm": 0.7152255722736038, "learning_rate": 4.154582319545824e-06, "loss": 0.6069, "step": 8638 }, { "epoch": 0.25222621237336135, "grad_norm": 0.7829274168954008, "learning_rate": 4.154420113544202e-06, "loss": 0.7775, "step": 8639 }, { "epoch": 0.2522554086012087, "grad_norm": 0.7355745332029626, "learning_rate": 4.154257907542579e-06, "loss": 0.6426, "step": 8640 }, { "epoch": 0.25228460482905607, "grad_norm": 0.7130874066985967, "learning_rate": 4.154095701540957e-06, "loss": 0.5991, "step": 8641 }, { "epoch": 0.25231380105690343, "grad_norm": 0.7497932299540248, "learning_rate": 4.153933495539335e-06, "loss": 0.6591, "step": 8642 }, { "epoch": 0.2523429972847508, "grad_norm": 0.8663850864088926, "learning_rate": 4.153771289537713e-06, "loss": 0.7136, "step": 8643 }, { "epoch": 0.25237219351259815, "grad_norm": 0.7394972599427101, "learning_rate": 4.153609083536091e-06, "loss": 0.6702, "step": 8644 }, { "epoch": 0.2524013897404455, "grad_norm": 0.7949331933427468, "learning_rate": 4.153446877534469e-06, "loss": 0.7761, "step": 8645 }, { "epoch": 0.2524305859682929, "grad_norm": 0.7457172801159702, "learning_rate": 4.153284671532847e-06, "loss": 0.6733, "step": 8646 }, { "epoch": 0.25245978219614024, "grad_norm": 0.7550053804264446, "learning_rate": 4.153122465531225e-06, "loss": 0.6519, "step": 8647 }, { "epoch": 0.2524889784239876, "grad_norm": 0.9153073140836137, "learning_rate": 4.152960259529603e-06, "loss": 0.7906, "step": 8648 }, { "epoch": 0.25251817465183496, "grad_norm": 0.8308086326072134, "learning_rate": 4.152798053527981e-06, "loss": 0.6699, "step": 8649 }, { "epoch": 0.2525473708796823, "grad_norm": 0.8263444354735172, "learning_rate": 4.152635847526359e-06, "loss": 0.7865, "step": 8650 }, { "epoch": 0.2525765671075297, "grad_norm": 0.7760725564696895, "learning_rate": 4.152473641524737e-06, "loss": 0.7304, "step": 8651 }, { "epoch": 0.25260576333537704, "grad_norm": 0.754674347885494, "learning_rate": 4.152311435523115e-06, "loss": 0.7585, "step": 8652 }, { "epoch": 0.2526349595632244, "grad_norm": 0.7608155619993056, "learning_rate": 4.152149229521493e-06, "loss": 0.6833, "step": 8653 }, { "epoch": 0.2526641557910718, "grad_norm": 0.7738263601134651, "learning_rate": 4.151987023519871e-06, "loss": 0.6295, "step": 8654 }, { "epoch": 0.2526933520189192, "grad_norm": 0.7533641855119202, "learning_rate": 4.151824817518248e-06, "loss": 0.6378, "step": 8655 }, { "epoch": 0.25272254824676654, "grad_norm": 0.727125514384377, "learning_rate": 4.151662611516626e-06, "loss": 0.6361, "step": 8656 }, { "epoch": 0.2527517444746139, "grad_norm": 0.7478095608136861, "learning_rate": 4.151500405515004e-06, "loss": 0.6861, "step": 8657 }, { "epoch": 0.25278094070246127, "grad_norm": 0.7197576929089492, "learning_rate": 4.151338199513382e-06, "loss": 0.6523, "step": 8658 }, { "epoch": 0.2528101369303086, "grad_norm": 0.7385500311332404, "learning_rate": 4.15117599351176e-06, "loss": 0.7092, "step": 8659 }, { "epoch": 0.252839333158156, "grad_norm": 0.7342266290793071, "learning_rate": 4.151013787510138e-06, "loss": 0.6827, "step": 8660 }, { "epoch": 0.25286852938600335, "grad_norm": 0.7779490598610965, "learning_rate": 4.150851581508516e-06, "loss": 0.6894, "step": 8661 }, { "epoch": 0.2528977256138507, "grad_norm": 0.8174214705287205, "learning_rate": 4.150689375506894e-06, "loss": 0.7597, "step": 8662 }, { "epoch": 0.25292692184169807, "grad_norm": 0.8160551250876213, "learning_rate": 4.150527169505272e-06, "loss": 0.7895, "step": 8663 }, { "epoch": 0.25295611806954543, "grad_norm": 0.7677585535184767, "learning_rate": 4.150364963503649e-06, "loss": 0.6931, "step": 8664 }, { "epoch": 0.2529853142973928, "grad_norm": 0.7385009232098597, "learning_rate": 4.150202757502028e-06, "loss": 0.6726, "step": 8665 }, { "epoch": 0.25301451052524015, "grad_norm": 0.9030682579801328, "learning_rate": 4.150040551500406e-06, "loss": 0.68, "step": 8666 }, { "epoch": 0.2530437067530875, "grad_norm": 0.732359921780786, "learning_rate": 4.149878345498784e-06, "loss": 0.6864, "step": 8667 }, { "epoch": 0.2530729029809349, "grad_norm": 0.7565935065818205, "learning_rate": 4.149716139497162e-06, "loss": 0.787, "step": 8668 }, { "epoch": 0.25310209920878224, "grad_norm": 0.750459220867186, "learning_rate": 4.14955393349554e-06, "loss": 0.6925, "step": 8669 }, { "epoch": 0.2531312954366296, "grad_norm": 0.7501141280635074, "learning_rate": 4.149391727493917e-06, "loss": 0.7248, "step": 8670 }, { "epoch": 0.25316049166447696, "grad_norm": 0.7835828655514279, "learning_rate": 4.149229521492295e-06, "loss": 0.7097, "step": 8671 }, { "epoch": 0.2531896878923243, "grad_norm": 0.7661827759737655, "learning_rate": 4.149067315490673e-06, "loss": 0.7611, "step": 8672 }, { "epoch": 0.2532188841201717, "grad_norm": 0.7137135686621329, "learning_rate": 4.148905109489051e-06, "loss": 0.6045, "step": 8673 }, { "epoch": 0.25324808034801904, "grad_norm": 0.7688040208395353, "learning_rate": 4.148742903487429e-06, "loss": 0.7607, "step": 8674 }, { "epoch": 0.2532772765758664, "grad_norm": 0.7496873855166396, "learning_rate": 4.1485806974858074e-06, "loss": 0.6827, "step": 8675 }, { "epoch": 0.25330647280371377, "grad_norm": 0.7097006398261003, "learning_rate": 4.1484184914841854e-06, "loss": 0.6435, "step": 8676 }, { "epoch": 0.25333566903156113, "grad_norm": 0.7569625528428288, "learning_rate": 4.1482562854825634e-06, "loss": 0.7754, "step": 8677 }, { "epoch": 0.2533648652594085, "grad_norm": 0.7169069080546462, "learning_rate": 4.148094079480941e-06, "loss": 0.6587, "step": 8678 }, { "epoch": 0.25339406148725585, "grad_norm": 0.7244072700849201, "learning_rate": 4.147931873479319e-06, "loss": 0.6568, "step": 8679 }, { "epoch": 0.2534232577151032, "grad_norm": 0.8164614651032375, "learning_rate": 4.147769667477697e-06, "loss": 0.7573, "step": 8680 }, { "epoch": 0.2534524539429506, "grad_norm": 0.7225355626700765, "learning_rate": 4.147607461476075e-06, "loss": 0.619, "step": 8681 }, { "epoch": 0.25348165017079793, "grad_norm": 1.0499282484334524, "learning_rate": 4.147445255474453e-06, "loss": 0.6842, "step": 8682 }, { "epoch": 0.2535108463986453, "grad_norm": 0.719425871398009, "learning_rate": 4.147283049472831e-06, "loss": 0.6484, "step": 8683 }, { "epoch": 0.25354004262649266, "grad_norm": 0.7701980663022763, "learning_rate": 4.147120843471209e-06, "loss": 0.6343, "step": 8684 }, { "epoch": 0.25356923885434, "grad_norm": 0.7422857576845485, "learning_rate": 4.146958637469587e-06, "loss": 0.7151, "step": 8685 }, { "epoch": 0.2535984350821874, "grad_norm": 0.7371251396386728, "learning_rate": 4.146796431467965e-06, "loss": 0.6634, "step": 8686 }, { "epoch": 0.25362763131003474, "grad_norm": 0.7949589525222965, "learning_rate": 4.146634225466343e-06, "loss": 0.7152, "step": 8687 }, { "epoch": 0.2536568275378821, "grad_norm": 0.7280963829622138, "learning_rate": 4.146472019464721e-06, "loss": 0.6498, "step": 8688 }, { "epoch": 0.25368602376572946, "grad_norm": 0.7427075117264774, "learning_rate": 4.146309813463099e-06, "loss": 0.6442, "step": 8689 }, { "epoch": 0.2537152199935768, "grad_norm": 0.7387899436648115, "learning_rate": 4.146147607461477e-06, "loss": 0.6781, "step": 8690 }, { "epoch": 0.2537444162214242, "grad_norm": 0.7553786832624397, "learning_rate": 4.145985401459855e-06, "loss": 0.6913, "step": 8691 }, { "epoch": 0.25377361244927155, "grad_norm": 0.7707826202158966, "learning_rate": 4.145823195458233e-06, "loss": 0.7374, "step": 8692 }, { "epoch": 0.2538028086771189, "grad_norm": 0.7723045308821834, "learning_rate": 4.14566098945661e-06, "loss": 0.7043, "step": 8693 }, { "epoch": 0.25383200490496627, "grad_norm": 0.7506015025163556, "learning_rate": 4.145498783454988e-06, "loss": 0.6886, "step": 8694 }, { "epoch": 0.25386120113281363, "grad_norm": 0.7624759848962279, "learning_rate": 4.145336577453366e-06, "loss": 0.7266, "step": 8695 }, { "epoch": 0.253890397360661, "grad_norm": 0.7731531584667031, "learning_rate": 4.145174371451744e-06, "loss": 0.6788, "step": 8696 }, { "epoch": 0.25391959358850835, "grad_norm": 0.7284818567707311, "learning_rate": 4.145012165450122e-06, "loss": 0.6293, "step": 8697 }, { "epoch": 0.2539487898163557, "grad_norm": 0.7652978027783299, "learning_rate": 4.1448499594485e-06, "loss": 0.6607, "step": 8698 }, { "epoch": 0.2539779860442031, "grad_norm": 0.7751284005078516, "learning_rate": 4.144687753446878e-06, "loss": 0.7154, "step": 8699 }, { "epoch": 0.25400718227205044, "grad_norm": 0.7767577710068182, "learning_rate": 4.144525547445256e-06, "loss": 0.7698, "step": 8700 }, { "epoch": 0.2540363784998978, "grad_norm": 0.7329525632913797, "learning_rate": 4.144363341443634e-06, "loss": 0.6428, "step": 8701 }, { "epoch": 0.25406557472774516, "grad_norm": 0.7068781017954902, "learning_rate": 4.144201135442012e-06, "loss": 0.5973, "step": 8702 }, { "epoch": 0.2540947709555925, "grad_norm": 0.7932283071437285, "learning_rate": 4.14403892944039e-06, "loss": 0.763, "step": 8703 }, { "epoch": 0.2541239671834399, "grad_norm": 0.6765120365273847, "learning_rate": 4.143876723438768e-06, "loss": 0.5944, "step": 8704 }, { "epoch": 0.25415316341128724, "grad_norm": 1.084360348032469, "learning_rate": 4.143714517437146e-06, "loss": 0.7016, "step": 8705 }, { "epoch": 0.2541823596391346, "grad_norm": 0.787772924255545, "learning_rate": 4.143552311435524e-06, "loss": 0.8133, "step": 8706 }, { "epoch": 0.25421155586698196, "grad_norm": 0.7041821404246399, "learning_rate": 4.143390105433901e-06, "loss": 0.5978, "step": 8707 }, { "epoch": 0.2542407520948293, "grad_norm": 0.7091981441877483, "learning_rate": 4.143227899432279e-06, "loss": 0.6023, "step": 8708 }, { "epoch": 0.2542699483226767, "grad_norm": 0.7122932566650035, "learning_rate": 4.143065693430657e-06, "loss": 0.6292, "step": 8709 }, { "epoch": 0.25429914455052405, "grad_norm": 0.7266535576720832, "learning_rate": 4.142903487429035e-06, "loss": 0.6782, "step": 8710 }, { "epoch": 0.2543283407783714, "grad_norm": 0.7471447632709287, "learning_rate": 4.142741281427413e-06, "loss": 0.6516, "step": 8711 }, { "epoch": 0.25435753700621877, "grad_norm": 0.7615417344321752, "learning_rate": 4.142579075425791e-06, "loss": 0.7464, "step": 8712 }, { "epoch": 0.25438673323406613, "grad_norm": 0.87319369239469, "learning_rate": 4.142416869424169e-06, "loss": 0.8066, "step": 8713 }, { "epoch": 0.25441592946191355, "grad_norm": 0.6753357910893438, "learning_rate": 4.142254663422547e-06, "loss": 0.6106, "step": 8714 }, { "epoch": 0.2544451256897609, "grad_norm": 0.7517089502272557, "learning_rate": 4.142092457420925e-06, "loss": 0.6779, "step": 8715 }, { "epoch": 0.25447432191760827, "grad_norm": 0.7165670551840031, "learning_rate": 4.141930251419302e-06, "loss": 0.6349, "step": 8716 }, { "epoch": 0.25450351814545563, "grad_norm": 0.7965052383079134, "learning_rate": 4.14176804541768e-06, "loss": 0.7363, "step": 8717 }, { "epoch": 0.254532714373303, "grad_norm": 0.7521391235913473, "learning_rate": 4.141605839416058e-06, "loss": 0.705, "step": 8718 }, { "epoch": 0.25456191060115035, "grad_norm": 0.7701000248048118, "learning_rate": 4.141443633414436e-06, "loss": 0.7149, "step": 8719 }, { "epoch": 0.2545911068289977, "grad_norm": 0.7413704239066459, "learning_rate": 4.141281427412814e-06, "loss": 0.6937, "step": 8720 }, { "epoch": 0.2546203030568451, "grad_norm": 0.7900882518562591, "learning_rate": 4.141119221411193e-06, "loss": 0.644, "step": 8721 }, { "epoch": 0.25464949928469244, "grad_norm": 0.8276802808936047, "learning_rate": 4.14095701540957e-06, "loss": 0.7746, "step": 8722 }, { "epoch": 0.2546786955125398, "grad_norm": 0.7184083094571493, "learning_rate": 4.140794809407948e-06, "loss": 0.6374, "step": 8723 }, { "epoch": 0.25470789174038716, "grad_norm": 0.8807767526256222, "learning_rate": 4.140632603406326e-06, "loss": 0.6891, "step": 8724 }, { "epoch": 0.2547370879682345, "grad_norm": 0.7401646650500021, "learning_rate": 4.140470397404704e-06, "loss": 0.6795, "step": 8725 }, { "epoch": 0.2547662841960819, "grad_norm": 0.757981524716189, "learning_rate": 4.140308191403082e-06, "loss": 0.6184, "step": 8726 }, { "epoch": 0.25479548042392924, "grad_norm": 0.746015256062676, "learning_rate": 4.14014598540146e-06, "loss": 0.6812, "step": 8727 }, { "epoch": 0.2548246766517766, "grad_norm": 0.7303550166291278, "learning_rate": 4.139983779399838e-06, "loss": 0.6865, "step": 8728 }, { "epoch": 0.25485387287962397, "grad_norm": 0.7812556043266629, "learning_rate": 4.139821573398216e-06, "loss": 0.724, "step": 8729 }, { "epoch": 0.2548830691074713, "grad_norm": 0.7089854627450338, "learning_rate": 4.139659367396594e-06, "loss": 0.5346, "step": 8730 }, { "epoch": 0.2549122653353187, "grad_norm": 0.7974568120369152, "learning_rate": 4.1394971613949716e-06, "loss": 0.7003, "step": 8731 }, { "epoch": 0.25494146156316605, "grad_norm": 0.7255305469457467, "learning_rate": 4.13933495539335e-06, "loss": 0.611, "step": 8732 }, { "epoch": 0.2549706577910134, "grad_norm": 0.7522265401419233, "learning_rate": 4.139172749391728e-06, "loss": 0.6785, "step": 8733 }, { "epoch": 0.25499985401886077, "grad_norm": 0.7354111549011815, "learning_rate": 4.139010543390106e-06, "loss": 0.6772, "step": 8734 }, { "epoch": 0.25502905024670813, "grad_norm": 0.7596875320044163, "learning_rate": 4.138848337388484e-06, "loss": 0.6276, "step": 8735 }, { "epoch": 0.2550582464745555, "grad_norm": 0.688480331173202, "learning_rate": 4.138686131386862e-06, "loss": 0.5821, "step": 8736 }, { "epoch": 0.25508744270240286, "grad_norm": 0.7874413223171073, "learning_rate": 4.13852392538524e-06, "loss": 0.6679, "step": 8737 }, { "epoch": 0.2551166389302502, "grad_norm": 0.7285639158323587, "learning_rate": 4.138361719383618e-06, "loss": 0.6894, "step": 8738 }, { "epoch": 0.2551458351580976, "grad_norm": 0.8106259230112474, "learning_rate": 4.138199513381996e-06, "loss": 0.7713, "step": 8739 }, { "epoch": 0.25517503138594494, "grad_norm": 0.8288816917451843, "learning_rate": 4.138037307380374e-06, "loss": 0.7051, "step": 8740 }, { "epoch": 0.2552042276137923, "grad_norm": 0.7136946692466835, "learning_rate": 4.137875101378752e-06, "loss": 0.6636, "step": 8741 }, { "epoch": 0.25523342384163966, "grad_norm": 0.7259177531936482, "learning_rate": 4.13771289537713e-06, "loss": 0.6092, "step": 8742 }, { "epoch": 0.255262620069487, "grad_norm": 0.8533138160104258, "learning_rate": 4.137550689375508e-06, "loss": 0.7838, "step": 8743 }, { "epoch": 0.2552918162973344, "grad_norm": 0.8373986649342179, "learning_rate": 4.137388483373886e-06, "loss": 0.7103, "step": 8744 }, { "epoch": 0.25532101252518175, "grad_norm": 0.7897591658618986, "learning_rate": 4.137226277372263e-06, "loss": 0.7033, "step": 8745 }, { "epoch": 0.2553502087530291, "grad_norm": 0.7535189992808873, "learning_rate": 4.137064071370641e-06, "loss": 0.6745, "step": 8746 }, { "epoch": 0.25537940498087647, "grad_norm": 0.7582793673463154, "learning_rate": 4.136901865369019e-06, "loss": 0.685, "step": 8747 }, { "epoch": 0.25540860120872383, "grad_norm": 0.7356749866418556, "learning_rate": 4.136739659367397e-06, "loss": 0.6406, "step": 8748 }, { "epoch": 0.2554377974365712, "grad_norm": 0.7696977444445087, "learning_rate": 4.136577453365775e-06, "loss": 0.6555, "step": 8749 }, { "epoch": 0.25546699366441855, "grad_norm": 0.729241556666642, "learning_rate": 4.136415247364153e-06, "loss": 0.7193, "step": 8750 }, { "epoch": 0.2554961898922659, "grad_norm": 0.6672625498848256, "learning_rate": 4.136253041362531e-06, "loss": 0.5575, "step": 8751 }, { "epoch": 0.2555253861201133, "grad_norm": 0.7468379643282459, "learning_rate": 4.136090835360909e-06, "loss": 0.7018, "step": 8752 }, { "epoch": 0.25555458234796063, "grad_norm": 0.9098363070123796, "learning_rate": 4.135928629359287e-06, "loss": 0.6968, "step": 8753 }, { "epoch": 0.255583778575808, "grad_norm": 0.7851390462341722, "learning_rate": 4.135766423357664e-06, "loss": 0.7055, "step": 8754 }, { "epoch": 0.25561297480365536, "grad_norm": 0.7872091049524577, "learning_rate": 4.135604217356042e-06, "loss": 0.7698, "step": 8755 }, { "epoch": 0.2556421710315027, "grad_norm": 0.758078723188112, "learning_rate": 4.13544201135442e-06, "loss": 0.6668, "step": 8756 }, { "epoch": 0.2556713672593501, "grad_norm": 0.7537703260598317, "learning_rate": 4.135279805352798e-06, "loss": 0.6782, "step": 8757 }, { "epoch": 0.25570056348719744, "grad_norm": 0.6552657497797155, "learning_rate": 4.135117599351176e-06, "loss": 0.5607, "step": 8758 }, { "epoch": 0.2557297597150448, "grad_norm": 0.7421253802644485, "learning_rate": 4.134955393349555e-06, "loss": 0.6632, "step": 8759 }, { "epoch": 0.25575895594289216, "grad_norm": 0.7025742293488216, "learning_rate": 4.134793187347932e-06, "loss": 0.5744, "step": 8760 }, { "epoch": 0.2557881521707395, "grad_norm": 0.906873863332232, "learning_rate": 4.13463098134631e-06, "loss": 0.7306, "step": 8761 }, { "epoch": 0.2558173483985869, "grad_norm": 0.6944197751799723, "learning_rate": 4.134468775344688e-06, "loss": 0.5991, "step": 8762 }, { "epoch": 0.25584654462643425, "grad_norm": 0.7799490769521052, "learning_rate": 4.134306569343066e-06, "loss": 0.7376, "step": 8763 }, { "epoch": 0.2558757408542816, "grad_norm": 0.7765493319873896, "learning_rate": 4.134144363341444e-06, "loss": 0.7019, "step": 8764 }, { "epoch": 0.25590493708212897, "grad_norm": 0.7499034305056486, "learning_rate": 4.133982157339822e-06, "loss": 0.6463, "step": 8765 }, { "epoch": 0.25593413330997633, "grad_norm": 0.753261604221952, "learning_rate": 4.1338199513382e-06, "loss": 0.6343, "step": 8766 }, { "epoch": 0.2559633295378237, "grad_norm": 0.7342575575032052, "learning_rate": 4.133657745336578e-06, "loss": 0.6525, "step": 8767 }, { "epoch": 0.25599252576567105, "grad_norm": 0.7293194636748118, "learning_rate": 4.133495539334956e-06, "loss": 0.708, "step": 8768 }, { "epoch": 0.2560217219935184, "grad_norm": 0.741737570093105, "learning_rate": 4.133333333333333e-06, "loss": 0.6432, "step": 8769 }, { "epoch": 0.2560509182213658, "grad_norm": 0.7767795575582841, "learning_rate": 4.133171127331711e-06, "loss": 0.7338, "step": 8770 }, { "epoch": 0.25608011444921314, "grad_norm": 0.9655387395974154, "learning_rate": 4.133008921330089e-06, "loss": 0.7374, "step": 8771 }, { "epoch": 0.2561093106770605, "grad_norm": 0.8791097800843587, "learning_rate": 4.132846715328467e-06, "loss": 0.7932, "step": 8772 }, { "epoch": 0.25613850690490786, "grad_norm": 0.8207309409360449, "learning_rate": 4.132684509326845e-06, "loss": 0.8004, "step": 8773 }, { "epoch": 0.2561677031327553, "grad_norm": 0.7416136169984531, "learning_rate": 4.132522303325223e-06, "loss": 0.6967, "step": 8774 }, { "epoch": 0.25619689936060264, "grad_norm": 0.8421695222380352, "learning_rate": 4.132360097323601e-06, "loss": 0.7379, "step": 8775 }, { "epoch": 0.25622609558845, "grad_norm": 0.7649444511304226, "learning_rate": 4.132197891321979e-06, "loss": 0.7461, "step": 8776 }, { "epoch": 0.25625529181629736, "grad_norm": 0.7412718561207992, "learning_rate": 4.132035685320357e-06, "loss": 0.5867, "step": 8777 }, { "epoch": 0.2562844880441447, "grad_norm": 0.7890499393323713, "learning_rate": 4.131873479318735e-06, "loss": 0.75, "step": 8778 }, { "epoch": 0.2563136842719921, "grad_norm": 0.7129483548607859, "learning_rate": 4.131711273317113e-06, "loss": 0.6619, "step": 8779 }, { "epoch": 0.25634288049983944, "grad_norm": 0.6751495581019091, "learning_rate": 4.131549067315491e-06, "loss": 0.5898, "step": 8780 }, { "epoch": 0.2563720767276868, "grad_norm": 0.7423327371498204, "learning_rate": 4.131386861313869e-06, "loss": 0.5797, "step": 8781 }, { "epoch": 0.25640127295553417, "grad_norm": 0.7448056170069157, "learning_rate": 4.131224655312247e-06, "loss": 0.5893, "step": 8782 }, { "epoch": 0.2564304691833815, "grad_norm": 0.7322769395761383, "learning_rate": 4.1310624493106245e-06, "loss": 0.6281, "step": 8783 }, { "epoch": 0.2564596654112289, "grad_norm": 0.7095061724195211, "learning_rate": 4.1309002433090025e-06, "loss": 0.6583, "step": 8784 }, { "epoch": 0.25648886163907625, "grad_norm": 0.666008119929678, "learning_rate": 4.1307380373073805e-06, "loss": 0.5709, "step": 8785 }, { "epoch": 0.2565180578669236, "grad_norm": 0.6986408478426029, "learning_rate": 4.1305758313057586e-06, "loss": 0.6296, "step": 8786 }, { "epoch": 0.25654725409477097, "grad_norm": 0.7361556207009387, "learning_rate": 4.1304136253041366e-06, "loss": 0.5963, "step": 8787 }, { "epoch": 0.25657645032261833, "grad_norm": 0.7285635860269842, "learning_rate": 4.1302514193025146e-06, "loss": 0.6444, "step": 8788 }, { "epoch": 0.2566056465504657, "grad_norm": 0.736998577473655, "learning_rate": 4.1300892133008926e-06, "loss": 0.6709, "step": 8789 }, { "epoch": 0.25663484277831305, "grad_norm": 0.7112056840127857, "learning_rate": 4.129927007299271e-06, "loss": 0.6227, "step": 8790 }, { "epoch": 0.2566640390061604, "grad_norm": 0.8053953586150867, "learning_rate": 4.129764801297649e-06, "loss": 0.6574, "step": 8791 }, { "epoch": 0.2566932352340078, "grad_norm": 0.7544621904173853, "learning_rate": 4.129602595296026e-06, "loss": 0.6715, "step": 8792 }, { "epoch": 0.25672243146185514, "grad_norm": 0.7517688605780432, "learning_rate": 4.129440389294404e-06, "loss": 0.6978, "step": 8793 }, { "epoch": 0.2567516276897025, "grad_norm": 0.6847235542909844, "learning_rate": 4.129278183292782e-06, "loss": 0.5977, "step": 8794 }, { "epoch": 0.25678082391754986, "grad_norm": 0.7372225244081458, "learning_rate": 4.12911597729116e-06, "loss": 0.6481, "step": 8795 }, { "epoch": 0.2568100201453972, "grad_norm": 0.8344651114867336, "learning_rate": 4.128953771289538e-06, "loss": 0.6496, "step": 8796 }, { "epoch": 0.2568392163732446, "grad_norm": 0.7499044824360929, "learning_rate": 4.128791565287917e-06, "loss": 0.5999, "step": 8797 }, { "epoch": 0.25686841260109194, "grad_norm": 0.7329465396788648, "learning_rate": 4.128629359286294e-06, "loss": 0.6533, "step": 8798 }, { "epoch": 0.2568976088289393, "grad_norm": 0.714944453621887, "learning_rate": 4.128467153284672e-06, "loss": 0.6141, "step": 8799 }, { "epoch": 0.25692680505678667, "grad_norm": 0.7338665178908537, "learning_rate": 4.12830494728305e-06, "loss": 0.6435, "step": 8800 }, { "epoch": 0.25695600128463403, "grad_norm": 0.7498103955824991, "learning_rate": 4.128142741281428e-06, "loss": 0.6895, "step": 8801 }, { "epoch": 0.2569851975124814, "grad_norm": 0.7715016555437536, "learning_rate": 4.127980535279806e-06, "loss": 0.6789, "step": 8802 }, { "epoch": 0.25701439374032875, "grad_norm": 0.8352298105496144, "learning_rate": 4.127818329278184e-06, "loss": 0.7172, "step": 8803 }, { "epoch": 0.2570435899681761, "grad_norm": 0.7365031532323004, "learning_rate": 4.127656123276562e-06, "loss": 0.6696, "step": 8804 }, { "epoch": 0.2570727861960235, "grad_norm": 0.6673620942172752, "learning_rate": 4.12749391727494e-06, "loss": 0.5998, "step": 8805 }, { "epoch": 0.25710198242387083, "grad_norm": 0.724680370845097, "learning_rate": 4.127331711273318e-06, "loss": 0.6446, "step": 8806 }, { "epoch": 0.2571311786517182, "grad_norm": 0.794148734114382, "learning_rate": 4.127169505271695e-06, "loss": 0.6957, "step": 8807 }, { "epoch": 0.25716037487956556, "grad_norm": 0.8797953597561818, "learning_rate": 4.127007299270073e-06, "loss": 0.7988, "step": 8808 }, { "epoch": 0.2571895711074129, "grad_norm": 0.793660910818178, "learning_rate": 4.126845093268451e-06, "loss": 0.6663, "step": 8809 }, { "epoch": 0.2572187673352603, "grad_norm": 0.9041720832863579, "learning_rate": 4.126682887266829e-06, "loss": 0.7777, "step": 8810 }, { "epoch": 0.25724796356310764, "grad_norm": 0.6962272324339712, "learning_rate": 4.126520681265207e-06, "loss": 0.5791, "step": 8811 }, { "epoch": 0.257277159790955, "grad_norm": 0.8242136985566445, "learning_rate": 4.126358475263585e-06, "loss": 0.7326, "step": 8812 }, { "epoch": 0.25730635601880236, "grad_norm": 0.8447387364925125, "learning_rate": 4.126196269261963e-06, "loss": 0.7278, "step": 8813 }, { "epoch": 0.2573355522466497, "grad_norm": 0.8362415840632509, "learning_rate": 4.126034063260341e-06, "loss": 0.8422, "step": 8814 }, { "epoch": 0.2573647484744971, "grad_norm": 0.7009655027530555, "learning_rate": 4.125871857258719e-06, "loss": 0.6058, "step": 8815 }, { "epoch": 0.25739394470234445, "grad_norm": 0.7559263976628747, "learning_rate": 4.125709651257097e-06, "loss": 0.7053, "step": 8816 }, { "epoch": 0.2574231409301918, "grad_norm": 0.702354947757498, "learning_rate": 4.125547445255475e-06, "loss": 0.5857, "step": 8817 }, { "epoch": 0.25745233715803917, "grad_norm": 0.7508842394955272, "learning_rate": 4.125385239253853e-06, "loss": 0.6867, "step": 8818 }, { "epoch": 0.25748153338588653, "grad_norm": 0.7505287897806785, "learning_rate": 4.125223033252231e-06, "loss": 0.7763, "step": 8819 }, { "epoch": 0.2575107296137339, "grad_norm": 0.7276288893723372, "learning_rate": 4.125060827250609e-06, "loss": 0.6972, "step": 8820 }, { "epoch": 0.25753992584158125, "grad_norm": 0.8892294666442375, "learning_rate": 4.124898621248986e-06, "loss": 0.6624, "step": 8821 }, { "epoch": 0.2575691220694286, "grad_norm": 0.6854452006140661, "learning_rate": 4.124736415247364e-06, "loss": 0.6007, "step": 8822 }, { "epoch": 0.257598318297276, "grad_norm": 0.800937906151281, "learning_rate": 4.124574209245742e-06, "loss": 0.7087, "step": 8823 }, { "epoch": 0.25762751452512334, "grad_norm": 0.8653203992044948, "learning_rate": 4.12441200324412e-06, "loss": 0.7591, "step": 8824 }, { "epoch": 0.2576567107529707, "grad_norm": 0.8408828696932327, "learning_rate": 4.124249797242498e-06, "loss": 0.7784, "step": 8825 }, { "epoch": 0.25768590698081806, "grad_norm": 0.8447219976484931, "learning_rate": 4.124087591240876e-06, "loss": 0.7691, "step": 8826 }, { "epoch": 0.2577151032086654, "grad_norm": 0.744266668929357, "learning_rate": 4.123925385239254e-06, "loss": 0.6793, "step": 8827 }, { "epoch": 0.2577442994365128, "grad_norm": 0.7776331993535422, "learning_rate": 4.123763179237632e-06, "loss": 0.6471, "step": 8828 }, { "epoch": 0.25777349566436014, "grad_norm": 0.9001948121902412, "learning_rate": 4.12360097323601e-06, "loss": 0.5991, "step": 8829 }, { "epoch": 0.2578026918922075, "grad_norm": 0.8050368279003748, "learning_rate": 4.1234387672343875e-06, "loss": 0.7155, "step": 8830 }, { "epoch": 0.25783188812005486, "grad_norm": 0.7451004638932391, "learning_rate": 4.1232765612327655e-06, "loss": 0.6321, "step": 8831 }, { "epoch": 0.2578610843479022, "grad_norm": 0.73544013767254, "learning_rate": 4.1231143552311435e-06, "loss": 0.6527, "step": 8832 }, { "epoch": 0.2578902805757496, "grad_norm": 0.8638070063142492, "learning_rate": 4.1229521492295215e-06, "loss": 0.7461, "step": 8833 }, { "epoch": 0.25791947680359695, "grad_norm": 0.6932063163692792, "learning_rate": 4.1227899432278995e-06, "loss": 0.6368, "step": 8834 }, { "epoch": 0.25794867303144436, "grad_norm": 0.6601013939330077, "learning_rate": 4.122627737226278e-06, "loss": 0.5379, "step": 8835 }, { "epoch": 0.2579778692592917, "grad_norm": 0.7174204968107193, "learning_rate": 4.1224655312246555e-06, "loss": 0.66, "step": 8836 }, { "epoch": 0.2580070654871391, "grad_norm": 0.7344051916366103, "learning_rate": 4.1223033252230335e-06, "loss": 0.6678, "step": 8837 }, { "epoch": 0.25803626171498645, "grad_norm": 0.7791195123974597, "learning_rate": 4.1221411192214115e-06, "loss": 0.7374, "step": 8838 }, { "epoch": 0.2580654579428338, "grad_norm": 0.7194674959251208, "learning_rate": 4.1219789132197895e-06, "loss": 0.6277, "step": 8839 }, { "epoch": 0.25809465417068117, "grad_norm": 0.8064542526259945, "learning_rate": 4.1218167072181675e-06, "loss": 0.804, "step": 8840 }, { "epoch": 0.25812385039852853, "grad_norm": 0.8074498338097545, "learning_rate": 4.1216545012165455e-06, "loss": 0.6856, "step": 8841 }, { "epoch": 0.2581530466263759, "grad_norm": 0.6801663044252733, "learning_rate": 4.1214922952149235e-06, "loss": 0.5515, "step": 8842 }, { "epoch": 0.25818224285422325, "grad_norm": 0.7324681320537092, "learning_rate": 4.1213300892133016e-06, "loss": 0.6356, "step": 8843 }, { "epoch": 0.2582114390820706, "grad_norm": 0.6635107093073922, "learning_rate": 4.1211678832116796e-06, "loss": 0.5623, "step": 8844 }, { "epoch": 0.258240635309918, "grad_norm": 0.756442137368866, "learning_rate": 4.121005677210057e-06, "loss": 0.6338, "step": 8845 }, { "epoch": 0.25826983153776534, "grad_norm": 0.7624912174510212, "learning_rate": 4.120843471208435e-06, "loss": 0.749, "step": 8846 }, { "epoch": 0.2582990277656127, "grad_norm": 0.7649898128149994, "learning_rate": 4.120681265206813e-06, "loss": 0.6917, "step": 8847 }, { "epoch": 0.25832822399346006, "grad_norm": 0.7069250779008501, "learning_rate": 4.120519059205191e-06, "loss": 0.6342, "step": 8848 }, { "epoch": 0.2583574202213074, "grad_norm": 0.7528842296915503, "learning_rate": 4.120356853203569e-06, "loss": 0.7024, "step": 8849 }, { "epoch": 0.2583866164491548, "grad_norm": 0.7316726372128676, "learning_rate": 4.120194647201947e-06, "loss": 0.6618, "step": 8850 }, { "epoch": 0.25841581267700214, "grad_norm": 0.769317215866968, "learning_rate": 4.120032441200325e-06, "loss": 0.6907, "step": 8851 }, { "epoch": 0.2584450089048495, "grad_norm": 0.7107110107416278, "learning_rate": 4.119870235198703e-06, "loss": 0.5833, "step": 8852 }, { "epoch": 0.25847420513269687, "grad_norm": 0.7688056496751562, "learning_rate": 4.119708029197081e-06, "loss": 0.5955, "step": 8853 }, { "epoch": 0.2585034013605442, "grad_norm": 0.7467791597206925, "learning_rate": 4.119545823195459e-06, "loss": 0.6999, "step": 8854 }, { "epoch": 0.2585325975883916, "grad_norm": 0.7595829669187869, "learning_rate": 4.119383617193837e-06, "loss": 0.6556, "step": 8855 }, { "epoch": 0.25856179381623895, "grad_norm": 0.7616528318886845, "learning_rate": 4.119221411192215e-06, "loss": 0.7257, "step": 8856 }, { "epoch": 0.2585909900440863, "grad_norm": 0.7079834164949554, "learning_rate": 4.119059205190593e-06, "loss": 0.5879, "step": 8857 }, { "epoch": 0.25862018627193367, "grad_norm": 0.7082300046173631, "learning_rate": 4.118896999188971e-06, "loss": 0.603, "step": 8858 }, { "epoch": 0.25864938249978103, "grad_norm": 0.7586187356973502, "learning_rate": 4.118734793187348e-06, "loss": 0.7142, "step": 8859 }, { "epoch": 0.2586785787276284, "grad_norm": 0.7455387736674498, "learning_rate": 4.118572587185726e-06, "loss": 0.6874, "step": 8860 }, { "epoch": 0.25870777495547576, "grad_norm": 0.7458996411001806, "learning_rate": 4.118410381184104e-06, "loss": 0.6637, "step": 8861 }, { "epoch": 0.2587369711833231, "grad_norm": 0.7454495113990827, "learning_rate": 4.118248175182482e-06, "loss": 0.7227, "step": 8862 }, { "epoch": 0.2587661674111705, "grad_norm": 0.792845645521955, "learning_rate": 4.11808596918086e-06, "loss": 0.7098, "step": 8863 }, { "epoch": 0.25879536363901784, "grad_norm": 0.7174312416728157, "learning_rate": 4.117923763179238e-06, "loss": 0.6277, "step": 8864 }, { "epoch": 0.2588245598668652, "grad_norm": 0.7216525084378651, "learning_rate": 4.117761557177616e-06, "loss": 0.6943, "step": 8865 }, { "epoch": 0.25885375609471256, "grad_norm": 0.7144794596188172, "learning_rate": 4.117599351175994e-06, "loss": 0.6421, "step": 8866 }, { "epoch": 0.2588829523225599, "grad_norm": 0.749432739193017, "learning_rate": 4.117437145174372e-06, "loss": 0.6948, "step": 8867 }, { "epoch": 0.2589121485504073, "grad_norm": 1.0000571108629417, "learning_rate": 4.117274939172749e-06, "loss": 0.6356, "step": 8868 }, { "epoch": 0.25894134477825465, "grad_norm": 0.7271351525984571, "learning_rate": 4.117112733171127e-06, "loss": 0.5957, "step": 8869 }, { "epoch": 0.258970541006102, "grad_norm": 0.768115206045287, "learning_rate": 4.116950527169505e-06, "loss": 0.6892, "step": 8870 }, { "epoch": 0.25899973723394937, "grad_norm": 0.7131612921390803, "learning_rate": 4.116788321167883e-06, "loss": 0.5765, "step": 8871 }, { "epoch": 0.25902893346179673, "grad_norm": 0.7263796199167696, "learning_rate": 4.116626115166262e-06, "loss": 0.6614, "step": 8872 }, { "epoch": 0.2590581296896441, "grad_norm": 0.6983987939712283, "learning_rate": 4.11646390916464e-06, "loss": 0.6737, "step": 8873 }, { "epoch": 0.25908732591749145, "grad_norm": 0.8052259844880313, "learning_rate": 4.116301703163017e-06, "loss": 0.7552, "step": 8874 }, { "epoch": 0.2591165221453388, "grad_norm": 0.752424772676797, "learning_rate": 4.116139497161395e-06, "loss": 0.6914, "step": 8875 }, { "epoch": 0.2591457183731862, "grad_norm": 0.7999768241579983, "learning_rate": 4.115977291159773e-06, "loss": 0.773, "step": 8876 }, { "epoch": 0.25917491460103353, "grad_norm": 0.7376306597008366, "learning_rate": 4.115815085158151e-06, "loss": 0.7279, "step": 8877 }, { "epoch": 0.2592041108288809, "grad_norm": 0.6723053928772571, "learning_rate": 4.115652879156529e-06, "loss": 0.5335, "step": 8878 }, { "epoch": 0.25923330705672826, "grad_norm": 0.7043393483874043, "learning_rate": 4.115490673154907e-06, "loss": 0.6319, "step": 8879 }, { "epoch": 0.2592625032845756, "grad_norm": 0.8033339623163666, "learning_rate": 4.115328467153285e-06, "loss": 0.713, "step": 8880 }, { "epoch": 0.259291699512423, "grad_norm": 0.7161387980275867, "learning_rate": 4.115166261151663e-06, "loss": 0.6259, "step": 8881 }, { "epoch": 0.25932089574027034, "grad_norm": 0.7273947075660997, "learning_rate": 4.115004055150041e-06, "loss": 0.6666, "step": 8882 }, { "epoch": 0.2593500919681177, "grad_norm": 0.7338280183217822, "learning_rate": 4.1148418491484184e-06, "loss": 0.7007, "step": 8883 }, { "epoch": 0.25937928819596506, "grad_norm": 0.6444407447779723, "learning_rate": 4.1146796431467964e-06, "loss": 0.5539, "step": 8884 }, { "epoch": 0.2594084844238124, "grad_norm": 0.6988910006526262, "learning_rate": 4.1145174371451745e-06, "loss": 0.6163, "step": 8885 }, { "epoch": 0.2594376806516598, "grad_norm": 0.7358770664048881, "learning_rate": 4.1143552311435525e-06, "loss": 0.7034, "step": 8886 }, { "epoch": 0.25946687687950715, "grad_norm": 0.7801709640702658, "learning_rate": 4.1141930251419305e-06, "loss": 0.7315, "step": 8887 }, { "epoch": 0.2594960731073545, "grad_norm": 0.7025048170617401, "learning_rate": 4.1140308191403085e-06, "loss": 0.6101, "step": 8888 }, { "epoch": 0.25952526933520187, "grad_norm": 0.8451677282463262, "learning_rate": 4.1138686131386865e-06, "loss": 0.6678, "step": 8889 }, { "epoch": 0.25955446556304923, "grad_norm": 0.7566276209689092, "learning_rate": 4.1137064071370645e-06, "loss": 0.6588, "step": 8890 }, { "epoch": 0.2595836617908966, "grad_norm": 0.7332824039546603, "learning_rate": 4.1135442011354425e-06, "loss": 0.6846, "step": 8891 }, { "epoch": 0.25961285801874395, "grad_norm": 0.7728735522573318, "learning_rate": 4.1133819951338205e-06, "loss": 0.6569, "step": 8892 }, { "epoch": 0.2596420542465913, "grad_norm": 0.8094200727502363, "learning_rate": 4.1132197891321985e-06, "loss": 0.6533, "step": 8893 }, { "epoch": 0.2596712504744387, "grad_norm": 0.7301256800653522, "learning_rate": 4.1130575831305765e-06, "loss": 0.5948, "step": 8894 }, { "epoch": 0.2597004467022861, "grad_norm": 0.7833311977199053, "learning_rate": 4.1128953771289545e-06, "loss": 0.7468, "step": 8895 }, { "epoch": 0.25972964293013345, "grad_norm": 0.7038827904241991, "learning_rate": 4.1127331711273325e-06, "loss": 0.5868, "step": 8896 }, { "epoch": 0.2597588391579808, "grad_norm": 0.7419630876303498, "learning_rate": 4.11257096512571e-06, "loss": 0.6691, "step": 8897 }, { "epoch": 0.2597880353858282, "grad_norm": 0.7390784098493621, "learning_rate": 4.112408759124088e-06, "loss": 0.6806, "step": 8898 }, { "epoch": 0.25981723161367554, "grad_norm": 0.7417125389103499, "learning_rate": 4.112246553122466e-06, "loss": 0.6808, "step": 8899 }, { "epoch": 0.2598464278415229, "grad_norm": 0.7545074489350513, "learning_rate": 4.112084347120844e-06, "loss": 0.7037, "step": 8900 }, { "epoch": 0.25987562406937026, "grad_norm": 0.6294212583240063, "learning_rate": 4.111922141119222e-06, "loss": 0.5437, "step": 8901 }, { "epoch": 0.2599048202972176, "grad_norm": 0.7446056307746763, "learning_rate": 4.1117599351176e-06, "loss": 0.7041, "step": 8902 }, { "epoch": 0.259934016525065, "grad_norm": 0.7288665601666858, "learning_rate": 4.111597729115978e-06, "loss": 0.6988, "step": 8903 }, { "epoch": 0.25996321275291234, "grad_norm": 0.6744359197906656, "learning_rate": 4.111435523114356e-06, "loss": 0.5381, "step": 8904 }, { "epoch": 0.2599924089807597, "grad_norm": 0.6759656802970333, "learning_rate": 4.111273317112734e-06, "loss": 0.5606, "step": 8905 }, { "epoch": 0.26002160520860707, "grad_norm": 0.7649798842334836, "learning_rate": 4.111111111111111e-06, "loss": 0.7123, "step": 8906 }, { "epoch": 0.2600508014364544, "grad_norm": 0.661245050048778, "learning_rate": 4.110948905109489e-06, "loss": 0.5182, "step": 8907 }, { "epoch": 0.2600799976643018, "grad_norm": 0.7185284451298554, "learning_rate": 4.110786699107867e-06, "loss": 0.6033, "step": 8908 }, { "epoch": 0.26010919389214915, "grad_norm": 0.7282689997265914, "learning_rate": 4.110624493106245e-06, "loss": 0.6263, "step": 8909 }, { "epoch": 0.2601383901199965, "grad_norm": 0.7207183822964567, "learning_rate": 4.110462287104624e-06, "loss": 0.6763, "step": 8910 }, { "epoch": 0.26016758634784387, "grad_norm": 0.8084174554815337, "learning_rate": 4.110300081103002e-06, "loss": 0.711, "step": 8911 }, { "epoch": 0.26019678257569123, "grad_norm": 0.8519865121438966, "learning_rate": 4.110137875101379e-06, "loss": 0.7883, "step": 8912 }, { "epoch": 0.2602259788035386, "grad_norm": 0.8935522402546433, "learning_rate": 4.109975669099757e-06, "loss": 0.7335, "step": 8913 }, { "epoch": 0.26025517503138595, "grad_norm": 0.6751579325439037, "learning_rate": 4.109813463098135e-06, "loss": 0.5652, "step": 8914 }, { "epoch": 0.2602843712592333, "grad_norm": 0.7777920714386404, "learning_rate": 4.109651257096513e-06, "loss": 0.728, "step": 8915 }, { "epoch": 0.2603135674870807, "grad_norm": 0.7235552060502535, "learning_rate": 4.109489051094891e-06, "loss": 0.6634, "step": 8916 }, { "epoch": 0.26034276371492804, "grad_norm": 0.7691455782392776, "learning_rate": 4.109326845093269e-06, "loss": 0.6946, "step": 8917 }, { "epoch": 0.2603719599427754, "grad_norm": 0.7388287660652639, "learning_rate": 4.109164639091647e-06, "loss": 0.6575, "step": 8918 }, { "epoch": 0.26040115617062276, "grad_norm": 0.9962229805185748, "learning_rate": 4.109002433090025e-06, "loss": 0.792, "step": 8919 }, { "epoch": 0.2604303523984701, "grad_norm": 0.7832937484786648, "learning_rate": 4.108840227088403e-06, "loss": 0.7319, "step": 8920 }, { "epoch": 0.2604595486263175, "grad_norm": 0.6877614890926664, "learning_rate": 4.10867802108678e-06, "loss": 0.559, "step": 8921 }, { "epoch": 0.26048874485416484, "grad_norm": 0.7065123696315573, "learning_rate": 4.108515815085158e-06, "loss": 0.6216, "step": 8922 }, { "epoch": 0.2605179410820122, "grad_norm": 0.7152686751703121, "learning_rate": 4.108353609083536e-06, "loss": 0.6179, "step": 8923 }, { "epoch": 0.26054713730985957, "grad_norm": 0.7697201101524478, "learning_rate": 4.108191403081914e-06, "loss": 0.7507, "step": 8924 }, { "epoch": 0.26057633353770693, "grad_norm": 0.8541683740788145, "learning_rate": 4.108029197080292e-06, "loss": 0.7503, "step": 8925 }, { "epoch": 0.2606055297655543, "grad_norm": 0.7222233122541226, "learning_rate": 4.10786699107867e-06, "loss": 0.6038, "step": 8926 }, { "epoch": 0.26063472599340165, "grad_norm": 0.9860803326983131, "learning_rate": 4.107704785077048e-06, "loss": 0.7066, "step": 8927 }, { "epoch": 0.260663922221249, "grad_norm": 0.7793104865642242, "learning_rate": 4.107542579075426e-06, "loss": 0.6738, "step": 8928 }, { "epoch": 0.2606931184490964, "grad_norm": 0.6873436247425291, "learning_rate": 4.107380373073804e-06, "loss": 0.5672, "step": 8929 }, { "epoch": 0.26072231467694373, "grad_norm": 0.7496620979852219, "learning_rate": 4.107218167072182e-06, "loss": 0.7144, "step": 8930 }, { "epoch": 0.2607515109047911, "grad_norm": 0.6775662388143006, "learning_rate": 4.10705596107056e-06, "loss": 0.5762, "step": 8931 }, { "epoch": 0.26078070713263846, "grad_norm": 0.7138419112619381, "learning_rate": 4.106893755068938e-06, "loss": 0.6232, "step": 8932 }, { "epoch": 0.2608099033604858, "grad_norm": 0.772401404340081, "learning_rate": 4.106731549067316e-06, "loss": 0.5964, "step": 8933 }, { "epoch": 0.2608390995883332, "grad_norm": 0.7042164940839788, "learning_rate": 4.106569343065694e-06, "loss": 0.6528, "step": 8934 }, { "epoch": 0.26086829581618054, "grad_norm": 0.7993434089780113, "learning_rate": 4.106407137064071e-06, "loss": 0.741, "step": 8935 }, { "epoch": 0.2608974920440279, "grad_norm": 0.75428406572745, "learning_rate": 4.106244931062449e-06, "loss": 0.6941, "step": 8936 }, { "epoch": 0.26092668827187526, "grad_norm": 0.7896876151636513, "learning_rate": 4.106082725060827e-06, "loss": 0.739, "step": 8937 }, { "epoch": 0.2609558844997226, "grad_norm": 0.8088156219361061, "learning_rate": 4.1059205190592054e-06, "loss": 0.7487, "step": 8938 }, { "epoch": 0.26098508072757, "grad_norm": 0.7539894195214771, "learning_rate": 4.1057583130575834e-06, "loss": 0.7687, "step": 8939 }, { "epoch": 0.26101427695541735, "grad_norm": 0.8078590578024248, "learning_rate": 4.1055961070559614e-06, "loss": 0.6153, "step": 8940 }, { "epoch": 0.2610434731832647, "grad_norm": 0.7588935786155043, "learning_rate": 4.1054339010543394e-06, "loss": 0.6328, "step": 8941 }, { "epoch": 0.26107266941111207, "grad_norm": 0.7471006514282664, "learning_rate": 4.1052716950527175e-06, "loss": 0.638, "step": 8942 }, { "epoch": 0.26110186563895943, "grad_norm": 0.7468754874050672, "learning_rate": 4.1051094890510955e-06, "loss": 0.6947, "step": 8943 }, { "epoch": 0.2611310618668068, "grad_norm": 0.7951594947456243, "learning_rate": 4.104947283049473e-06, "loss": 0.7211, "step": 8944 }, { "epoch": 0.26116025809465415, "grad_norm": 0.781837122411624, "learning_rate": 4.104785077047851e-06, "loss": 0.7145, "step": 8945 }, { "epoch": 0.2611894543225015, "grad_norm": 0.849179395888881, "learning_rate": 4.104622871046229e-06, "loss": 0.6428, "step": 8946 }, { "epoch": 0.2612186505503489, "grad_norm": 0.6926194454192902, "learning_rate": 4.104460665044607e-06, "loss": 0.6025, "step": 8947 }, { "epoch": 0.26124784677819624, "grad_norm": 0.7565794587405149, "learning_rate": 4.1042984590429855e-06, "loss": 0.6782, "step": 8948 }, { "epoch": 0.2612770430060436, "grad_norm": 0.7105162299423186, "learning_rate": 4.1041362530413635e-06, "loss": 0.6868, "step": 8949 }, { "epoch": 0.26130623923389096, "grad_norm": 0.7811587329139864, "learning_rate": 4.103974047039741e-06, "loss": 0.6659, "step": 8950 }, { "epoch": 0.2613354354617383, "grad_norm": 0.7332342135404554, "learning_rate": 4.103811841038119e-06, "loss": 0.6198, "step": 8951 }, { "epoch": 0.2613646316895857, "grad_norm": 0.7723781812645862, "learning_rate": 4.103649635036497e-06, "loss": 0.6697, "step": 8952 }, { "epoch": 0.26139382791743304, "grad_norm": 0.7348630858621993, "learning_rate": 4.103487429034875e-06, "loss": 0.6612, "step": 8953 }, { "epoch": 0.2614230241452804, "grad_norm": 0.7374670001810055, "learning_rate": 4.103325223033253e-06, "loss": 0.6244, "step": 8954 }, { "epoch": 0.2614522203731278, "grad_norm": 0.7046077206888265, "learning_rate": 4.103163017031631e-06, "loss": 0.5624, "step": 8955 }, { "epoch": 0.2614814166009752, "grad_norm": 0.7098949826542696, "learning_rate": 4.103000811030009e-06, "loss": 0.6206, "step": 8956 }, { "epoch": 0.26151061282882254, "grad_norm": 0.8079777742157497, "learning_rate": 4.102838605028387e-06, "loss": 0.7536, "step": 8957 }, { "epoch": 0.2615398090566699, "grad_norm": 0.7741120650350235, "learning_rate": 4.102676399026764e-06, "loss": 0.6734, "step": 8958 }, { "epoch": 0.26156900528451726, "grad_norm": 0.737302990514329, "learning_rate": 4.102514193025142e-06, "loss": 0.6375, "step": 8959 }, { "epoch": 0.2615982015123646, "grad_norm": 0.7474250274908458, "learning_rate": 4.10235198702352e-06, "loss": 0.7289, "step": 8960 }, { "epoch": 0.261627397740212, "grad_norm": 0.7531813665157738, "learning_rate": 4.102189781021898e-06, "loss": 0.7276, "step": 8961 }, { "epoch": 0.26165659396805935, "grad_norm": 0.7211653622594679, "learning_rate": 4.102027575020276e-06, "loss": 0.6225, "step": 8962 }, { "epoch": 0.2616857901959067, "grad_norm": 0.7616762089596787, "learning_rate": 4.101865369018654e-06, "loss": 0.7055, "step": 8963 }, { "epoch": 0.26171498642375407, "grad_norm": 0.731553561589747, "learning_rate": 4.101703163017032e-06, "loss": 0.6763, "step": 8964 }, { "epoch": 0.26174418265160143, "grad_norm": 0.6988298852005709, "learning_rate": 4.10154095701541e-06, "loss": 0.5965, "step": 8965 }, { "epoch": 0.2617733788794488, "grad_norm": 0.7103036774560554, "learning_rate": 4.101378751013788e-06, "loss": 0.6271, "step": 8966 }, { "epoch": 0.26180257510729615, "grad_norm": 0.73322232514454, "learning_rate": 4.101216545012166e-06, "loss": 0.6525, "step": 8967 }, { "epoch": 0.2618317713351435, "grad_norm": 0.7148692169931068, "learning_rate": 4.101054339010544e-06, "loss": 0.6018, "step": 8968 }, { "epoch": 0.2618609675629909, "grad_norm": 0.7231778336086883, "learning_rate": 4.100892133008922e-06, "loss": 0.5912, "step": 8969 }, { "epoch": 0.26189016379083824, "grad_norm": 0.7661428180385007, "learning_rate": 4.1007299270073e-06, "loss": 0.7185, "step": 8970 }, { "epoch": 0.2619193600186856, "grad_norm": 0.7737580503609873, "learning_rate": 4.100567721005678e-06, "loss": 0.6536, "step": 8971 }, { "epoch": 0.26194855624653296, "grad_norm": 0.7341294653700781, "learning_rate": 4.100405515004056e-06, "loss": 0.6384, "step": 8972 }, { "epoch": 0.2619777524743803, "grad_norm": 0.7665643344019112, "learning_rate": 4.100243309002433e-06, "loss": 0.7853, "step": 8973 }, { "epoch": 0.2620069487022277, "grad_norm": 0.7221086466999511, "learning_rate": 4.100081103000811e-06, "loss": 0.6414, "step": 8974 }, { "epoch": 0.26203614493007504, "grad_norm": 0.7286441355032416, "learning_rate": 4.099918896999189e-06, "loss": 0.6941, "step": 8975 }, { "epoch": 0.2620653411579224, "grad_norm": 0.8289498708189733, "learning_rate": 4.099756690997567e-06, "loss": 0.7353, "step": 8976 }, { "epoch": 0.26209453738576977, "grad_norm": 0.7559908079944601, "learning_rate": 4.099594484995945e-06, "loss": 0.6928, "step": 8977 }, { "epoch": 0.2621237336136171, "grad_norm": 0.7033862823884751, "learning_rate": 4.099432278994323e-06, "loss": 0.5826, "step": 8978 }, { "epoch": 0.2621529298414645, "grad_norm": 0.7630956934196904, "learning_rate": 4.099270072992701e-06, "loss": 0.7191, "step": 8979 }, { "epoch": 0.26218212606931185, "grad_norm": 0.7906771486319513, "learning_rate": 4.099107866991079e-06, "loss": 0.6554, "step": 8980 }, { "epoch": 0.2622113222971592, "grad_norm": 0.7367414309793953, "learning_rate": 4.098945660989457e-06, "loss": 0.6355, "step": 8981 }, { "epoch": 0.26224051852500657, "grad_norm": 0.7209749134244874, "learning_rate": 4.098783454987834e-06, "loss": 0.5997, "step": 8982 }, { "epoch": 0.26226971475285393, "grad_norm": 0.7074471520448451, "learning_rate": 4.098621248986212e-06, "loss": 0.6059, "step": 8983 }, { "epoch": 0.2622989109807013, "grad_norm": 0.8102789837709649, "learning_rate": 4.09845904298459e-06, "loss": 0.677, "step": 8984 }, { "epoch": 0.26232810720854866, "grad_norm": 0.7023236939176744, "learning_rate": 4.098296836982968e-06, "loss": 0.6045, "step": 8985 }, { "epoch": 0.262357303436396, "grad_norm": 0.7796641436556334, "learning_rate": 4.098134630981347e-06, "loss": 0.7528, "step": 8986 }, { "epoch": 0.2623864996642434, "grad_norm": 0.7179214276946766, "learning_rate": 4.097972424979725e-06, "loss": 0.635, "step": 8987 }, { "epoch": 0.26241569589209074, "grad_norm": 0.791781414317311, "learning_rate": 4.097810218978102e-06, "loss": 0.6813, "step": 8988 }, { "epoch": 0.2624448921199381, "grad_norm": 0.7202237408468384, "learning_rate": 4.09764801297648e-06, "loss": 0.5623, "step": 8989 }, { "epoch": 0.26247408834778546, "grad_norm": 0.6765661402814512, "learning_rate": 4.097485806974858e-06, "loss": 0.5422, "step": 8990 }, { "epoch": 0.2625032845756328, "grad_norm": 0.7306295355364734, "learning_rate": 4.097323600973236e-06, "loss": 0.6853, "step": 8991 }, { "epoch": 0.2625324808034802, "grad_norm": 0.6868842093861542, "learning_rate": 4.097161394971614e-06, "loss": 0.6258, "step": 8992 }, { "epoch": 0.26256167703132754, "grad_norm": 0.769498281176858, "learning_rate": 4.096999188969992e-06, "loss": 0.7351, "step": 8993 }, { "epoch": 0.2625908732591749, "grad_norm": 0.7318557404270254, "learning_rate": 4.09683698296837e-06, "loss": 0.6737, "step": 8994 }, { "epoch": 0.26262006948702227, "grad_norm": 0.7175796805667937, "learning_rate": 4.0966747769667484e-06, "loss": 0.6734, "step": 8995 }, { "epoch": 0.26264926571486963, "grad_norm": 0.7746805376332009, "learning_rate": 4.096512570965126e-06, "loss": 0.7014, "step": 8996 }, { "epoch": 0.262678461942717, "grad_norm": 0.7340858642163366, "learning_rate": 4.096350364963504e-06, "loss": 0.6866, "step": 8997 }, { "epoch": 0.26270765817056435, "grad_norm": 0.7291288406531908, "learning_rate": 4.096188158961882e-06, "loss": 0.65, "step": 8998 }, { "epoch": 0.2627368543984117, "grad_norm": 0.7845265029860186, "learning_rate": 4.09602595296026e-06, "loss": 0.7563, "step": 8999 }, { "epoch": 0.2627660506262591, "grad_norm": 0.6971710349445476, "learning_rate": 4.095863746958638e-06, "loss": 0.6249, "step": 9000 }, { "epoch": 0.26279524685410643, "grad_norm": 0.7765192598819928, "learning_rate": 4.095701540957016e-06, "loss": 0.6972, "step": 9001 }, { "epoch": 0.2628244430819538, "grad_norm": 0.6871816918671216, "learning_rate": 4.095539334955394e-06, "loss": 0.5642, "step": 9002 }, { "epoch": 0.26285363930980116, "grad_norm": 0.7513407928962751, "learning_rate": 4.095377128953772e-06, "loss": 0.69, "step": 9003 }, { "epoch": 0.2628828355376485, "grad_norm": 0.8440959277031654, "learning_rate": 4.09521492295215e-06, "loss": 0.65, "step": 9004 }, { "epoch": 0.2629120317654959, "grad_norm": 0.6515589682723839, "learning_rate": 4.095052716950528e-06, "loss": 0.5753, "step": 9005 }, { "epoch": 0.26294122799334324, "grad_norm": 0.711976948163145, "learning_rate": 4.094890510948906e-06, "loss": 0.6194, "step": 9006 }, { "epoch": 0.2629704242211906, "grad_norm": 0.6953253353240352, "learning_rate": 4.094728304947284e-06, "loss": 0.6224, "step": 9007 }, { "epoch": 0.26299962044903796, "grad_norm": 0.8034750291374628, "learning_rate": 4.094566098945662e-06, "loss": 0.5736, "step": 9008 }, { "epoch": 0.2630288166768853, "grad_norm": 0.7613201459798679, "learning_rate": 4.09440389294404e-06, "loss": 0.6894, "step": 9009 }, { "epoch": 0.2630580129047327, "grad_norm": 0.7105074696826155, "learning_rate": 4.094241686942418e-06, "loss": 0.5723, "step": 9010 }, { "epoch": 0.26308720913258005, "grad_norm": 0.8168962302510218, "learning_rate": 4.094079480940795e-06, "loss": 0.6399, "step": 9011 }, { "epoch": 0.2631164053604274, "grad_norm": 0.7274231424069942, "learning_rate": 4.093917274939173e-06, "loss": 0.6728, "step": 9012 }, { "epoch": 0.26314560158827477, "grad_norm": 0.6533297165983418, "learning_rate": 4.093755068937551e-06, "loss": 0.5558, "step": 9013 }, { "epoch": 0.26317479781612213, "grad_norm": 0.6974028964910206, "learning_rate": 4.093592862935929e-06, "loss": 0.578, "step": 9014 }, { "epoch": 0.2632039940439695, "grad_norm": 0.7984591109799046, "learning_rate": 4.093430656934307e-06, "loss": 0.6092, "step": 9015 }, { "epoch": 0.2632331902718169, "grad_norm": 0.7440582478762371, "learning_rate": 4.093268450932685e-06, "loss": 0.6832, "step": 9016 }, { "epoch": 0.26326238649966427, "grad_norm": 0.8211466703427613, "learning_rate": 4.093106244931063e-06, "loss": 0.7651, "step": 9017 }, { "epoch": 0.26329158272751163, "grad_norm": 0.7916673313451373, "learning_rate": 4.092944038929441e-06, "loss": 0.6882, "step": 9018 }, { "epoch": 0.263320778955359, "grad_norm": 0.7906247493463, "learning_rate": 4.092781832927819e-06, "loss": 0.7795, "step": 9019 }, { "epoch": 0.26334997518320635, "grad_norm": 0.8021021901862406, "learning_rate": 4.092619626926196e-06, "loss": 0.7728, "step": 9020 }, { "epoch": 0.2633791714110537, "grad_norm": 0.8066809323674609, "learning_rate": 4.092457420924574e-06, "loss": 0.6863, "step": 9021 }, { "epoch": 0.2634083676389011, "grad_norm": 0.8138882285953779, "learning_rate": 4.092295214922952e-06, "loss": 0.7024, "step": 9022 }, { "epoch": 0.26343756386674844, "grad_norm": 0.7555652602629778, "learning_rate": 4.092133008921331e-06, "loss": 0.7134, "step": 9023 }, { "epoch": 0.2634667600945958, "grad_norm": 0.7128562052303447, "learning_rate": 4.091970802919709e-06, "loss": 0.5869, "step": 9024 }, { "epoch": 0.26349595632244316, "grad_norm": 0.7260838334522318, "learning_rate": 4.091808596918087e-06, "loss": 0.6639, "step": 9025 }, { "epoch": 0.2635251525502905, "grad_norm": 0.7349392952636795, "learning_rate": 4.091646390916464e-06, "loss": 0.6443, "step": 9026 }, { "epoch": 0.2635543487781379, "grad_norm": 0.781292023251644, "learning_rate": 4.091484184914842e-06, "loss": 0.7454, "step": 9027 }, { "epoch": 0.26358354500598524, "grad_norm": 0.7464105522243726, "learning_rate": 4.09132197891322e-06, "loss": 0.6024, "step": 9028 }, { "epoch": 0.2636127412338326, "grad_norm": 0.6586125856431568, "learning_rate": 4.091159772911598e-06, "loss": 0.5733, "step": 9029 }, { "epoch": 0.26364193746167996, "grad_norm": 0.7114293803860373, "learning_rate": 4.090997566909976e-06, "loss": 0.6153, "step": 9030 }, { "epoch": 0.2636711336895273, "grad_norm": 0.8198270525039028, "learning_rate": 4.090835360908354e-06, "loss": 0.7922, "step": 9031 }, { "epoch": 0.2637003299173747, "grad_norm": 0.6643349362385192, "learning_rate": 4.090673154906732e-06, "loss": 0.5667, "step": 9032 }, { "epoch": 0.26372952614522205, "grad_norm": 0.7126679353466626, "learning_rate": 4.09051094890511e-06, "loss": 0.6738, "step": 9033 }, { "epoch": 0.2637587223730694, "grad_norm": 0.7622429295193902, "learning_rate": 4.090348742903487e-06, "loss": 0.6726, "step": 9034 }, { "epoch": 0.26378791860091677, "grad_norm": 0.7144313772005538, "learning_rate": 4.090186536901865e-06, "loss": 0.6137, "step": 9035 }, { "epoch": 0.26381711482876413, "grad_norm": 0.6784372575709647, "learning_rate": 4.090024330900243e-06, "loss": 0.5888, "step": 9036 }, { "epoch": 0.2638463110566115, "grad_norm": 0.7515433499058608, "learning_rate": 4.089862124898621e-06, "loss": 0.6661, "step": 9037 }, { "epoch": 0.26387550728445885, "grad_norm": 0.7594434206690363, "learning_rate": 4.089699918896999e-06, "loss": 0.6545, "step": 9038 }, { "epoch": 0.2639047035123062, "grad_norm": 0.7466992161833331, "learning_rate": 4.089537712895377e-06, "loss": 0.6875, "step": 9039 }, { "epoch": 0.2639338997401536, "grad_norm": 0.7520447195453924, "learning_rate": 4.089375506893755e-06, "loss": 0.6692, "step": 9040 }, { "epoch": 0.26396309596800094, "grad_norm": 0.7558938419864365, "learning_rate": 4.089213300892133e-06, "loss": 0.6089, "step": 9041 }, { "epoch": 0.2639922921958483, "grad_norm": 0.7019514784727617, "learning_rate": 4.089051094890511e-06, "loss": 0.602, "step": 9042 }, { "epoch": 0.26402148842369566, "grad_norm": 0.8340745874683683, "learning_rate": 4.088888888888889e-06, "loss": 0.6959, "step": 9043 }, { "epoch": 0.264050684651543, "grad_norm": 0.7017283862651724, "learning_rate": 4.088726682887267e-06, "loss": 0.5951, "step": 9044 }, { "epoch": 0.2640798808793904, "grad_norm": 0.7154511498713791, "learning_rate": 4.088564476885645e-06, "loss": 0.5869, "step": 9045 }, { "epoch": 0.26410907710723774, "grad_norm": 0.7549036947660546, "learning_rate": 4.088402270884023e-06, "loss": 0.6246, "step": 9046 }, { "epoch": 0.2641382733350851, "grad_norm": 0.7530353974288533, "learning_rate": 4.088240064882401e-06, "loss": 0.6161, "step": 9047 }, { "epoch": 0.26416746956293247, "grad_norm": 0.7238038476419858, "learning_rate": 4.088077858880779e-06, "loss": 0.6458, "step": 9048 }, { "epoch": 0.2641966657907798, "grad_norm": 0.826664948737954, "learning_rate": 4.0879156528791566e-06, "loss": 0.6058, "step": 9049 }, { "epoch": 0.2642258620186272, "grad_norm": 0.7386711934534703, "learning_rate": 4.0877534468775346e-06, "loss": 0.6568, "step": 9050 }, { "epoch": 0.26425505824647455, "grad_norm": 0.826585884382266, "learning_rate": 4.0875912408759126e-06, "loss": 0.6861, "step": 9051 }, { "epoch": 0.2642842544743219, "grad_norm": 0.7926209437208765, "learning_rate": 4.087429034874291e-06, "loss": 0.7501, "step": 9052 }, { "epoch": 0.2643134507021693, "grad_norm": 0.7319805405048173, "learning_rate": 4.087266828872669e-06, "loss": 0.6959, "step": 9053 }, { "epoch": 0.26434264693001663, "grad_norm": 0.777705035409184, "learning_rate": 4.087104622871047e-06, "loss": 0.7127, "step": 9054 }, { "epoch": 0.264371843157864, "grad_norm": 0.8213494395563294, "learning_rate": 4.086942416869425e-06, "loss": 0.5909, "step": 9055 }, { "epoch": 0.26440103938571136, "grad_norm": 0.7652566528355154, "learning_rate": 4.086780210867803e-06, "loss": 0.681, "step": 9056 }, { "epoch": 0.2644302356135587, "grad_norm": 0.7031912017377576, "learning_rate": 4.086618004866181e-06, "loss": 0.6493, "step": 9057 }, { "epoch": 0.2644594318414061, "grad_norm": 0.6659561761569623, "learning_rate": 4.086455798864558e-06, "loss": 0.5667, "step": 9058 }, { "epoch": 0.26448862806925344, "grad_norm": 0.6920793290124585, "learning_rate": 4.086293592862936e-06, "loss": 0.5977, "step": 9059 }, { "epoch": 0.2645178242971008, "grad_norm": 0.7681034726938077, "learning_rate": 4.086131386861314e-06, "loss": 0.7147, "step": 9060 }, { "epoch": 0.26454702052494816, "grad_norm": 0.7325058663954335, "learning_rate": 4.085969180859693e-06, "loss": 0.6961, "step": 9061 }, { "epoch": 0.2645762167527955, "grad_norm": 0.7572639532293641, "learning_rate": 4.085806974858071e-06, "loss": 0.7161, "step": 9062 }, { "epoch": 0.2646054129806429, "grad_norm": 0.7458665432717545, "learning_rate": 4.085644768856449e-06, "loss": 0.6707, "step": 9063 }, { "epoch": 0.26463460920849025, "grad_norm": 0.7876796483095436, "learning_rate": 4.085482562854826e-06, "loss": 0.72, "step": 9064 }, { "epoch": 0.2646638054363376, "grad_norm": 1.1104481669194062, "learning_rate": 4.085320356853204e-06, "loss": 0.7454, "step": 9065 }, { "epoch": 0.26469300166418497, "grad_norm": 0.7185052126321548, "learning_rate": 4.085158150851582e-06, "loss": 0.6825, "step": 9066 }, { "epoch": 0.26472219789203233, "grad_norm": 0.7637789594328801, "learning_rate": 4.08499594484996e-06, "loss": 0.6844, "step": 9067 }, { "epoch": 0.2647513941198797, "grad_norm": 0.7536643546345285, "learning_rate": 4.084833738848338e-06, "loss": 0.7579, "step": 9068 }, { "epoch": 0.26478059034772705, "grad_norm": 0.8370594588403986, "learning_rate": 4.084671532846716e-06, "loss": 0.6582, "step": 9069 }, { "epoch": 0.2648097865755744, "grad_norm": 0.7797824608545514, "learning_rate": 4.084509326845094e-06, "loss": 0.6315, "step": 9070 }, { "epoch": 0.2648389828034218, "grad_norm": 0.7619501079076878, "learning_rate": 4.084347120843472e-06, "loss": 0.6668, "step": 9071 }, { "epoch": 0.26486817903126914, "grad_norm": 0.7271283691061239, "learning_rate": 4.084184914841849e-06, "loss": 0.676, "step": 9072 }, { "epoch": 0.2648973752591165, "grad_norm": 0.6879423817082231, "learning_rate": 4.084022708840227e-06, "loss": 0.5941, "step": 9073 }, { "epoch": 0.26492657148696386, "grad_norm": 0.7342390166232462, "learning_rate": 4.083860502838605e-06, "loss": 0.6377, "step": 9074 }, { "epoch": 0.2649557677148112, "grad_norm": 0.8125751308483835, "learning_rate": 4.083698296836983e-06, "loss": 0.7375, "step": 9075 }, { "epoch": 0.26498496394265864, "grad_norm": 0.7546163165591497, "learning_rate": 4.083536090835361e-06, "loss": 0.6811, "step": 9076 }, { "epoch": 0.265014160170506, "grad_norm": 0.7433855865696865, "learning_rate": 4.083373884833739e-06, "loss": 0.7257, "step": 9077 }, { "epoch": 0.26504335639835336, "grad_norm": 0.7642475083619182, "learning_rate": 4.083211678832117e-06, "loss": 0.7138, "step": 9078 }, { "epoch": 0.2650725526262007, "grad_norm": 0.8313765280479305, "learning_rate": 4.083049472830495e-06, "loss": 0.8199, "step": 9079 }, { "epoch": 0.2651017488540481, "grad_norm": 0.7338131517659184, "learning_rate": 4.082887266828873e-06, "loss": 0.6812, "step": 9080 }, { "epoch": 0.26513094508189544, "grad_norm": 0.736414543633254, "learning_rate": 4.082725060827251e-06, "loss": 0.6664, "step": 9081 }, { "epoch": 0.2651601413097428, "grad_norm": 0.741679676995219, "learning_rate": 4.082562854825629e-06, "loss": 0.6889, "step": 9082 }, { "epoch": 0.26518933753759016, "grad_norm": 0.7163614539592963, "learning_rate": 4.082400648824007e-06, "loss": 0.6212, "step": 9083 }, { "epoch": 0.2652185337654375, "grad_norm": 0.71780344834027, "learning_rate": 4.082238442822385e-06, "loss": 0.6117, "step": 9084 }, { "epoch": 0.2652477299932849, "grad_norm": 0.6973730518365654, "learning_rate": 4.082076236820763e-06, "loss": 0.5923, "step": 9085 }, { "epoch": 0.26527692622113225, "grad_norm": 0.6883945950460538, "learning_rate": 4.081914030819141e-06, "loss": 0.5962, "step": 9086 }, { "epoch": 0.2653061224489796, "grad_norm": 0.6964742940907003, "learning_rate": 4.081751824817518e-06, "loss": 0.6143, "step": 9087 }, { "epoch": 0.26533531867682697, "grad_norm": 0.7399366197353694, "learning_rate": 4.081589618815896e-06, "loss": 0.6829, "step": 9088 }, { "epoch": 0.26536451490467433, "grad_norm": 0.7889737660397314, "learning_rate": 4.081427412814274e-06, "loss": 0.6394, "step": 9089 }, { "epoch": 0.2653937111325217, "grad_norm": 0.7416850653652802, "learning_rate": 4.081265206812652e-06, "loss": 0.6806, "step": 9090 }, { "epoch": 0.26542290736036905, "grad_norm": 0.7904957892150063, "learning_rate": 4.08110300081103e-06, "loss": 0.7215, "step": 9091 }, { "epoch": 0.2654521035882164, "grad_norm": 0.7298563220351788, "learning_rate": 4.080940794809408e-06, "loss": 0.6319, "step": 9092 }, { "epoch": 0.2654812998160638, "grad_norm": 0.7955171079831003, "learning_rate": 4.080778588807786e-06, "loss": 0.7105, "step": 9093 }, { "epoch": 0.26551049604391114, "grad_norm": 0.7703015184710662, "learning_rate": 4.080616382806164e-06, "loss": 0.7189, "step": 9094 }, { "epoch": 0.2655396922717585, "grad_norm": 0.7384094787619814, "learning_rate": 4.080454176804542e-06, "loss": 0.6174, "step": 9095 }, { "epoch": 0.26556888849960586, "grad_norm": 0.7517100187980744, "learning_rate": 4.0802919708029195e-06, "loss": 0.7328, "step": 9096 }, { "epoch": 0.2655980847274532, "grad_norm": 0.6683532136334212, "learning_rate": 4.0801297648012975e-06, "loss": 0.6164, "step": 9097 }, { "epoch": 0.2656272809553006, "grad_norm": 0.8077939455587725, "learning_rate": 4.0799675587996755e-06, "loss": 0.806, "step": 9098 }, { "epoch": 0.26565647718314794, "grad_norm": 0.8201795428878519, "learning_rate": 4.079805352798054e-06, "loss": 0.7413, "step": 9099 }, { "epoch": 0.2656856734109953, "grad_norm": 0.7227002351774692, "learning_rate": 4.079643146796432e-06, "loss": 0.6309, "step": 9100 }, { "epoch": 0.26571486963884267, "grad_norm": 0.7774709481316506, "learning_rate": 4.0794809407948095e-06, "loss": 0.6963, "step": 9101 }, { "epoch": 0.26574406586669, "grad_norm": 0.7026132638775046, "learning_rate": 4.0793187347931875e-06, "loss": 0.5897, "step": 9102 }, { "epoch": 0.2657732620945374, "grad_norm": 0.739725140996377, "learning_rate": 4.0791565287915655e-06, "loss": 0.6133, "step": 9103 }, { "epoch": 0.26580245832238475, "grad_norm": 0.7509205131397567, "learning_rate": 4.0789943227899435e-06, "loss": 0.665, "step": 9104 }, { "epoch": 0.2658316545502321, "grad_norm": 0.8267863847044885, "learning_rate": 4.0788321167883215e-06, "loss": 0.7764, "step": 9105 }, { "epoch": 0.26586085077807947, "grad_norm": 0.7092651584077181, "learning_rate": 4.0786699107866996e-06, "loss": 0.629, "step": 9106 }, { "epoch": 0.26589004700592683, "grad_norm": 0.7549338332787122, "learning_rate": 4.0785077047850776e-06, "loss": 0.6485, "step": 9107 }, { "epoch": 0.2659192432337742, "grad_norm": 0.8045214390345217, "learning_rate": 4.0783454987834556e-06, "loss": 0.6966, "step": 9108 }, { "epoch": 0.26594843946162156, "grad_norm": 0.7637359998690461, "learning_rate": 4.0781832927818336e-06, "loss": 0.625, "step": 9109 }, { "epoch": 0.2659776356894689, "grad_norm": 0.7419042403654103, "learning_rate": 4.078021086780211e-06, "loss": 0.7057, "step": 9110 }, { "epoch": 0.2660068319173163, "grad_norm": 0.6983643671253957, "learning_rate": 4.077858880778589e-06, "loss": 0.5781, "step": 9111 }, { "epoch": 0.26603602814516364, "grad_norm": 0.7564154987742978, "learning_rate": 4.077696674776967e-06, "loss": 0.6718, "step": 9112 }, { "epoch": 0.266065224373011, "grad_norm": 0.7032767865671183, "learning_rate": 4.077534468775345e-06, "loss": 0.6414, "step": 9113 }, { "epoch": 0.26609442060085836, "grad_norm": 0.7347582796605752, "learning_rate": 4.077372262773723e-06, "loss": 0.6941, "step": 9114 }, { "epoch": 0.2661236168287057, "grad_norm": 0.8033461581625135, "learning_rate": 4.077210056772101e-06, "loss": 0.7112, "step": 9115 }, { "epoch": 0.2661528130565531, "grad_norm": 0.7284184710954527, "learning_rate": 4.077047850770479e-06, "loss": 0.6524, "step": 9116 }, { "epoch": 0.26618200928440044, "grad_norm": 0.8021237347371963, "learning_rate": 4.076885644768857e-06, "loss": 0.6459, "step": 9117 }, { "epoch": 0.2662112055122478, "grad_norm": 0.774621702513091, "learning_rate": 4.076723438767235e-06, "loss": 0.7266, "step": 9118 }, { "epoch": 0.26624040174009517, "grad_norm": 0.8353226800198192, "learning_rate": 4.076561232765613e-06, "loss": 0.6425, "step": 9119 }, { "epoch": 0.26626959796794253, "grad_norm": 0.7826909938347815, "learning_rate": 4.076399026763991e-06, "loss": 0.6502, "step": 9120 }, { "epoch": 0.2662987941957899, "grad_norm": 0.7418450316927191, "learning_rate": 4.076236820762369e-06, "loss": 0.7062, "step": 9121 }, { "epoch": 0.26632799042363725, "grad_norm": 0.7227011545990845, "learning_rate": 4.076074614760747e-06, "loss": 0.6524, "step": 9122 }, { "epoch": 0.2663571866514846, "grad_norm": 0.7153001960317558, "learning_rate": 4.075912408759125e-06, "loss": 0.6812, "step": 9123 }, { "epoch": 0.266386382879332, "grad_norm": 0.7352080338605391, "learning_rate": 4.075750202757503e-06, "loss": 0.6408, "step": 9124 }, { "epoch": 0.26641557910717933, "grad_norm": 0.7902504003757919, "learning_rate": 4.07558799675588e-06, "loss": 0.7614, "step": 9125 }, { "epoch": 0.2664447753350267, "grad_norm": 0.7865134239970264, "learning_rate": 4.075425790754258e-06, "loss": 0.6483, "step": 9126 }, { "epoch": 0.26647397156287406, "grad_norm": 0.7471229221811889, "learning_rate": 4.075263584752636e-06, "loss": 0.6221, "step": 9127 }, { "epoch": 0.2665031677907214, "grad_norm": 0.7144959013092581, "learning_rate": 4.075101378751014e-06, "loss": 0.6061, "step": 9128 }, { "epoch": 0.2665323640185688, "grad_norm": 0.6777488338379547, "learning_rate": 4.074939172749392e-06, "loss": 0.6119, "step": 9129 }, { "epoch": 0.26656156024641614, "grad_norm": 0.8108146270037818, "learning_rate": 4.07477696674777e-06, "loss": 0.6568, "step": 9130 }, { "epoch": 0.2665907564742635, "grad_norm": 0.7144826820685971, "learning_rate": 4.074614760746148e-06, "loss": 0.6456, "step": 9131 }, { "epoch": 0.26661995270211086, "grad_norm": 0.7313621658701112, "learning_rate": 4.074452554744526e-06, "loss": 0.7076, "step": 9132 }, { "epoch": 0.2666491489299582, "grad_norm": 0.7829397749100228, "learning_rate": 4.074290348742904e-06, "loss": 0.6483, "step": 9133 }, { "epoch": 0.2666783451578056, "grad_norm": 0.7258633602755126, "learning_rate": 4.074128142741281e-06, "loss": 0.6519, "step": 9134 }, { "epoch": 0.26670754138565295, "grad_norm": 0.7155707311522187, "learning_rate": 4.073965936739659e-06, "loss": 0.6618, "step": 9135 }, { "epoch": 0.26673673761350036, "grad_norm": 0.7162219895721668, "learning_rate": 4.073803730738037e-06, "loss": 0.653, "step": 9136 }, { "epoch": 0.2667659338413477, "grad_norm": 0.7259101131108147, "learning_rate": 4.073641524736416e-06, "loss": 0.6786, "step": 9137 }, { "epoch": 0.2667951300691951, "grad_norm": 0.6782948018489875, "learning_rate": 4.073479318734794e-06, "loss": 0.631, "step": 9138 }, { "epoch": 0.26682432629704245, "grad_norm": 0.7850267606296967, "learning_rate": 4.073317112733171e-06, "loss": 0.7028, "step": 9139 }, { "epoch": 0.2668535225248898, "grad_norm": 0.8166327523565556, "learning_rate": 4.073154906731549e-06, "loss": 0.6591, "step": 9140 }, { "epoch": 0.26688271875273717, "grad_norm": 0.8935177557284295, "learning_rate": 4.072992700729927e-06, "loss": 0.6614, "step": 9141 }, { "epoch": 0.26691191498058453, "grad_norm": 0.7553739186815662, "learning_rate": 4.072830494728305e-06, "loss": 0.6791, "step": 9142 }, { "epoch": 0.2669411112084319, "grad_norm": 0.7475264965620675, "learning_rate": 4.072668288726683e-06, "loss": 0.6656, "step": 9143 }, { "epoch": 0.26697030743627925, "grad_norm": 0.7703731416551081, "learning_rate": 4.072506082725061e-06, "loss": 0.6938, "step": 9144 }, { "epoch": 0.2669995036641266, "grad_norm": 0.7281791908791269, "learning_rate": 4.072343876723439e-06, "loss": 0.6783, "step": 9145 }, { "epoch": 0.267028699891974, "grad_norm": 0.7351120868608413, "learning_rate": 4.072181670721817e-06, "loss": 0.676, "step": 9146 }, { "epoch": 0.26705789611982134, "grad_norm": 0.7301233770668262, "learning_rate": 4.072019464720195e-06, "loss": 0.6308, "step": 9147 }, { "epoch": 0.2670870923476687, "grad_norm": 0.7492547414262734, "learning_rate": 4.0718572587185725e-06, "loss": 0.6745, "step": 9148 }, { "epoch": 0.26711628857551606, "grad_norm": 0.7096428552347834, "learning_rate": 4.0716950527169505e-06, "loss": 0.5979, "step": 9149 }, { "epoch": 0.2671454848033634, "grad_norm": 0.737793408152643, "learning_rate": 4.0715328467153285e-06, "loss": 0.6749, "step": 9150 }, { "epoch": 0.2671746810312108, "grad_norm": 1.0597660872617936, "learning_rate": 4.0713706407137065e-06, "loss": 0.6664, "step": 9151 }, { "epoch": 0.26720387725905814, "grad_norm": 0.754912526777845, "learning_rate": 4.0712084347120845e-06, "loss": 0.743, "step": 9152 }, { "epoch": 0.2672330734869055, "grad_norm": 1.1800171401243396, "learning_rate": 4.0710462287104625e-06, "loss": 0.7161, "step": 9153 }, { "epoch": 0.26726226971475286, "grad_norm": 0.7293068476253703, "learning_rate": 4.0708840227088405e-06, "loss": 0.6723, "step": 9154 }, { "epoch": 0.2672914659426002, "grad_norm": 0.7843546628197986, "learning_rate": 4.0707218167072185e-06, "loss": 0.7676, "step": 9155 }, { "epoch": 0.2673206621704476, "grad_norm": 0.7943198238512261, "learning_rate": 4.0705596107055965e-06, "loss": 0.7618, "step": 9156 }, { "epoch": 0.26734985839829495, "grad_norm": 0.7137182652133255, "learning_rate": 4.0703974047039745e-06, "loss": 0.5754, "step": 9157 }, { "epoch": 0.2673790546261423, "grad_norm": 0.7315768838212399, "learning_rate": 4.0702351987023525e-06, "loss": 0.6715, "step": 9158 }, { "epoch": 0.26740825085398967, "grad_norm": 0.7752549208600031, "learning_rate": 4.0700729927007305e-06, "loss": 0.7267, "step": 9159 }, { "epoch": 0.26743744708183703, "grad_norm": 0.7819726966578832, "learning_rate": 4.0699107866991085e-06, "loss": 0.7246, "step": 9160 }, { "epoch": 0.2674666433096844, "grad_norm": 0.6864562027623581, "learning_rate": 4.0697485806974865e-06, "loss": 0.5901, "step": 9161 }, { "epoch": 0.26749583953753175, "grad_norm": 0.8017181595312324, "learning_rate": 4.0695863746958645e-06, "loss": 0.6206, "step": 9162 }, { "epoch": 0.2675250357653791, "grad_norm": 0.8307925850647685, "learning_rate": 4.069424168694242e-06, "loss": 0.6748, "step": 9163 }, { "epoch": 0.2675542319932265, "grad_norm": 0.7158987486135316, "learning_rate": 4.06926196269262e-06, "loss": 0.6411, "step": 9164 }, { "epoch": 0.26758342822107384, "grad_norm": 0.8295916789087948, "learning_rate": 4.069099756690998e-06, "loss": 0.6998, "step": 9165 }, { "epoch": 0.2676126244489212, "grad_norm": 0.766078434110577, "learning_rate": 4.068937550689376e-06, "loss": 0.7049, "step": 9166 }, { "epoch": 0.26764182067676856, "grad_norm": 0.7418070535697793, "learning_rate": 4.068775344687754e-06, "loss": 0.6814, "step": 9167 }, { "epoch": 0.2676710169046159, "grad_norm": 0.8696045623588116, "learning_rate": 4.068613138686132e-06, "loss": 0.7271, "step": 9168 }, { "epoch": 0.2677002131324633, "grad_norm": 0.6702253296153634, "learning_rate": 4.06845093268451e-06, "loss": 0.5632, "step": 9169 }, { "epoch": 0.26772940936031064, "grad_norm": 0.7757949649802636, "learning_rate": 4.068288726682888e-06, "loss": 0.6346, "step": 9170 }, { "epoch": 0.267758605588158, "grad_norm": 0.7792318083419727, "learning_rate": 4.068126520681266e-06, "loss": 0.7082, "step": 9171 }, { "epoch": 0.26778780181600537, "grad_norm": 0.7645137111872848, "learning_rate": 4.067964314679643e-06, "loss": 0.6412, "step": 9172 }, { "epoch": 0.2678169980438527, "grad_norm": 0.7975704644125777, "learning_rate": 4.067802108678021e-06, "loss": 0.6891, "step": 9173 }, { "epoch": 0.2678461942717001, "grad_norm": 0.7495179802591845, "learning_rate": 4.0676399026764e-06, "loss": 0.6963, "step": 9174 }, { "epoch": 0.26787539049954745, "grad_norm": 0.767412563172551, "learning_rate": 4.067477696674778e-06, "loss": 0.7282, "step": 9175 }, { "epoch": 0.2679045867273948, "grad_norm": 0.7701343532374434, "learning_rate": 4.067315490673156e-06, "loss": 0.6531, "step": 9176 }, { "epoch": 0.2679337829552422, "grad_norm": 0.7392426642716494, "learning_rate": 4.067153284671533e-06, "loss": 0.7059, "step": 9177 }, { "epoch": 0.26796297918308953, "grad_norm": 0.7050028856163122, "learning_rate": 4.066991078669911e-06, "loss": 0.569, "step": 9178 }, { "epoch": 0.2679921754109369, "grad_norm": 0.8069128745978277, "learning_rate": 4.066828872668289e-06, "loss": 0.7909, "step": 9179 }, { "epoch": 0.26802137163878426, "grad_norm": 0.7100488888051556, "learning_rate": 4.066666666666667e-06, "loss": 0.5685, "step": 9180 }, { "epoch": 0.2680505678666316, "grad_norm": 0.7286378399851793, "learning_rate": 4.066504460665045e-06, "loss": 0.6364, "step": 9181 }, { "epoch": 0.268079764094479, "grad_norm": 0.7289062977075179, "learning_rate": 4.066342254663423e-06, "loss": 0.6464, "step": 9182 }, { "epoch": 0.26810896032232634, "grad_norm": 0.7664005856766837, "learning_rate": 4.066180048661801e-06, "loss": 0.7165, "step": 9183 }, { "epoch": 0.2681381565501737, "grad_norm": 0.7401168249897677, "learning_rate": 4.066017842660179e-06, "loss": 0.5942, "step": 9184 }, { "epoch": 0.26816735277802106, "grad_norm": 0.8310710538159415, "learning_rate": 4.065855636658557e-06, "loss": 0.6498, "step": 9185 }, { "epoch": 0.2681965490058684, "grad_norm": 0.7308655776777917, "learning_rate": 4.065693430656934e-06, "loss": 0.5966, "step": 9186 }, { "epoch": 0.2682257452337158, "grad_norm": 0.6819668914104984, "learning_rate": 4.065531224655312e-06, "loss": 0.5691, "step": 9187 }, { "epoch": 0.26825494146156315, "grad_norm": 0.8159550328248659, "learning_rate": 4.06536901865369e-06, "loss": 0.7274, "step": 9188 }, { "epoch": 0.2682841376894105, "grad_norm": 0.7528172212293742, "learning_rate": 4.065206812652068e-06, "loss": 0.6257, "step": 9189 }, { "epoch": 0.26831333391725787, "grad_norm": 0.9656972305209975, "learning_rate": 4.065044606650446e-06, "loss": 0.7819, "step": 9190 }, { "epoch": 0.26834253014510523, "grad_norm": 0.8133183237048457, "learning_rate": 4.064882400648824e-06, "loss": 0.7284, "step": 9191 }, { "epoch": 0.2683717263729526, "grad_norm": 0.7186515248080166, "learning_rate": 4.064720194647202e-06, "loss": 0.6518, "step": 9192 }, { "epoch": 0.26840092260079995, "grad_norm": 0.7591879892415354, "learning_rate": 4.06455798864558e-06, "loss": 0.6537, "step": 9193 }, { "epoch": 0.2684301188286473, "grad_norm": 0.731426170093371, "learning_rate": 4.064395782643958e-06, "loss": 0.644, "step": 9194 }, { "epoch": 0.2684593150564947, "grad_norm": 0.707775445644619, "learning_rate": 4.064233576642336e-06, "loss": 0.6322, "step": 9195 }, { "epoch": 0.2684885112843421, "grad_norm": 0.764190288211721, "learning_rate": 4.064071370640714e-06, "loss": 0.7257, "step": 9196 }, { "epoch": 0.26851770751218945, "grad_norm": 0.6945402519784505, "learning_rate": 4.063909164639092e-06, "loss": 0.6049, "step": 9197 }, { "epoch": 0.2685469037400368, "grad_norm": 0.7387051690764365, "learning_rate": 4.06374695863747e-06, "loss": 0.674, "step": 9198 }, { "epoch": 0.2685760999678842, "grad_norm": 0.6768653457284897, "learning_rate": 4.063584752635848e-06, "loss": 0.6218, "step": 9199 }, { "epoch": 0.26860529619573154, "grad_norm": 0.7545895133751008, "learning_rate": 4.063422546634226e-06, "loss": 0.6801, "step": 9200 }, { "epoch": 0.2686344924235789, "grad_norm": 0.7721705574577901, "learning_rate": 4.0632603406326034e-06, "loss": 0.7329, "step": 9201 }, { "epoch": 0.26866368865142626, "grad_norm": 0.6891950014187913, "learning_rate": 4.0630981346309814e-06, "loss": 0.5521, "step": 9202 }, { "epoch": 0.2686928848792736, "grad_norm": 0.8128387576981817, "learning_rate": 4.0629359286293594e-06, "loss": 0.7105, "step": 9203 }, { "epoch": 0.268722081107121, "grad_norm": 0.7519436700498672, "learning_rate": 4.0627737226277374e-06, "loss": 0.6807, "step": 9204 }, { "epoch": 0.26875127733496834, "grad_norm": 0.6873337446333438, "learning_rate": 4.0626115166261155e-06, "loss": 0.5991, "step": 9205 }, { "epoch": 0.2687804735628157, "grad_norm": 0.7736217014878638, "learning_rate": 4.0624493106244935e-06, "loss": 0.7327, "step": 9206 }, { "epoch": 0.26880966979066306, "grad_norm": 0.6703599742644051, "learning_rate": 4.0622871046228715e-06, "loss": 0.6163, "step": 9207 }, { "epoch": 0.2688388660185104, "grad_norm": 0.7014685594820211, "learning_rate": 4.0621248986212495e-06, "loss": 0.5848, "step": 9208 }, { "epoch": 0.2688680622463578, "grad_norm": 0.7007405873585903, "learning_rate": 4.0619626926196275e-06, "loss": 0.5902, "step": 9209 }, { "epoch": 0.26889725847420515, "grad_norm": 0.759501929944528, "learning_rate": 4.061800486618005e-06, "loss": 0.6776, "step": 9210 }, { "epoch": 0.2689264547020525, "grad_norm": 0.6981784122162182, "learning_rate": 4.061638280616383e-06, "loss": 0.6363, "step": 9211 }, { "epoch": 0.26895565092989987, "grad_norm": 0.8230291066474433, "learning_rate": 4.0614760746147615e-06, "loss": 0.7743, "step": 9212 }, { "epoch": 0.26898484715774723, "grad_norm": 0.7630351100806897, "learning_rate": 4.0613138686131395e-06, "loss": 0.7323, "step": 9213 }, { "epoch": 0.2690140433855946, "grad_norm": 0.8423967576097849, "learning_rate": 4.0611516626115175e-06, "loss": 0.707, "step": 9214 }, { "epoch": 0.26904323961344195, "grad_norm": 0.6903411838429162, "learning_rate": 4.060989456609895e-06, "loss": 0.6235, "step": 9215 }, { "epoch": 0.2690724358412893, "grad_norm": 0.759373119067455, "learning_rate": 4.060827250608273e-06, "loss": 0.6629, "step": 9216 }, { "epoch": 0.2691016320691367, "grad_norm": 0.7967369072031046, "learning_rate": 4.060665044606651e-06, "loss": 0.7441, "step": 9217 }, { "epoch": 0.26913082829698404, "grad_norm": 0.726514693484059, "learning_rate": 4.060502838605029e-06, "loss": 0.6471, "step": 9218 }, { "epoch": 0.2691600245248314, "grad_norm": 0.749509286069312, "learning_rate": 4.060340632603407e-06, "loss": 0.7216, "step": 9219 }, { "epoch": 0.26918922075267876, "grad_norm": 0.7005209207704344, "learning_rate": 4.060178426601785e-06, "loss": 0.6513, "step": 9220 }, { "epoch": 0.2692184169805261, "grad_norm": 0.7423236908233648, "learning_rate": 4.060016220600163e-06, "loss": 0.6397, "step": 9221 }, { "epoch": 0.2692476132083735, "grad_norm": 0.7196159457910538, "learning_rate": 4.059854014598541e-06, "loss": 0.6589, "step": 9222 }, { "epoch": 0.26927680943622084, "grad_norm": 0.6923823295647147, "learning_rate": 4.059691808596919e-06, "loss": 0.5821, "step": 9223 }, { "epoch": 0.2693060056640682, "grad_norm": 0.7437312235681585, "learning_rate": 4.059529602595296e-06, "loss": 0.6571, "step": 9224 }, { "epoch": 0.26933520189191557, "grad_norm": 0.8277986721642243, "learning_rate": 4.059367396593674e-06, "loss": 0.7221, "step": 9225 }, { "epoch": 0.2693643981197629, "grad_norm": 0.7675666862184353, "learning_rate": 4.059205190592052e-06, "loss": 0.6532, "step": 9226 }, { "epoch": 0.2693935943476103, "grad_norm": 0.7575784574038916, "learning_rate": 4.05904298459043e-06, "loss": 0.6849, "step": 9227 }, { "epoch": 0.26942279057545765, "grad_norm": 0.7424401051086108, "learning_rate": 4.058880778588808e-06, "loss": 0.6365, "step": 9228 }, { "epoch": 0.269451986803305, "grad_norm": 0.716934804465442, "learning_rate": 4.058718572587186e-06, "loss": 0.6424, "step": 9229 }, { "epoch": 0.26948118303115237, "grad_norm": 0.8367377827934043, "learning_rate": 4.058556366585564e-06, "loss": 0.6936, "step": 9230 }, { "epoch": 0.26951037925899973, "grad_norm": 0.7560633704771292, "learning_rate": 4.058394160583942e-06, "loss": 0.6647, "step": 9231 }, { "epoch": 0.2695395754868471, "grad_norm": 0.7426579223335967, "learning_rate": 4.05823195458232e-06, "loss": 0.6707, "step": 9232 }, { "epoch": 0.26956877171469446, "grad_norm": 0.8049733894581306, "learning_rate": 4.058069748580698e-06, "loss": 0.664, "step": 9233 }, { "epoch": 0.2695979679425418, "grad_norm": 0.7166591973476008, "learning_rate": 4.057907542579076e-06, "loss": 0.6216, "step": 9234 }, { "epoch": 0.2696271641703892, "grad_norm": 0.9748518889654241, "learning_rate": 4.057745336577454e-06, "loss": 0.7448, "step": 9235 }, { "epoch": 0.26965636039823654, "grad_norm": 0.799717347949819, "learning_rate": 4.057583130575832e-06, "loss": 0.6752, "step": 9236 }, { "epoch": 0.2696855566260839, "grad_norm": 0.8476997713627499, "learning_rate": 4.05742092457421e-06, "loss": 0.8106, "step": 9237 }, { "epoch": 0.26971475285393126, "grad_norm": 0.713642951943478, "learning_rate": 4.057258718572588e-06, "loss": 0.6689, "step": 9238 }, { "epoch": 0.2697439490817786, "grad_norm": 0.7492058517060475, "learning_rate": 4.057096512570965e-06, "loss": 0.6405, "step": 9239 }, { "epoch": 0.269773145309626, "grad_norm": 0.7190805761381565, "learning_rate": 4.056934306569343e-06, "loss": 0.6749, "step": 9240 }, { "epoch": 0.26980234153747334, "grad_norm": 0.7830776795467904, "learning_rate": 4.056772100567721e-06, "loss": 0.7301, "step": 9241 }, { "epoch": 0.2698315377653207, "grad_norm": 0.7650015950444945, "learning_rate": 4.056609894566099e-06, "loss": 0.6346, "step": 9242 }, { "epoch": 0.26986073399316807, "grad_norm": 0.7532986620785984, "learning_rate": 4.056447688564477e-06, "loss": 0.6941, "step": 9243 }, { "epoch": 0.26988993022101543, "grad_norm": 0.7045832546590299, "learning_rate": 4.056285482562855e-06, "loss": 0.6547, "step": 9244 }, { "epoch": 0.2699191264488628, "grad_norm": 0.7764533161085452, "learning_rate": 4.056123276561233e-06, "loss": 0.6715, "step": 9245 }, { "epoch": 0.26994832267671015, "grad_norm": 0.7775431949364354, "learning_rate": 4.055961070559611e-06, "loss": 0.6635, "step": 9246 }, { "epoch": 0.2699775189045575, "grad_norm": 0.7161237990935578, "learning_rate": 4.055798864557988e-06, "loss": 0.6214, "step": 9247 }, { "epoch": 0.2700067151324049, "grad_norm": 0.7741271347765851, "learning_rate": 4.055636658556366e-06, "loss": 0.7327, "step": 9248 }, { "epoch": 0.27003591136025223, "grad_norm": 0.7612008095771569, "learning_rate": 4.055474452554744e-06, "loss": 0.6847, "step": 9249 }, { "epoch": 0.2700651075880996, "grad_norm": 0.730853407844565, "learning_rate": 4.055312246553123e-06, "loss": 0.6787, "step": 9250 }, { "epoch": 0.27009430381594696, "grad_norm": 0.8039184848636701, "learning_rate": 4.055150040551501e-06, "loss": 0.7051, "step": 9251 }, { "epoch": 0.2701235000437943, "grad_norm": 0.6736696766227482, "learning_rate": 4.054987834549879e-06, "loss": 0.5401, "step": 9252 }, { "epoch": 0.2701526962716417, "grad_norm": 0.7537926289401561, "learning_rate": 4.054825628548256e-06, "loss": 0.662, "step": 9253 }, { "epoch": 0.27018189249948904, "grad_norm": 0.7669275106719079, "learning_rate": 4.054663422546634e-06, "loss": 0.7589, "step": 9254 }, { "epoch": 0.2702110887273364, "grad_norm": 0.7563779989479579, "learning_rate": 4.054501216545012e-06, "loss": 0.6481, "step": 9255 }, { "epoch": 0.27024028495518376, "grad_norm": 0.6981626843875467, "learning_rate": 4.05433901054339e-06, "loss": 0.6337, "step": 9256 }, { "epoch": 0.2702694811830312, "grad_norm": 0.8331803167099743, "learning_rate": 4.054176804541768e-06, "loss": 0.6698, "step": 9257 }, { "epoch": 0.27029867741087854, "grad_norm": 0.7361481694110805, "learning_rate": 4.0540145985401464e-06, "loss": 0.6539, "step": 9258 }, { "epoch": 0.2703278736387259, "grad_norm": 0.6684121083397828, "learning_rate": 4.0538523925385244e-06, "loss": 0.6003, "step": 9259 }, { "epoch": 0.27035706986657326, "grad_norm": 0.792721074733972, "learning_rate": 4.0536901865369024e-06, "loss": 0.7322, "step": 9260 }, { "epoch": 0.2703862660944206, "grad_norm": 0.775437064495628, "learning_rate": 4.0535279805352804e-06, "loss": 0.7331, "step": 9261 }, { "epoch": 0.270415462322268, "grad_norm": 0.7729929233735873, "learning_rate": 4.053365774533658e-06, "loss": 0.6999, "step": 9262 }, { "epoch": 0.27044465855011535, "grad_norm": 0.7069423805345898, "learning_rate": 4.053203568532036e-06, "loss": 0.6099, "step": 9263 }, { "epoch": 0.2704738547779627, "grad_norm": 0.7775338978837244, "learning_rate": 4.053041362530414e-06, "loss": 0.6819, "step": 9264 }, { "epoch": 0.27050305100581007, "grad_norm": 0.7984429954724249, "learning_rate": 4.052879156528792e-06, "loss": 0.7398, "step": 9265 }, { "epoch": 0.27053224723365743, "grad_norm": 0.745505993538121, "learning_rate": 4.05271695052717e-06, "loss": 0.6338, "step": 9266 }, { "epoch": 0.2705614434615048, "grad_norm": 0.7334674522608934, "learning_rate": 4.052554744525548e-06, "loss": 0.6652, "step": 9267 }, { "epoch": 0.27059063968935215, "grad_norm": 0.800704817112426, "learning_rate": 4.052392538523926e-06, "loss": 0.7049, "step": 9268 }, { "epoch": 0.2706198359171995, "grad_norm": 0.7493825194704151, "learning_rate": 4.052230332522304e-06, "loss": 0.6768, "step": 9269 }, { "epoch": 0.2706490321450469, "grad_norm": 0.7616943404626907, "learning_rate": 4.052068126520682e-06, "loss": 0.5695, "step": 9270 }, { "epoch": 0.27067822837289424, "grad_norm": 0.7193163326234074, "learning_rate": 4.05190592051906e-06, "loss": 0.6277, "step": 9271 }, { "epoch": 0.2707074246007416, "grad_norm": 0.7124716840072213, "learning_rate": 4.051743714517438e-06, "loss": 0.6592, "step": 9272 }, { "epoch": 0.27073662082858896, "grad_norm": 0.6969703857433986, "learning_rate": 4.051581508515816e-06, "loss": 0.5855, "step": 9273 }, { "epoch": 0.2707658170564363, "grad_norm": 0.6965993379059139, "learning_rate": 4.051419302514194e-06, "loss": 0.5931, "step": 9274 }, { "epoch": 0.2707950132842837, "grad_norm": 0.7449660220184052, "learning_rate": 4.051257096512572e-06, "loss": 0.6373, "step": 9275 }, { "epoch": 0.27082420951213104, "grad_norm": 0.6946668372502942, "learning_rate": 4.05109489051095e-06, "loss": 0.6677, "step": 9276 }, { "epoch": 0.2708534057399784, "grad_norm": 0.751824675449344, "learning_rate": 4.050932684509327e-06, "loss": 0.6492, "step": 9277 }, { "epoch": 0.27088260196782576, "grad_norm": 0.8164880481154287, "learning_rate": 4.050770478507705e-06, "loss": 0.6923, "step": 9278 }, { "epoch": 0.2709117981956731, "grad_norm": 0.725455901277016, "learning_rate": 4.050608272506083e-06, "loss": 0.6627, "step": 9279 }, { "epoch": 0.2709409944235205, "grad_norm": 0.7508535635268209, "learning_rate": 4.050446066504461e-06, "loss": 0.7023, "step": 9280 }, { "epoch": 0.27097019065136785, "grad_norm": 0.7635857866776451, "learning_rate": 4.050283860502839e-06, "loss": 0.6353, "step": 9281 }, { "epoch": 0.2709993868792152, "grad_norm": 0.7313423315787833, "learning_rate": 4.050121654501217e-06, "loss": 0.6562, "step": 9282 }, { "epoch": 0.27102858310706257, "grad_norm": 0.7436363289754051, "learning_rate": 4.049959448499595e-06, "loss": 0.7054, "step": 9283 }, { "epoch": 0.27105777933490993, "grad_norm": 0.7322284830478242, "learning_rate": 4.049797242497973e-06, "loss": 0.6444, "step": 9284 }, { "epoch": 0.2710869755627573, "grad_norm": 0.7835309426813387, "learning_rate": 4.04963503649635e-06, "loss": 0.664, "step": 9285 }, { "epoch": 0.27111617179060465, "grad_norm": 0.7718890397705725, "learning_rate": 4.049472830494728e-06, "loss": 0.7578, "step": 9286 }, { "epoch": 0.271145368018452, "grad_norm": 0.7035059989423718, "learning_rate": 4.049310624493106e-06, "loss": 0.6085, "step": 9287 }, { "epoch": 0.2711745642462994, "grad_norm": 0.7815364274796155, "learning_rate": 4.049148418491485e-06, "loss": 0.6743, "step": 9288 }, { "epoch": 0.27120376047414674, "grad_norm": 0.7655343372599872, "learning_rate": 4.048986212489863e-06, "loss": 0.625, "step": 9289 }, { "epoch": 0.2712329567019941, "grad_norm": 0.7013431042262864, "learning_rate": 4.048824006488241e-06, "loss": 0.66, "step": 9290 }, { "epoch": 0.27126215292984146, "grad_norm": 0.7427336174420146, "learning_rate": 4.048661800486618e-06, "loss": 0.6566, "step": 9291 }, { "epoch": 0.2712913491576888, "grad_norm": 0.7153285572902227, "learning_rate": 4.048499594484996e-06, "loss": 0.6825, "step": 9292 }, { "epoch": 0.2713205453855362, "grad_norm": 0.6895771070914023, "learning_rate": 4.048337388483374e-06, "loss": 0.5997, "step": 9293 }, { "epoch": 0.27134974161338354, "grad_norm": 0.6820966227610008, "learning_rate": 4.048175182481752e-06, "loss": 0.57, "step": 9294 }, { "epoch": 0.2713789378412309, "grad_norm": 0.7211400677260266, "learning_rate": 4.04801297648013e-06, "loss": 0.6756, "step": 9295 }, { "epoch": 0.27140813406907827, "grad_norm": 0.6787254975700667, "learning_rate": 4.047850770478508e-06, "loss": 0.6116, "step": 9296 }, { "epoch": 0.2714373302969256, "grad_norm": 0.7567947350163309, "learning_rate": 4.047688564476886e-06, "loss": 0.6828, "step": 9297 }, { "epoch": 0.271466526524773, "grad_norm": 0.7262945527157872, "learning_rate": 4.047526358475264e-06, "loss": 0.6171, "step": 9298 }, { "epoch": 0.27149572275262035, "grad_norm": 0.7065644941877076, "learning_rate": 4.047364152473642e-06, "loss": 0.6462, "step": 9299 }, { "epoch": 0.2715249189804677, "grad_norm": 0.8014235560754676, "learning_rate": 4.047201946472019e-06, "loss": 0.7016, "step": 9300 }, { "epoch": 0.27155411520831507, "grad_norm": 0.7650943634481945, "learning_rate": 4.047039740470397e-06, "loss": 0.6526, "step": 9301 }, { "epoch": 0.27158331143616243, "grad_norm": 0.7955229984558772, "learning_rate": 4.046877534468775e-06, "loss": 0.6497, "step": 9302 }, { "epoch": 0.2716125076640098, "grad_norm": 0.8013926012330219, "learning_rate": 4.046715328467153e-06, "loss": 0.7296, "step": 9303 }, { "epoch": 0.27164170389185716, "grad_norm": 0.7071680815154394, "learning_rate": 4.046553122465531e-06, "loss": 0.6301, "step": 9304 }, { "epoch": 0.2716709001197045, "grad_norm": 0.8114440318549012, "learning_rate": 4.046390916463909e-06, "loss": 0.6816, "step": 9305 }, { "epoch": 0.2717000963475519, "grad_norm": 0.7472744841354432, "learning_rate": 4.046228710462287e-06, "loss": 0.6526, "step": 9306 }, { "epoch": 0.27172929257539924, "grad_norm": 0.6881610979370303, "learning_rate": 4.046066504460665e-06, "loss": 0.5638, "step": 9307 }, { "epoch": 0.2717584888032466, "grad_norm": 0.7249131735089213, "learning_rate": 4.045904298459043e-06, "loss": 0.6752, "step": 9308 }, { "epoch": 0.27178768503109396, "grad_norm": 0.6966003298774874, "learning_rate": 4.045742092457421e-06, "loss": 0.6353, "step": 9309 }, { "epoch": 0.2718168812589413, "grad_norm": 0.7038199083219646, "learning_rate": 4.045579886455799e-06, "loss": 0.6128, "step": 9310 }, { "epoch": 0.2718460774867887, "grad_norm": 0.7288054078788802, "learning_rate": 4.045417680454177e-06, "loss": 0.6577, "step": 9311 }, { "epoch": 0.27187527371463605, "grad_norm": 0.7323301619544801, "learning_rate": 4.045255474452555e-06, "loss": 0.6498, "step": 9312 }, { "epoch": 0.2719044699424834, "grad_norm": 0.8113417203827497, "learning_rate": 4.045093268450933e-06, "loss": 0.7126, "step": 9313 }, { "epoch": 0.27193366617033077, "grad_norm": 0.7947539325752263, "learning_rate": 4.044931062449311e-06, "loss": 0.7946, "step": 9314 }, { "epoch": 0.27196286239817813, "grad_norm": 0.7766809555901252, "learning_rate": 4.044768856447689e-06, "loss": 0.6992, "step": 9315 }, { "epoch": 0.2719920586260255, "grad_norm": 0.84256655055605, "learning_rate": 4.044606650446067e-06, "loss": 0.7447, "step": 9316 }, { "epoch": 0.2720212548538729, "grad_norm": 0.7601940255605343, "learning_rate": 4.044444444444445e-06, "loss": 0.6698, "step": 9317 }, { "epoch": 0.27205045108172027, "grad_norm": 0.7479279450996772, "learning_rate": 4.044282238442823e-06, "loss": 0.665, "step": 9318 }, { "epoch": 0.27207964730956763, "grad_norm": 0.7603435243627658, "learning_rate": 4.044120032441201e-06, "loss": 0.6664, "step": 9319 }, { "epoch": 0.272108843537415, "grad_norm": 0.7817860876338969, "learning_rate": 4.043957826439579e-06, "loss": 0.5835, "step": 9320 }, { "epoch": 0.27213803976526235, "grad_norm": 0.7976715598688555, "learning_rate": 4.043795620437957e-06, "loss": 0.6965, "step": 9321 }, { "epoch": 0.2721672359931097, "grad_norm": 0.7376604020006993, "learning_rate": 4.043633414436335e-06, "loss": 0.6788, "step": 9322 }, { "epoch": 0.2721964322209571, "grad_norm": 0.7433413325108833, "learning_rate": 4.043471208434712e-06, "loss": 0.6553, "step": 9323 }, { "epoch": 0.27222562844880444, "grad_norm": 0.7106800704127014, "learning_rate": 4.04330900243309e-06, "loss": 0.6498, "step": 9324 }, { "epoch": 0.2722548246766518, "grad_norm": 0.7619932785096195, "learning_rate": 4.043146796431469e-06, "loss": 0.6821, "step": 9325 }, { "epoch": 0.27228402090449916, "grad_norm": 0.7782365078465399, "learning_rate": 4.042984590429847e-06, "loss": 0.6608, "step": 9326 }, { "epoch": 0.2723132171323465, "grad_norm": 0.775201715689319, "learning_rate": 4.042822384428225e-06, "loss": 0.7088, "step": 9327 }, { "epoch": 0.2723424133601939, "grad_norm": 0.6871859017130528, "learning_rate": 4.042660178426603e-06, "loss": 0.6059, "step": 9328 }, { "epoch": 0.27237160958804124, "grad_norm": 0.7170789380395457, "learning_rate": 4.04249797242498e-06, "loss": 0.6394, "step": 9329 }, { "epoch": 0.2724008058158886, "grad_norm": 0.7559410501582335, "learning_rate": 4.042335766423358e-06, "loss": 0.7204, "step": 9330 }, { "epoch": 0.27243000204373596, "grad_norm": 0.7993437371027041, "learning_rate": 4.042173560421736e-06, "loss": 0.6805, "step": 9331 }, { "epoch": 0.2724591982715833, "grad_norm": 0.7372158030909948, "learning_rate": 4.042011354420114e-06, "loss": 0.6537, "step": 9332 }, { "epoch": 0.2724883944994307, "grad_norm": 0.6711965870955162, "learning_rate": 4.041849148418492e-06, "loss": 0.5645, "step": 9333 }, { "epoch": 0.27251759072727805, "grad_norm": 0.7488396157427651, "learning_rate": 4.04168694241687e-06, "loss": 0.6687, "step": 9334 }, { "epoch": 0.2725467869551254, "grad_norm": 0.7067299713070871, "learning_rate": 4.041524736415248e-06, "loss": 0.6558, "step": 9335 }, { "epoch": 0.27257598318297277, "grad_norm": 0.7546253298146482, "learning_rate": 4.041362530413626e-06, "loss": 0.7407, "step": 9336 }, { "epoch": 0.27260517941082013, "grad_norm": 0.6726615797621871, "learning_rate": 4.041200324412004e-06, "loss": 0.5705, "step": 9337 }, { "epoch": 0.2726343756386675, "grad_norm": 0.7797869674178829, "learning_rate": 4.041038118410381e-06, "loss": 0.7077, "step": 9338 }, { "epoch": 0.27266357186651485, "grad_norm": 0.7417080647478859, "learning_rate": 4.040875912408759e-06, "loss": 0.6899, "step": 9339 }, { "epoch": 0.2726927680943622, "grad_norm": 0.7748503935788139, "learning_rate": 4.040713706407137e-06, "loss": 0.7045, "step": 9340 }, { "epoch": 0.2727219643222096, "grad_norm": 0.6779322848427207, "learning_rate": 4.040551500405515e-06, "loss": 0.6124, "step": 9341 }, { "epoch": 0.27275116055005694, "grad_norm": 0.7698218621831607, "learning_rate": 4.040389294403893e-06, "loss": 0.7025, "step": 9342 }, { "epoch": 0.2727803567779043, "grad_norm": 0.9847270890991933, "learning_rate": 4.040227088402271e-06, "loss": 0.7726, "step": 9343 }, { "epoch": 0.27280955300575166, "grad_norm": 0.750975421510399, "learning_rate": 4.040064882400649e-06, "loss": 0.7156, "step": 9344 }, { "epoch": 0.272838749233599, "grad_norm": 0.756942118669586, "learning_rate": 4.039902676399027e-06, "loss": 0.6694, "step": 9345 }, { "epoch": 0.2728679454614464, "grad_norm": 0.8749028010362654, "learning_rate": 4.039740470397405e-06, "loss": 0.7279, "step": 9346 }, { "epoch": 0.27289714168929374, "grad_norm": 0.7806948396659205, "learning_rate": 4.039578264395783e-06, "loss": 0.7442, "step": 9347 }, { "epoch": 0.2729263379171411, "grad_norm": 0.6957052914331061, "learning_rate": 4.039416058394161e-06, "loss": 0.6319, "step": 9348 }, { "epoch": 0.27295553414498847, "grad_norm": 0.741832562221654, "learning_rate": 4.039253852392539e-06, "loss": 0.6406, "step": 9349 }, { "epoch": 0.2729847303728358, "grad_norm": 0.7609095847461377, "learning_rate": 4.039091646390917e-06, "loss": 0.7409, "step": 9350 }, { "epoch": 0.2730139266006832, "grad_norm": 0.6863986209656501, "learning_rate": 4.038929440389295e-06, "loss": 0.5901, "step": 9351 }, { "epoch": 0.27304312282853055, "grad_norm": 0.9438268092961705, "learning_rate": 4.038767234387672e-06, "loss": 0.6957, "step": 9352 }, { "epoch": 0.2730723190563779, "grad_norm": 0.7636952835370758, "learning_rate": 4.03860502838605e-06, "loss": 0.6795, "step": 9353 }, { "epoch": 0.27310151528422527, "grad_norm": 0.7609227860257013, "learning_rate": 4.038442822384428e-06, "loss": 0.7129, "step": 9354 }, { "epoch": 0.27313071151207263, "grad_norm": 0.813605060406876, "learning_rate": 4.038280616382806e-06, "loss": 0.6575, "step": 9355 }, { "epoch": 0.27315990773992, "grad_norm": 0.687058829768349, "learning_rate": 4.038118410381184e-06, "loss": 0.6125, "step": 9356 }, { "epoch": 0.27318910396776735, "grad_norm": 0.7166933038209222, "learning_rate": 4.037956204379562e-06, "loss": 0.6905, "step": 9357 }, { "epoch": 0.2732183001956147, "grad_norm": 0.8358661351682416, "learning_rate": 4.03779399837794e-06, "loss": 0.7911, "step": 9358 }, { "epoch": 0.2732474964234621, "grad_norm": 0.6835319636971741, "learning_rate": 4.037631792376318e-06, "loss": 0.5968, "step": 9359 }, { "epoch": 0.27327669265130944, "grad_norm": 0.7168738222358343, "learning_rate": 4.037469586374696e-06, "loss": 0.6073, "step": 9360 }, { "epoch": 0.2733058888791568, "grad_norm": 0.7247410396781067, "learning_rate": 4.0373073803730735e-06, "loss": 0.6144, "step": 9361 }, { "epoch": 0.27333508510700416, "grad_norm": 0.7446775626946852, "learning_rate": 4.0371451743714515e-06, "loss": 0.6412, "step": 9362 }, { "epoch": 0.2733642813348515, "grad_norm": 0.7261786140199991, "learning_rate": 4.03698296836983e-06, "loss": 0.626, "step": 9363 }, { "epoch": 0.2733934775626989, "grad_norm": 0.7142312597066838, "learning_rate": 4.036820762368208e-06, "loss": 0.6705, "step": 9364 }, { "epoch": 0.27342267379054624, "grad_norm": 0.7392563810682767, "learning_rate": 4.036658556366586e-06, "loss": 0.6083, "step": 9365 }, { "epoch": 0.2734518700183936, "grad_norm": 0.6935069497569899, "learning_rate": 4.036496350364964e-06, "loss": 0.6376, "step": 9366 }, { "epoch": 0.27348106624624097, "grad_norm": 0.8065607590609976, "learning_rate": 4.0363341443633415e-06, "loss": 0.7426, "step": 9367 }, { "epoch": 0.27351026247408833, "grad_norm": 0.7436609033809595, "learning_rate": 4.0361719383617195e-06, "loss": 0.7404, "step": 9368 }, { "epoch": 0.2735394587019357, "grad_norm": 0.892560660908608, "learning_rate": 4.0360097323600976e-06, "loss": 0.6548, "step": 9369 }, { "epoch": 0.27356865492978305, "grad_norm": 0.7405190639455189, "learning_rate": 4.0358475263584756e-06, "loss": 0.7187, "step": 9370 }, { "epoch": 0.2735978511576304, "grad_norm": 0.7902022575038234, "learning_rate": 4.0356853203568536e-06, "loss": 0.7599, "step": 9371 }, { "epoch": 0.2736270473854778, "grad_norm": 0.7445328086450945, "learning_rate": 4.035523114355232e-06, "loss": 0.5731, "step": 9372 }, { "epoch": 0.27365624361332513, "grad_norm": 0.7797885397660907, "learning_rate": 4.03536090835361e-06, "loss": 0.6985, "step": 9373 }, { "epoch": 0.2736854398411725, "grad_norm": 0.6909504909696337, "learning_rate": 4.035198702351988e-06, "loss": 0.6269, "step": 9374 }, { "epoch": 0.27371463606901986, "grad_norm": 0.7513556114788924, "learning_rate": 4.035036496350366e-06, "loss": 0.6542, "step": 9375 }, { "epoch": 0.2737438322968672, "grad_norm": 0.7588676227433124, "learning_rate": 4.034874290348743e-06, "loss": 0.7268, "step": 9376 }, { "epoch": 0.27377302852471463, "grad_norm": 0.7153163396312388, "learning_rate": 4.034712084347121e-06, "loss": 0.6266, "step": 9377 }, { "epoch": 0.273802224752562, "grad_norm": 0.7081945327253867, "learning_rate": 4.034549878345499e-06, "loss": 0.613, "step": 9378 }, { "epoch": 0.27383142098040936, "grad_norm": 0.7380952180097402, "learning_rate": 4.034387672343877e-06, "loss": 0.674, "step": 9379 }, { "epoch": 0.2738606172082567, "grad_norm": 0.7316100643173958, "learning_rate": 4.034225466342255e-06, "loss": 0.6761, "step": 9380 }, { "epoch": 0.2738898134361041, "grad_norm": 0.7199410920764338, "learning_rate": 4.034063260340633e-06, "loss": 0.5944, "step": 9381 }, { "epoch": 0.27391900966395144, "grad_norm": 0.782686023828778, "learning_rate": 4.033901054339011e-06, "loss": 0.6854, "step": 9382 }, { "epoch": 0.2739482058917988, "grad_norm": 0.7248127111500395, "learning_rate": 4.033738848337389e-06, "loss": 0.6592, "step": 9383 }, { "epoch": 0.27397740211964616, "grad_norm": 0.7737493636178153, "learning_rate": 4.033576642335767e-06, "loss": 0.6977, "step": 9384 }, { "epoch": 0.2740065983474935, "grad_norm": 0.7246549409637932, "learning_rate": 4.033414436334145e-06, "loss": 0.6966, "step": 9385 }, { "epoch": 0.2740357945753409, "grad_norm": 0.7319806759574198, "learning_rate": 4.033252230332523e-06, "loss": 0.6842, "step": 9386 }, { "epoch": 0.27406499080318825, "grad_norm": 0.7479704752234722, "learning_rate": 4.033090024330901e-06, "loss": 0.6785, "step": 9387 }, { "epoch": 0.2740941870310356, "grad_norm": 0.7019223946621794, "learning_rate": 4.032927818329279e-06, "loss": 0.6034, "step": 9388 }, { "epoch": 0.27412338325888297, "grad_norm": 0.885607173441648, "learning_rate": 4.032765612327657e-06, "loss": 0.6953, "step": 9389 }, { "epoch": 0.27415257948673033, "grad_norm": 0.7326693170727683, "learning_rate": 4.032603406326034e-06, "loss": 0.6567, "step": 9390 }, { "epoch": 0.2741817757145777, "grad_norm": 0.7630176883683747, "learning_rate": 4.032441200324412e-06, "loss": 0.7207, "step": 9391 }, { "epoch": 0.27421097194242505, "grad_norm": 0.763075411986645, "learning_rate": 4.03227899432279e-06, "loss": 0.724, "step": 9392 }, { "epoch": 0.2742401681702724, "grad_norm": 0.6965559593231836, "learning_rate": 4.032116788321168e-06, "loss": 0.6335, "step": 9393 }, { "epoch": 0.2742693643981198, "grad_norm": 0.7076163264327476, "learning_rate": 4.031954582319546e-06, "loss": 0.5605, "step": 9394 }, { "epoch": 0.27429856062596714, "grad_norm": 0.7374911441259479, "learning_rate": 4.031792376317924e-06, "loss": 0.6894, "step": 9395 }, { "epoch": 0.2743277568538145, "grad_norm": 0.8022941511882183, "learning_rate": 4.031630170316302e-06, "loss": 0.7463, "step": 9396 }, { "epoch": 0.27435695308166186, "grad_norm": 0.9465734686932941, "learning_rate": 4.03146796431468e-06, "loss": 0.7549, "step": 9397 }, { "epoch": 0.2743861493095092, "grad_norm": 0.7528602186285197, "learning_rate": 4.031305758313058e-06, "loss": 0.6231, "step": 9398 }, { "epoch": 0.2744153455373566, "grad_norm": 0.7609251063702445, "learning_rate": 4.031143552311435e-06, "loss": 0.6794, "step": 9399 }, { "epoch": 0.27444454176520394, "grad_norm": 0.8106954319065856, "learning_rate": 4.030981346309813e-06, "loss": 0.6524, "step": 9400 }, { "epoch": 0.2744737379930513, "grad_norm": 0.7253783553641926, "learning_rate": 4.030819140308192e-06, "loss": 0.6111, "step": 9401 }, { "epoch": 0.27450293422089866, "grad_norm": 0.741543811258803, "learning_rate": 4.03065693430657e-06, "loss": 0.5991, "step": 9402 }, { "epoch": 0.274532130448746, "grad_norm": 0.7914422537126123, "learning_rate": 4.030494728304948e-06, "loss": 0.6332, "step": 9403 }, { "epoch": 0.2745613266765934, "grad_norm": 0.762497668850525, "learning_rate": 4.030332522303326e-06, "loss": 0.6923, "step": 9404 }, { "epoch": 0.27459052290444075, "grad_norm": 0.6741120766299251, "learning_rate": 4.030170316301703e-06, "loss": 0.5787, "step": 9405 }, { "epoch": 0.2746197191322881, "grad_norm": 0.6705135419863346, "learning_rate": 4.030008110300081e-06, "loss": 0.5608, "step": 9406 }, { "epoch": 0.27464891536013547, "grad_norm": 0.7285468066789876, "learning_rate": 4.029845904298459e-06, "loss": 0.6657, "step": 9407 }, { "epoch": 0.27467811158798283, "grad_norm": 0.7928124911443777, "learning_rate": 4.029683698296837e-06, "loss": 0.7051, "step": 9408 }, { "epoch": 0.2747073078158302, "grad_norm": 0.7717344226006535, "learning_rate": 4.029521492295215e-06, "loss": 0.7135, "step": 9409 }, { "epoch": 0.27473650404367755, "grad_norm": 0.8489131383669218, "learning_rate": 4.029359286293593e-06, "loss": 0.7045, "step": 9410 }, { "epoch": 0.2747657002715249, "grad_norm": 0.7816829604374173, "learning_rate": 4.029197080291971e-06, "loss": 0.718, "step": 9411 }, { "epoch": 0.2747948964993723, "grad_norm": 0.7446923288424238, "learning_rate": 4.029034874290349e-06, "loss": 0.69, "step": 9412 }, { "epoch": 0.27482409272721964, "grad_norm": 0.701375279751075, "learning_rate": 4.028872668288727e-06, "loss": 0.6509, "step": 9413 }, { "epoch": 0.274853288955067, "grad_norm": 0.6823458182765215, "learning_rate": 4.0287104622871045e-06, "loss": 0.5625, "step": 9414 }, { "epoch": 0.27488248518291436, "grad_norm": 0.7166150921573962, "learning_rate": 4.0285482562854825e-06, "loss": 0.5911, "step": 9415 }, { "epoch": 0.2749116814107617, "grad_norm": 0.9131730512032358, "learning_rate": 4.0283860502838605e-06, "loss": 0.7985, "step": 9416 }, { "epoch": 0.2749408776386091, "grad_norm": 0.7440736495653513, "learning_rate": 4.0282238442822385e-06, "loss": 0.68, "step": 9417 }, { "epoch": 0.27497007386645644, "grad_norm": 0.7237673856024166, "learning_rate": 4.0280616382806165e-06, "loss": 0.6467, "step": 9418 }, { "epoch": 0.2749992700943038, "grad_norm": 0.7339461712845035, "learning_rate": 4.0278994322789945e-06, "loss": 0.667, "step": 9419 }, { "epoch": 0.27502846632215117, "grad_norm": 0.8197560710669101, "learning_rate": 4.0277372262773725e-06, "loss": 0.7332, "step": 9420 }, { "epoch": 0.2750576625499985, "grad_norm": 0.8876974046499538, "learning_rate": 4.0275750202757505e-06, "loss": 0.6705, "step": 9421 }, { "epoch": 0.2750868587778459, "grad_norm": 0.7624974588371656, "learning_rate": 4.0274128142741285e-06, "loss": 0.6105, "step": 9422 }, { "epoch": 0.27511605500569325, "grad_norm": 0.715715226039798, "learning_rate": 4.0272506082725065e-06, "loss": 0.6232, "step": 9423 }, { "epoch": 0.2751452512335406, "grad_norm": 0.7154441617918934, "learning_rate": 4.0270884022708845e-06, "loss": 0.6131, "step": 9424 }, { "epoch": 0.27517444746138797, "grad_norm": 0.7937183900285566, "learning_rate": 4.0269261962692625e-06, "loss": 0.7314, "step": 9425 }, { "epoch": 0.27520364368923533, "grad_norm": 0.8284761633420759, "learning_rate": 4.0267639902676406e-06, "loss": 0.8126, "step": 9426 }, { "epoch": 0.2752328399170827, "grad_norm": 0.7848918236731696, "learning_rate": 4.0266017842660186e-06, "loss": 0.7756, "step": 9427 }, { "epoch": 0.27526203614493006, "grad_norm": 0.763637630142447, "learning_rate": 4.026439578264396e-06, "loss": 0.7187, "step": 9428 }, { "epoch": 0.2752912323727774, "grad_norm": 0.7987406601246817, "learning_rate": 4.026277372262774e-06, "loss": 0.6511, "step": 9429 }, { "epoch": 0.2753204286006248, "grad_norm": 0.7516081203189711, "learning_rate": 4.026115166261152e-06, "loss": 0.6792, "step": 9430 }, { "epoch": 0.27534962482847214, "grad_norm": 0.7454598383038552, "learning_rate": 4.02595296025953e-06, "loss": 0.6621, "step": 9431 }, { "epoch": 0.2753788210563195, "grad_norm": 0.7339206380434196, "learning_rate": 4.025790754257908e-06, "loss": 0.6933, "step": 9432 }, { "epoch": 0.27540801728416686, "grad_norm": 0.7920628025851573, "learning_rate": 4.025628548256286e-06, "loss": 0.7463, "step": 9433 }, { "epoch": 0.2754372135120142, "grad_norm": 0.70946897448805, "learning_rate": 4.025466342254664e-06, "loss": 0.5983, "step": 9434 }, { "epoch": 0.2754664097398616, "grad_norm": 0.781779086261205, "learning_rate": 4.025304136253042e-06, "loss": 0.6532, "step": 9435 }, { "epoch": 0.27549560596770895, "grad_norm": 0.7328700747567911, "learning_rate": 4.02514193025142e-06, "loss": 0.6325, "step": 9436 }, { "epoch": 0.2755248021955563, "grad_norm": 0.7584854418414085, "learning_rate": 4.024979724249797e-06, "loss": 0.672, "step": 9437 }, { "epoch": 0.2755539984234037, "grad_norm": 0.7288541744758509, "learning_rate": 4.024817518248175e-06, "loss": 0.6794, "step": 9438 }, { "epoch": 0.2755831946512511, "grad_norm": 0.710755669428647, "learning_rate": 4.024655312246554e-06, "loss": 0.6546, "step": 9439 }, { "epoch": 0.27561239087909845, "grad_norm": 0.723659888932243, "learning_rate": 4.024493106244932e-06, "loss": 0.6698, "step": 9440 }, { "epoch": 0.2756415871069458, "grad_norm": 0.7341652268680022, "learning_rate": 4.02433090024331e-06, "loss": 0.6515, "step": 9441 }, { "epoch": 0.27567078333479317, "grad_norm": 0.7081554800319289, "learning_rate": 4.024168694241688e-06, "loss": 0.5787, "step": 9442 }, { "epoch": 0.27569997956264053, "grad_norm": 0.7873369784124818, "learning_rate": 4.024006488240065e-06, "loss": 0.6779, "step": 9443 }, { "epoch": 0.2757291757904879, "grad_norm": 0.787674500073944, "learning_rate": 4.023844282238443e-06, "loss": 0.708, "step": 9444 }, { "epoch": 0.27575837201833525, "grad_norm": 0.834892951399931, "learning_rate": 4.023682076236821e-06, "loss": 0.7429, "step": 9445 }, { "epoch": 0.2757875682461826, "grad_norm": 0.7962659050723657, "learning_rate": 4.023519870235199e-06, "loss": 0.7584, "step": 9446 }, { "epoch": 0.27581676447403, "grad_norm": 0.8333833523790778, "learning_rate": 4.023357664233577e-06, "loss": 0.7406, "step": 9447 }, { "epoch": 0.27584596070187734, "grad_norm": 0.7350203608801378, "learning_rate": 4.023195458231955e-06, "loss": 0.6681, "step": 9448 }, { "epoch": 0.2758751569297247, "grad_norm": 0.7779324295110225, "learning_rate": 4.023033252230333e-06, "loss": 0.6999, "step": 9449 }, { "epoch": 0.27590435315757206, "grad_norm": 0.7473770233340905, "learning_rate": 4.022871046228711e-06, "loss": 0.6933, "step": 9450 }, { "epoch": 0.2759335493854194, "grad_norm": 0.7400846257731726, "learning_rate": 4.022708840227089e-06, "loss": 0.6557, "step": 9451 }, { "epoch": 0.2759627456132668, "grad_norm": 0.8709918362278097, "learning_rate": 4.022546634225466e-06, "loss": 0.6738, "step": 9452 }, { "epoch": 0.27599194184111414, "grad_norm": 0.7011900410804768, "learning_rate": 4.022384428223844e-06, "loss": 0.684, "step": 9453 }, { "epoch": 0.2760211380689615, "grad_norm": 0.7533915522840284, "learning_rate": 4.022222222222222e-06, "loss": 0.6296, "step": 9454 }, { "epoch": 0.27605033429680886, "grad_norm": 0.7443954039284569, "learning_rate": 4.0220600162206e-06, "loss": 0.6814, "step": 9455 }, { "epoch": 0.2760795305246562, "grad_norm": 0.7155567215071884, "learning_rate": 4.021897810218978e-06, "loss": 0.6288, "step": 9456 }, { "epoch": 0.2761087267525036, "grad_norm": 0.7943118699014988, "learning_rate": 4.021735604217356e-06, "loss": 0.7444, "step": 9457 }, { "epoch": 0.27613792298035095, "grad_norm": 0.724688542080359, "learning_rate": 4.021573398215734e-06, "loss": 0.6974, "step": 9458 }, { "epoch": 0.2761671192081983, "grad_norm": 0.8698922434998928, "learning_rate": 4.021411192214112e-06, "loss": 0.7201, "step": 9459 }, { "epoch": 0.27619631543604567, "grad_norm": 0.739894189063294, "learning_rate": 4.02124898621249e-06, "loss": 0.6475, "step": 9460 }, { "epoch": 0.27622551166389303, "grad_norm": 0.7780526935128633, "learning_rate": 4.021086780210868e-06, "loss": 0.6635, "step": 9461 }, { "epoch": 0.2762547078917404, "grad_norm": 0.7146428237455141, "learning_rate": 4.020924574209246e-06, "loss": 0.6244, "step": 9462 }, { "epoch": 0.27628390411958775, "grad_norm": 0.7574637503417576, "learning_rate": 4.020762368207624e-06, "loss": 0.7109, "step": 9463 }, { "epoch": 0.2763131003474351, "grad_norm": 0.7541905485003942, "learning_rate": 4.020600162206002e-06, "loss": 0.6897, "step": 9464 }, { "epoch": 0.2763422965752825, "grad_norm": 0.7786339621499341, "learning_rate": 4.02043795620438e-06, "loss": 0.7029, "step": 9465 }, { "epoch": 0.27637149280312984, "grad_norm": 0.7327762885184702, "learning_rate": 4.0202757502027574e-06, "loss": 0.619, "step": 9466 }, { "epoch": 0.2764006890309772, "grad_norm": 0.8584828824108627, "learning_rate": 4.0201135442011354e-06, "loss": 0.7589, "step": 9467 }, { "epoch": 0.27642988525882456, "grad_norm": 0.6983871743136847, "learning_rate": 4.0199513381995135e-06, "loss": 0.571, "step": 9468 }, { "epoch": 0.2764590814866719, "grad_norm": 0.6938879334572365, "learning_rate": 4.0197891321978915e-06, "loss": 0.6153, "step": 9469 }, { "epoch": 0.2764882777145193, "grad_norm": 0.8649708325226202, "learning_rate": 4.0196269261962695e-06, "loss": 0.6803, "step": 9470 }, { "epoch": 0.27651747394236664, "grad_norm": 1.2550575782448965, "learning_rate": 4.0194647201946475e-06, "loss": 0.7112, "step": 9471 }, { "epoch": 0.276546670170214, "grad_norm": 0.7895189949976603, "learning_rate": 4.0193025141930255e-06, "loss": 0.7751, "step": 9472 }, { "epoch": 0.27657586639806137, "grad_norm": 0.7174169813845547, "learning_rate": 4.0191403081914035e-06, "loss": 0.7002, "step": 9473 }, { "epoch": 0.2766050626259087, "grad_norm": 0.7170336088829334, "learning_rate": 4.0189781021897815e-06, "loss": 0.6218, "step": 9474 }, { "epoch": 0.2766342588537561, "grad_norm": 0.857587224596142, "learning_rate": 4.018815896188159e-06, "loss": 0.7539, "step": 9475 }, { "epoch": 0.27666345508160345, "grad_norm": 0.692184202071326, "learning_rate": 4.0186536901865375e-06, "loss": 0.6067, "step": 9476 }, { "epoch": 0.2766926513094508, "grad_norm": 0.916858835177827, "learning_rate": 4.0184914841849155e-06, "loss": 0.8519, "step": 9477 }, { "epoch": 0.27672184753729817, "grad_norm": 0.7523682675838623, "learning_rate": 4.0183292781832935e-06, "loss": 0.6136, "step": 9478 }, { "epoch": 0.27675104376514553, "grad_norm": 0.6870005923627792, "learning_rate": 4.0181670721816715e-06, "loss": 0.6423, "step": 9479 }, { "epoch": 0.2767802399929929, "grad_norm": 0.717783823501742, "learning_rate": 4.0180048661800495e-06, "loss": 0.5997, "step": 9480 }, { "epoch": 0.27680943622084025, "grad_norm": 0.7355704730043685, "learning_rate": 4.017842660178427e-06, "loss": 0.6684, "step": 9481 }, { "epoch": 0.2768386324486876, "grad_norm": 0.7848609534513442, "learning_rate": 4.017680454176805e-06, "loss": 0.7128, "step": 9482 }, { "epoch": 0.276867828676535, "grad_norm": 0.698285293061347, "learning_rate": 4.017518248175183e-06, "loss": 0.5961, "step": 9483 }, { "epoch": 0.27689702490438234, "grad_norm": 0.7118182847890504, "learning_rate": 4.017356042173561e-06, "loss": 0.667, "step": 9484 }, { "epoch": 0.2769262211322297, "grad_norm": 0.7115409768532379, "learning_rate": 4.017193836171939e-06, "loss": 0.6396, "step": 9485 }, { "epoch": 0.27695541736007706, "grad_norm": 0.6972020038582264, "learning_rate": 4.017031630170317e-06, "loss": 0.5977, "step": 9486 }, { "epoch": 0.2769846135879244, "grad_norm": 0.7302276856180604, "learning_rate": 4.016869424168695e-06, "loss": 0.6946, "step": 9487 }, { "epoch": 0.2770138098157718, "grad_norm": 0.7566168149151586, "learning_rate": 4.016707218167073e-06, "loss": 0.7069, "step": 9488 }, { "epoch": 0.27704300604361914, "grad_norm": 0.766847061312889, "learning_rate": 4.016545012165451e-06, "loss": 0.7008, "step": 9489 }, { "epoch": 0.2770722022714665, "grad_norm": 0.775290459110236, "learning_rate": 4.016382806163828e-06, "loss": 0.7879, "step": 9490 }, { "epoch": 0.27710139849931387, "grad_norm": 0.7478348312207961, "learning_rate": 4.016220600162206e-06, "loss": 0.6393, "step": 9491 }, { "epoch": 0.27713059472716123, "grad_norm": 0.6791374126710368, "learning_rate": 4.016058394160584e-06, "loss": 0.5893, "step": 9492 }, { "epoch": 0.2771597909550086, "grad_norm": 0.7259908954270818, "learning_rate": 4.015896188158962e-06, "loss": 0.6187, "step": 9493 }, { "epoch": 0.27718898718285595, "grad_norm": 0.7509733216976111, "learning_rate": 4.01573398215734e-06, "loss": 0.6573, "step": 9494 }, { "epoch": 0.2772181834107033, "grad_norm": 0.7498345255067654, "learning_rate": 4.015571776155718e-06, "loss": 0.725, "step": 9495 }, { "epoch": 0.2772473796385507, "grad_norm": 0.7107147574348291, "learning_rate": 4.015409570154096e-06, "loss": 0.6287, "step": 9496 }, { "epoch": 0.27727657586639803, "grad_norm": 0.7011318921607518, "learning_rate": 4.015247364152474e-06, "loss": 0.6183, "step": 9497 }, { "epoch": 0.27730577209424545, "grad_norm": 0.7576070832309193, "learning_rate": 4.015085158150852e-06, "loss": 0.6582, "step": 9498 }, { "epoch": 0.2773349683220928, "grad_norm": 0.7381787603459771, "learning_rate": 4.01492295214923e-06, "loss": 0.6876, "step": 9499 }, { "epoch": 0.2773641645499402, "grad_norm": 0.8300603787578105, "learning_rate": 4.014760746147608e-06, "loss": 0.6058, "step": 9500 }, { "epoch": 0.27739336077778753, "grad_norm": 0.7693144045262953, "learning_rate": 4.014598540145986e-06, "loss": 0.7337, "step": 9501 }, { "epoch": 0.2774225570056349, "grad_norm": 0.7419813561804706, "learning_rate": 4.014436334144364e-06, "loss": 0.6752, "step": 9502 }, { "epoch": 0.27745175323348226, "grad_norm": 0.7306657886783291, "learning_rate": 4.014274128142742e-06, "loss": 0.6784, "step": 9503 }, { "epoch": 0.2774809494613296, "grad_norm": 0.7583359635244985, "learning_rate": 4.014111922141119e-06, "loss": 0.7348, "step": 9504 }, { "epoch": 0.277510145689177, "grad_norm": 0.7546029931709233, "learning_rate": 4.013949716139497e-06, "loss": 0.7818, "step": 9505 }, { "epoch": 0.27753934191702434, "grad_norm": 0.7452256477166858, "learning_rate": 4.013787510137875e-06, "loss": 0.653, "step": 9506 }, { "epoch": 0.2775685381448717, "grad_norm": 0.889573745248044, "learning_rate": 4.013625304136253e-06, "loss": 0.6655, "step": 9507 }, { "epoch": 0.27759773437271906, "grad_norm": 0.7735656736573517, "learning_rate": 4.013463098134631e-06, "loss": 0.669, "step": 9508 }, { "epoch": 0.2776269306005664, "grad_norm": 0.7306057548056267, "learning_rate": 4.013300892133009e-06, "loss": 0.6633, "step": 9509 }, { "epoch": 0.2776561268284138, "grad_norm": 0.7368691130806108, "learning_rate": 4.013138686131387e-06, "loss": 0.6399, "step": 9510 }, { "epoch": 0.27768532305626115, "grad_norm": 0.785463059839576, "learning_rate": 4.012976480129765e-06, "loss": 0.6856, "step": 9511 }, { "epoch": 0.2777145192841085, "grad_norm": 0.778454440015178, "learning_rate": 4.012814274128143e-06, "loss": 0.7472, "step": 9512 }, { "epoch": 0.27774371551195587, "grad_norm": 1.708618248754198, "learning_rate": 4.01265206812652e-06, "loss": 0.6463, "step": 9513 }, { "epoch": 0.27777291173980323, "grad_norm": 0.7698728862053, "learning_rate": 4.012489862124899e-06, "loss": 0.7356, "step": 9514 }, { "epoch": 0.2778021079676506, "grad_norm": 0.7005892209136831, "learning_rate": 4.012327656123277e-06, "loss": 0.5903, "step": 9515 }, { "epoch": 0.27783130419549795, "grad_norm": 0.8260775169725751, "learning_rate": 4.012165450121655e-06, "loss": 0.72, "step": 9516 }, { "epoch": 0.2778605004233453, "grad_norm": 0.8569077646000545, "learning_rate": 4.012003244120033e-06, "loss": 0.7657, "step": 9517 }, { "epoch": 0.2778896966511927, "grad_norm": 0.7514951780948839, "learning_rate": 4.011841038118411e-06, "loss": 0.7133, "step": 9518 }, { "epoch": 0.27791889287904004, "grad_norm": 0.8052821849420754, "learning_rate": 4.011678832116788e-06, "loss": 0.6189, "step": 9519 }, { "epoch": 0.2779480891068874, "grad_norm": 0.7824188056789315, "learning_rate": 4.011516626115166e-06, "loss": 0.7271, "step": 9520 }, { "epoch": 0.27797728533473476, "grad_norm": 0.7840096712149359, "learning_rate": 4.0113544201135444e-06, "loss": 0.7169, "step": 9521 }, { "epoch": 0.2780064815625821, "grad_norm": 0.7157711579287083, "learning_rate": 4.0111922141119224e-06, "loss": 0.6172, "step": 9522 }, { "epoch": 0.2780356777904295, "grad_norm": 0.7895906660421046, "learning_rate": 4.0110300081103004e-06, "loss": 0.6838, "step": 9523 }, { "epoch": 0.27806487401827684, "grad_norm": 0.760555557256039, "learning_rate": 4.0108678021086784e-06, "loss": 0.7143, "step": 9524 }, { "epoch": 0.2780940702461242, "grad_norm": 0.7074145894936504, "learning_rate": 4.0107055961070565e-06, "loss": 0.6327, "step": 9525 }, { "epoch": 0.27812326647397156, "grad_norm": 0.6804987194062463, "learning_rate": 4.0105433901054345e-06, "loss": 0.5924, "step": 9526 }, { "epoch": 0.2781524627018189, "grad_norm": 0.712044143103163, "learning_rate": 4.0103811841038125e-06, "loss": 0.6282, "step": 9527 }, { "epoch": 0.2781816589296663, "grad_norm": 0.7518099700590463, "learning_rate": 4.01021897810219e-06, "loss": 0.6612, "step": 9528 }, { "epoch": 0.27821085515751365, "grad_norm": 0.7507604875419839, "learning_rate": 4.010056772100568e-06, "loss": 0.6629, "step": 9529 }, { "epoch": 0.278240051385361, "grad_norm": 0.7744352703738884, "learning_rate": 4.009894566098946e-06, "loss": 0.6467, "step": 9530 }, { "epoch": 0.27826924761320837, "grad_norm": 0.6821555532309025, "learning_rate": 4.009732360097324e-06, "loss": 0.5783, "step": 9531 }, { "epoch": 0.27829844384105573, "grad_norm": 0.6944368604309484, "learning_rate": 4.009570154095702e-06, "loss": 0.6097, "step": 9532 }, { "epoch": 0.2783276400689031, "grad_norm": 0.7642944855019038, "learning_rate": 4.00940794809408e-06, "loss": 0.7248, "step": 9533 }, { "epoch": 0.27835683629675045, "grad_norm": 0.786072317405547, "learning_rate": 4.009245742092458e-06, "loss": 0.7397, "step": 9534 }, { "epoch": 0.2783860325245978, "grad_norm": 0.7279909221001309, "learning_rate": 4.009083536090836e-06, "loss": 0.6924, "step": 9535 }, { "epoch": 0.2784152287524452, "grad_norm": 0.9299437682927512, "learning_rate": 4.008921330089214e-06, "loss": 0.6385, "step": 9536 }, { "epoch": 0.27844442498029254, "grad_norm": 1.532093870898538, "learning_rate": 4.008759124087592e-06, "loss": 0.7331, "step": 9537 }, { "epoch": 0.2784736212081399, "grad_norm": 0.7211854466511959, "learning_rate": 4.00859691808597e-06, "loss": 0.6128, "step": 9538 }, { "epoch": 0.27850281743598726, "grad_norm": 0.7394431633566022, "learning_rate": 4.008434712084348e-06, "loss": 0.6032, "step": 9539 }, { "epoch": 0.2785320136638346, "grad_norm": 0.8329447150659082, "learning_rate": 4.008272506082726e-06, "loss": 0.7149, "step": 9540 }, { "epoch": 0.278561209891682, "grad_norm": 0.8047567106946198, "learning_rate": 4.008110300081104e-06, "loss": 0.6943, "step": 9541 }, { "epoch": 0.27859040611952934, "grad_norm": 0.7620525582335331, "learning_rate": 4.007948094079481e-06, "loss": 0.6614, "step": 9542 }, { "epoch": 0.2786196023473767, "grad_norm": 0.7251706317833143, "learning_rate": 4.007785888077859e-06, "loss": 0.6698, "step": 9543 }, { "epoch": 0.27864879857522407, "grad_norm": 0.718496635313292, "learning_rate": 4.007623682076237e-06, "loss": 0.633, "step": 9544 }, { "epoch": 0.2786779948030714, "grad_norm": 0.696901455526247, "learning_rate": 4.007461476074615e-06, "loss": 0.6281, "step": 9545 }, { "epoch": 0.2787071910309188, "grad_norm": 0.9384131836274883, "learning_rate": 4.007299270072993e-06, "loss": 0.6852, "step": 9546 }, { "epoch": 0.27873638725876615, "grad_norm": 0.8707786311873092, "learning_rate": 4.007137064071371e-06, "loss": 0.652, "step": 9547 }, { "epoch": 0.2787655834866135, "grad_norm": 0.6656448792720403, "learning_rate": 4.006974858069749e-06, "loss": 0.5402, "step": 9548 }, { "epoch": 0.27879477971446087, "grad_norm": 0.8170468922377648, "learning_rate": 4.006812652068127e-06, "loss": 0.7738, "step": 9549 }, { "epoch": 0.27882397594230823, "grad_norm": 0.7183850153854284, "learning_rate": 4.006650446066505e-06, "loss": 0.6499, "step": 9550 }, { "epoch": 0.2788531721701556, "grad_norm": 0.7692958572639556, "learning_rate": 4.006488240064882e-06, "loss": 0.6458, "step": 9551 }, { "epoch": 0.27888236839800296, "grad_norm": 0.7092517622139113, "learning_rate": 4.006326034063261e-06, "loss": 0.6383, "step": 9552 }, { "epoch": 0.2789115646258503, "grad_norm": 0.7394744021190034, "learning_rate": 4.006163828061639e-06, "loss": 0.5944, "step": 9553 }, { "epoch": 0.2789407608536977, "grad_norm": 0.7649761179314564, "learning_rate": 4.006001622060017e-06, "loss": 0.7255, "step": 9554 }, { "epoch": 0.27896995708154504, "grad_norm": 0.8217365480155052, "learning_rate": 4.005839416058395e-06, "loss": 0.6284, "step": 9555 }, { "epoch": 0.2789991533093924, "grad_norm": 0.715129311526203, "learning_rate": 4.005677210056773e-06, "loss": 0.6467, "step": 9556 }, { "epoch": 0.27902834953723976, "grad_norm": 0.7402440112595807, "learning_rate": 4.00551500405515e-06, "loss": 0.6631, "step": 9557 }, { "epoch": 0.2790575457650872, "grad_norm": 0.7310681108005583, "learning_rate": 4.005352798053528e-06, "loss": 0.5996, "step": 9558 }, { "epoch": 0.27908674199293454, "grad_norm": 0.8248802872352775, "learning_rate": 4.005190592051906e-06, "loss": 0.7716, "step": 9559 }, { "epoch": 0.2791159382207819, "grad_norm": 0.6718257825477582, "learning_rate": 4.005028386050284e-06, "loss": 0.6034, "step": 9560 }, { "epoch": 0.27914513444862926, "grad_norm": 0.7866383335578121, "learning_rate": 4.004866180048662e-06, "loss": 0.7518, "step": 9561 }, { "epoch": 0.2791743306764766, "grad_norm": 0.7210120491029754, "learning_rate": 4.00470397404704e-06, "loss": 0.6286, "step": 9562 }, { "epoch": 0.279203526904324, "grad_norm": 0.7811558963879621, "learning_rate": 4.004541768045418e-06, "loss": 0.6933, "step": 9563 }, { "epoch": 0.27923272313217135, "grad_norm": 0.7172260566501211, "learning_rate": 4.004379562043796e-06, "loss": 0.6496, "step": 9564 }, { "epoch": 0.2792619193600187, "grad_norm": 0.7399201738089922, "learning_rate": 4.004217356042174e-06, "loss": 0.6193, "step": 9565 }, { "epoch": 0.27929111558786607, "grad_norm": 0.716570032913817, "learning_rate": 4.004055150040551e-06, "loss": 0.6145, "step": 9566 }, { "epoch": 0.27932031181571343, "grad_norm": 0.8377355278564811, "learning_rate": 4.003892944038929e-06, "loss": 0.7932, "step": 9567 }, { "epoch": 0.2793495080435608, "grad_norm": 0.7790574285118897, "learning_rate": 4.003730738037307e-06, "loss": 0.7327, "step": 9568 }, { "epoch": 0.27937870427140815, "grad_norm": 0.7277099969671265, "learning_rate": 4.003568532035685e-06, "loss": 0.661, "step": 9569 }, { "epoch": 0.2794079004992555, "grad_norm": 0.8728803650948493, "learning_rate": 4.003406326034063e-06, "loss": 0.6818, "step": 9570 }, { "epoch": 0.2794370967271029, "grad_norm": 0.689935101809758, "learning_rate": 4.003244120032441e-06, "loss": 0.6137, "step": 9571 }, { "epoch": 0.27946629295495024, "grad_norm": 0.7552347712468447, "learning_rate": 4.003081914030819e-06, "loss": 0.6597, "step": 9572 }, { "epoch": 0.2794954891827976, "grad_norm": 0.7561845191674119, "learning_rate": 4.002919708029197e-06, "loss": 0.6941, "step": 9573 }, { "epoch": 0.27952468541064496, "grad_norm": 0.7387290332585691, "learning_rate": 4.002757502027575e-06, "loss": 0.5842, "step": 9574 }, { "epoch": 0.2795538816384923, "grad_norm": 0.7844920791431957, "learning_rate": 4.002595296025953e-06, "loss": 0.6432, "step": 9575 }, { "epoch": 0.2795830778663397, "grad_norm": 0.7915540279895309, "learning_rate": 4.002433090024331e-06, "loss": 0.7304, "step": 9576 }, { "epoch": 0.27961227409418704, "grad_norm": 0.7246958031048933, "learning_rate": 4.002270884022709e-06, "loss": 0.6476, "step": 9577 }, { "epoch": 0.2796414703220344, "grad_norm": 0.6872796395890043, "learning_rate": 4.0021086780210874e-06, "loss": 0.6006, "step": 9578 }, { "epoch": 0.27967066654988176, "grad_norm": 0.7567622318617325, "learning_rate": 4.0019464720194654e-06, "loss": 0.7351, "step": 9579 }, { "epoch": 0.2796998627777291, "grad_norm": 0.8320061325644148, "learning_rate": 4.001784266017843e-06, "loss": 0.7176, "step": 9580 }, { "epoch": 0.2797290590055765, "grad_norm": 0.7651298595944007, "learning_rate": 4.001622060016221e-06, "loss": 0.7329, "step": 9581 }, { "epoch": 0.27975825523342385, "grad_norm": 0.781766728635674, "learning_rate": 4.001459854014599e-06, "loss": 0.7238, "step": 9582 }, { "epoch": 0.2797874514612712, "grad_norm": 0.7219655152229685, "learning_rate": 4.001297648012977e-06, "loss": 0.6397, "step": 9583 }, { "epoch": 0.27981664768911857, "grad_norm": 0.708066491438601, "learning_rate": 4.001135442011355e-06, "loss": 0.6563, "step": 9584 }, { "epoch": 0.27984584391696593, "grad_norm": 0.7176102329813434, "learning_rate": 4.000973236009733e-06, "loss": 0.6169, "step": 9585 }, { "epoch": 0.2798750401448133, "grad_norm": 0.7398933130589878, "learning_rate": 4.000811030008111e-06, "loss": 0.7103, "step": 9586 }, { "epoch": 0.27990423637266065, "grad_norm": 0.7578472456042632, "learning_rate": 4.000648824006489e-06, "loss": 0.6778, "step": 9587 }, { "epoch": 0.279933432600508, "grad_norm": 0.7332455659028766, "learning_rate": 4.000486618004867e-06, "loss": 0.6547, "step": 9588 }, { "epoch": 0.2799626288283554, "grad_norm": 0.7265809586534379, "learning_rate": 4.000324412003244e-06, "loss": 0.6482, "step": 9589 }, { "epoch": 0.27999182505620274, "grad_norm": 0.7160964654664326, "learning_rate": 4.000162206001623e-06, "loss": 0.6477, "step": 9590 }, { "epoch": 0.2800210212840501, "grad_norm": 0.6682839797670952, "learning_rate": 4.000000000000001e-06, "loss": 0.5495, "step": 9591 }, { "epoch": 0.28005021751189746, "grad_norm": 0.7460753644302804, "learning_rate": 3.999837793998379e-06, "loss": 0.6927, "step": 9592 }, { "epoch": 0.2800794137397448, "grad_norm": 0.8235770161915025, "learning_rate": 3.999675587996757e-06, "loss": 0.7278, "step": 9593 }, { "epoch": 0.2801086099675922, "grad_norm": 0.683222119434108, "learning_rate": 3.999513381995135e-06, "loss": 0.6244, "step": 9594 }, { "epoch": 0.28013780619543954, "grad_norm": 0.729804633895833, "learning_rate": 3.999351175993512e-06, "loss": 0.6459, "step": 9595 }, { "epoch": 0.2801670024232869, "grad_norm": 0.9510876921596448, "learning_rate": 3.99918896999189e-06, "loss": 0.6732, "step": 9596 }, { "epoch": 0.28019619865113427, "grad_norm": 0.7105765191064626, "learning_rate": 3.999026763990268e-06, "loss": 0.6672, "step": 9597 }, { "epoch": 0.2802253948789816, "grad_norm": 0.8135198348619145, "learning_rate": 3.998864557988646e-06, "loss": 0.7014, "step": 9598 }, { "epoch": 0.280254591106829, "grad_norm": 0.7270938778362018, "learning_rate": 3.998702351987024e-06, "loss": 0.6464, "step": 9599 }, { "epoch": 0.28028378733467635, "grad_norm": 0.7626447152587962, "learning_rate": 3.998540145985402e-06, "loss": 0.6528, "step": 9600 }, { "epoch": 0.2803129835625237, "grad_norm": 0.7345741560038606, "learning_rate": 3.99837793998378e-06, "loss": 0.628, "step": 9601 }, { "epoch": 0.28034217979037107, "grad_norm": 0.7740918628476259, "learning_rate": 3.998215733982158e-06, "loss": 0.6851, "step": 9602 }, { "epoch": 0.28037137601821843, "grad_norm": 0.7338172652666218, "learning_rate": 3.998053527980535e-06, "loss": 0.6524, "step": 9603 }, { "epoch": 0.2804005722460658, "grad_norm": 0.7424607726315385, "learning_rate": 3.997891321978913e-06, "loss": 0.633, "step": 9604 }, { "epoch": 0.28042976847391315, "grad_norm": 0.7893653028333613, "learning_rate": 3.997729115977291e-06, "loss": 0.738, "step": 9605 }, { "epoch": 0.2804589647017605, "grad_norm": 0.9369124816275892, "learning_rate": 3.997566909975669e-06, "loss": 0.6804, "step": 9606 }, { "epoch": 0.2804881609296079, "grad_norm": 0.7716308810015351, "learning_rate": 3.997404703974047e-06, "loss": 0.6958, "step": 9607 }, { "epoch": 0.28051735715745524, "grad_norm": 0.7456126214341887, "learning_rate": 3.997242497972426e-06, "loss": 0.6494, "step": 9608 }, { "epoch": 0.2805465533853026, "grad_norm": 0.7723642588670802, "learning_rate": 3.997080291970803e-06, "loss": 0.7338, "step": 9609 }, { "epoch": 0.28057574961314996, "grad_norm": 0.6556130638139386, "learning_rate": 3.996918085969181e-06, "loss": 0.5317, "step": 9610 }, { "epoch": 0.2806049458409973, "grad_norm": 0.8594775696802812, "learning_rate": 3.996755879967559e-06, "loss": 0.6729, "step": 9611 }, { "epoch": 0.2806341420688447, "grad_norm": 0.8066328963498102, "learning_rate": 3.996593673965937e-06, "loss": 0.7674, "step": 9612 }, { "epoch": 0.28066333829669204, "grad_norm": 0.7386178085141497, "learning_rate": 3.996431467964315e-06, "loss": 0.6465, "step": 9613 }, { "epoch": 0.2806925345245394, "grad_norm": 0.7236972597433574, "learning_rate": 3.996269261962693e-06, "loss": 0.6636, "step": 9614 }, { "epoch": 0.28072173075238677, "grad_norm": 0.7937436872685546, "learning_rate": 3.996107055961071e-06, "loss": 0.7281, "step": 9615 }, { "epoch": 0.28075092698023413, "grad_norm": 0.7972050079788041, "learning_rate": 3.995944849959449e-06, "loss": 0.6512, "step": 9616 }, { "epoch": 0.2807801232080815, "grad_norm": 0.7921822452792361, "learning_rate": 3.995782643957827e-06, "loss": 0.7039, "step": 9617 }, { "epoch": 0.28080931943592885, "grad_norm": 0.7723611887302109, "learning_rate": 3.995620437956204e-06, "loss": 0.7286, "step": 9618 }, { "epoch": 0.28083851566377627, "grad_norm": 0.7403521527441436, "learning_rate": 3.995458231954582e-06, "loss": 0.7102, "step": 9619 }, { "epoch": 0.28086771189162363, "grad_norm": 0.7229331178703999, "learning_rate": 3.99529602595296e-06, "loss": 0.6382, "step": 9620 }, { "epoch": 0.280896908119471, "grad_norm": 0.8795264550891624, "learning_rate": 3.995133819951338e-06, "loss": 0.7243, "step": 9621 }, { "epoch": 0.28092610434731835, "grad_norm": 0.7119500587415002, "learning_rate": 3.994971613949716e-06, "loss": 0.6271, "step": 9622 }, { "epoch": 0.2809553005751657, "grad_norm": 0.7621487567321085, "learning_rate": 3.994809407948094e-06, "loss": 0.6838, "step": 9623 }, { "epoch": 0.2809844968030131, "grad_norm": 0.7569058923543187, "learning_rate": 3.994647201946472e-06, "loss": 0.662, "step": 9624 }, { "epoch": 0.28101369303086043, "grad_norm": 0.7858570310385158, "learning_rate": 3.99448499594485e-06, "loss": 0.7412, "step": 9625 }, { "epoch": 0.2810428892587078, "grad_norm": 0.7569144575561434, "learning_rate": 3.994322789943228e-06, "loss": 0.674, "step": 9626 }, { "epoch": 0.28107208548655516, "grad_norm": 0.7935047846154842, "learning_rate": 3.994160583941606e-06, "loss": 0.7232, "step": 9627 }, { "epoch": 0.2811012817144025, "grad_norm": 0.704348578904271, "learning_rate": 3.993998377939984e-06, "loss": 0.6136, "step": 9628 }, { "epoch": 0.2811304779422499, "grad_norm": 0.7906729459384788, "learning_rate": 3.993836171938362e-06, "loss": 0.7409, "step": 9629 }, { "epoch": 0.28115967417009724, "grad_norm": 0.7561580244631255, "learning_rate": 3.99367396593674e-06, "loss": 0.6656, "step": 9630 }, { "epoch": 0.2811888703979446, "grad_norm": 0.7271774949919544, "learning_rate": 3.993511759935118e-06, "loss": 0.672, "step": 9631 }, { "epoch": 0.28121806662579196, "grad_norm": 0.7414839834289834, "learning_rate": 3.993349553933496e-06, "loss": 0.6784, "step": 9632 }, { "epoch": 0.2812472628536393, "grad_norm": 0.7265911266009841, "learning_rate": 3.9931873479318736e-06, "loss": 0.608, "step": 9633 }, { "epoch": 0.2812764590814867, "grad_norm": 0.7298410165103404, "learning_rate": 3.9930251419302516e-06, "loss": 0.6579, "step": 9634 }, { "epoch": 0.28130565530933405, "grad_norm": 0.7607325286347465, "learning_rate": 3.99286293592863e-06, "loss": 0.7309, "step": 9635 }, { "epoch": 0.2813348515371814, "grad_norm": 0.8115990104918572, "learning_rate": 3.992700729927008e-06, "loss": 0.6993, "step": 9636 }, { "epoch": 0.28136404776502877, "grad_norm": 0.725726296377398, "learning_rate": 3.992538523925386e-06, "loss": 0.6575, "step": 9637 }, { "epoch": 0.28139324399287613, "grad_norm": 0.6946770617378623, "learning_rate": 3.992376317923764e-06, "loss": 0.5765, "step": 9638 }, { "epoch": 0.2814224402207235, "grad_norm": 0.8299007601991752, "learning_rate": 3.992214111922142e-06, "loss": 0.6979, "step": 9639 }, { "epoch": 0.28145163644857085, "grad_norm": 0.8444254444282662, "learning_rate": 3.99205190592052e-06, "loss": 0.7378, "step": 9640 }, { "epoch": 0.2814808326764182, "grad_norm": 0.749391156862278, "learning_rate": 3.991889699918897e-06, "loss": 0.6564, "step": 9641 }, { "epoch": 0.2815100289042656, "grad_norm": 0.7188226085730166, "learning_rate": 3.991727493917275e-06, "loss": 0.6458, "step": 9642 }, { "epoch": 0.28153922513211294, "grad_norm": 0.7752123087799164, "learning_rate": 3.991565287915653e-06, "loss": 0.7005, "step": 9643 }, { "epoch": 0.2815684213599603, "grad_norm": 0.7939992431297312, "learning_rate": 3.991403081914031e-06, "loss": 0.7241, "step": 9644 }, { "epoch": 0.28159761758780766, "grad_norm": 0.7604211633480923, "learning_rate": 3.991240875912409e-06, "loss": 0.6519, "step": 9645 }, { "epoch": 0.281626813815655, "grad_norm": 0.7284844233483728, "learning_rate": 3.991078669910788e-06, "loss": 0.6574, "step": 9646 }, { "epoch": 0.2816560100435024, "grad_norm": 0.7300946854402423, "learning_rate": 3.990916463909165e-06, "loss": 0.6255, "step": 9647 }, { "epoch": 0.28168520627134974, "grad_norm": 0.6982122442054002, "learning_rate": 3.990754257907543e-06, "loss": 0.6124, "step": 9648 }, { "epoch": 0.2817144024991971, "grad_norm": 0.7608947395177379, "learning_rate": 3.990592051905921e-06, "loss": 0.6047, "step": 9649 }, { "epoch": 0.28174359872704446, "grad_norm": 0.7169309594777699, "learning_rate": 3.990429845904299e-06, "loss": 0.6246, "step": 9650 }, { "epoch": 0.2817727949548918, "grad_norm": 0.7268530330251353, "learning_rate": 3.990267639902677e-06, "loss": 0.6567, "step": 9651 }, { "epoch": 0.2818019911827392, "grad_norm": 0.7087430691635066, "learning_rate": 3.990105433901055e-06, "loss": 0.6492, "step": 9652 }, { "epoch": 0.28183118741058655, "grad_norm": 0.7352721803959437, "learning_rate": 3.989943227899433e-06, "loss": 0.6846, "step": 9653 }, { "epoch": 0.2818603836384339, "grad_norm": 0.7000196760936688, "learning_rate": 3.989781021897811e-06, "loss": 0.5959, "step": 9654 }, { "epoch": 0.28188957986628127, "grad_norm": 0.8033995537522495, "learning_rate": 3.989618815896189e-06, "loss": 0.7289, "step": 9655 }, { "epoch": 0.28191877609412863, "grad_norm": 0.7172224935583295, "learning_rate": 3.989456609894566e-06, "loss": 0.6642, "step": 9656 }, { "epoch": 0.281947972321976, "grad_norm": 0.7588962292588884, "learning_rate": 3.989294403892944e-06, "loss": 0.6618, "step": 9657 }, { "epoch": 0.28197716854982335, "grad_norm": 0.7163505487972682, "learning_rate": 3.989132197891322e-06, "loss": 0.6729, "step": 9658 }, { "epoch": 0.2820063647776707, "grad_norm": 0.7199450679010314, "learning_rate": 3.9889699918897e-06, "loss": 0.6062, "step": 9659 }, { "epoch": 0.2820355610055181, "grad_norm": 0.7490944221598586, "learning_rate": 3.988807785888078e-06, "loss": 0.7319, "step": 9660 }, { "epoch": 0.28206475723336544, "grad_norm": 0.6863485396040304, "learning_rate": 3.988645579886456e-06, "loss": 0.6034, "step": 9661 }, { "epoch": 0.2820939534612128, "grad_norm": 0.729371412121292, "learning_rate": 3.988483373884834e-06, "loss": 0.6818, "step": 9662 }, { "epoch": 0.28212314968906016, "grad_norm": 0.7843749008607903, "learning_rate": 3.988321167883212e-06, "loss": 0.6732, "step": 9663 }, { "epoch": 0.2821523459169075, "grad_norm": 0.7445448211250986, "learning_rate": 3.98815896188159e-06, "loss": 0.6414, "step": 9664 }, { "epoch": 0.2821815421447549, "grad_norm": 0.7336040779931249, "learning_rate": 3.987996755879968e-06, "loss": 0.6554, "step": 9665 }, { "epoch": 0.28221073837260224, "grad_norm": 0.7549524431229983, "learning_rate": 3.987834549878346e-06, "loss": 0.7033, "step": 9666 }, { "epoch": 0.2822399346004496, "grad_norm": 0.7629315719441082, "learning_rate": 3.987672343876724e-06, "loss": 0.6365, "step": 9667 }, { "epoch": 0.28226913082829697, "grad_norm": 0.8029778675905394, "learning_rate": 3.987510137875102e-06, "loss": 0.6578, "step": 9668 }, { "epoch": 0.2822983270561443, "grad_norm": 0.6525505424030761, "learning_rate": 3.98734793187348e-06, "loss": 0.5563, "step": 9669 }, { "epoch": 0.2823275232839917, "grad_norm": 0.8191582634800313, "learning_rate": 3.987185725871858e-06, "loss": 0.7194, "step": 9670 }, { "epoch": 0.28235671951183905, "grad_norm": 0.7187755507291846, "learning_rate": 3.987023519870235e-06, "loss": 0.6085, "step": 9671 }, { "epoch": 0.2823859157396864, "grad_norm": 0.6618793568743996, "learning_rate": 3.986861313868613e-06, "loss": 0.5603, "step": 9672 }, { "epoch": 0.28241511196753377, "grad_norm": 0.6892223191335854, "learning_rate": 3.986699107866991e-06, "loss": 0.607, "step": 9673 }, { "epoch": 0.28244430819538113, "grad_norm": 0.7282174060614764, "learning_rate": 3.986536901865369e-06, "loss": 0.6355, "step": 9674 }, { "epoch": 0.2824735044232285, "grad_norm": 0.7422515128589535, "learning_rate": 3.986374695863747e-06, "loss": 0.6614, "step": 9675 }, { "epoch": 0.28250270065107586, "grad_norm": 0.7123100946019577, "learning_rate": 3.986212489862125e-06, "loss": 0.6095, "step": 9676 }, { "epoch": 0.2825318968789232, "grad_norm": 0.770653043100553, "learning_rate": 3.986050283860503e-06, "loss": 0.7557, "step": 9677 }, { "epoch": 0.2825610931067706, "grad_norm": 0.752355887999397, "learning_rate": 3.985888077858881e-06, "loss": 0.7213, "step": 9678 }, { "epoch": 0.282590289334618, "grad_norm": 0.7697366324393028, "learning_rate": 3.9857258718572585e-06, "loss": 0.7103, "step": 9679 }, { "epoch": 0.28261948556246536, "grad_norm": 0.727792347358216, "learning_rate": 3.9855636658556365e-06, "loss": 0.675, "step": 9680 }, { "epoch": 0.2826486817903127, "grad_norm": 0.7351800064426013, "learning_rate": 3.9854014598540145e-06, "loss": 0.6223, "step": 9681 }, { "epoch": 0.2826778780181601, "grad_norm": 0.7277353303417353, "learning_rate": 3.9852392538523925e-06, "loss": 0.6826, "step": 9682 }, { "epoch": 0.28270707424600744, "grad_norm": 0.7523298509634591, "learning_rate": 3.9850770478507705e-06, "loss": 0.6705, "step": 9683 }, { "epoch": 0.2827362704738548, "grad_norm": 0.8792820348060686, "learning_rate": 3.984914841849149e-06, "loss": 0.7294, "step": 9684 }, { "epoch": 0.28276546670170216, "grad_norm": 0.7196015616372446, "learning_rate": 3.9847526358475265e-06, "loss": 0.5954, "step": 9685 }, { "epoch": 0.2827946629295495, "grad_norm": 0.8274689355451709, "learning_rate": 3.9845904298459045e-06, "loss": 0.7538, "step": 9686 }, { "epoch": 0.2828238591573969, "grad_norm": 0.7422140713060362, "learning_rate": 3.9844282238442825e-06, "loss": 0.5748, "step": 9687 }, { "epoch": 0.28285305538524425, "grad_norm": 0.7491627383734021, "learning_rate": 3.9842660178426605e-06, "loss": 0.6522, "step": 9688 }, { "epoch": 0.2828822516130916, "grad_norm": 0.8513461010751975, "learning_rate": 3.9841038118410386e-06, "loss": 0.6233, "step": 9689 }, { "epoch": 0.28291144784093897, "grad_norm": 0.7148860307482909, "learning_rate": 3.9839416058394166e-06, "loss": 0.6081, "step": 9690 }, { "epoch": 0.28294064406878633, "grad_norm": 0.7975208524461089, "learning_rate": 3.9837793998377946e-06, "loss": 0.7212, "step": 9691 }, { "epoch": 0.2829698402966337, "grad_norm": 0.7480243679236142, "learning_rate": 3.9836171938361726e-06, "loss": 0.6745, "step": 9692 }, { "epoch": 0.28299903652448105, "grad_norm": 0.8244407326114432, "learning_rate": 3.983454987834551e-06, "loss": 0.7295, "step": 9693 }, { "epoch": 0.2830282327523284, "grad_norm": 0.7551527532457991, "learning_rate": 3.983292781832928e-06, "loss": 0.7145, "step": 9694 }, { "epoch": 0.2830574289801758, "grad_norm": 0.8183129944490481, "learning_rate": 3.983130575831306e-06, "loss": 0.7318, "step": 9695 }, { "epoch": 0.28308662520802313, "grad_norm": 0.799300218299158, "learning_rate": 3.982968369829684e-06, "loss": 0.7894, "step": 9696 }, { "epoch": 0.2831158214358705, "grad_norm": 0.7468861871650007, "learning_rate": 3.982806163828062e-06, "loss": 0.7109, "step": 9697 }, { "epoch": 0.28314501766371786, "grad_norm": 0.6842198205089293, "learning_rate": 3.98264395782644e-06, "loss": 0.582, "step": 9698 }, { "epoch": 0.2831742138915652, "grad_norm": 0.7641054904421606, "learning_rate": 3.982481751824818e-06, "loss": 0.7091, "step": 9699 }, { "epoch": 0.2832034101194126, "grad_norm": 0.7550671515259438, "learning_rate": 3.982319545823196e-06, "loss": 0.7098, "step": 9700 }, { "epoch": 0.28323260634725994, "grad_norm": 0.7629837388331708, "learning_rate": 3.982157339821574e-06, "loss": 0.6719, "step": 9701 }, { "epoch": 0.2832618025751073, "grad_norm": 0.7179521359403733, "learning_rate": 3.981995133819952e-06, "loss": 0.6648, "step": 9702 }, { "epoch": 0.28329099880295466, "grad_norm": 0.7456252221012479, "learning_rate": 3.98183292781833e-06, "loss": 0.6767, "step": 9703 }, { "epoch": 0.283320195030802, "grad_norm": 0.7210816773413343, "learning_rate": 3.981670721816708e-06, "loss": 0.6376, "step": 9704 }, { "epoch": 0.2833493912586494, "grad_norm": 0.7902472749886483, "learning_rate": 3.981508515815086e-06, "loss": 0.6579, "step": 9705 }, { "epoch": 0.28337858748649675, "grad_norm": 1.0074943587159615, "learning_rate": 3.981346309813464e-06, "loss": 0.7333, "step": 9706 }, { "epoch": 0.2834077837143441, "grad_norm": 0.734218895952283, "learning_rate": 3.981184103811842e-06, "loss": 0.6333, "step": 9707 }, { "epoch": 0.28343697994219147, "grad_norm": 0.6934851468280743, "learning_rate": 3.98102189781022e-06, "loss": 0.5854, "step": 9708 }, { "epoch": 0.28346617617003883, "grad_norm": 0.7196299067106778, "learning_rate": 3.980859691808597e-06, "loss": 0.6915, "step": 9709 }, { "epoch": 0.2834953723978862, "grad_norm": 0.7227495766084008, "learning_rate": 3.980697485806975e-06, "loss": 0.616, "step": 9710 }, { "epoch": 0.28352456862573355, "grad_norm": 0.7456071174715266, "learning_rate": 3.980535279805353e-06, "loss": 0.6577, "step": 9711 }, { "epoch": 0.2835537648535809, "grad_norm": 0.7807305686237582, "learning_rate": 3.980373073803731e-06, "loss": 0.7224, "step": 9712 }, { "epoch": 0.2835829610814283, "grad_norm": 0.6944442993535574, "learning_rate": 3.980210867802109e-06, "loss": 0.6003, "step": 9713 }, { "epoch": 0.28361215730927564, "grad_norm": 0.7880624061380153, "learning_rate": 3.980048661800487e-06, "loss": 0.6785, "step": 9714 }, { "epoch": 0.283641353537123, "grad_norm": 0.7374487793456455, "learning_rate": 3.979886455798865e-06, "loss": 0.6404, "step": 9715 }, { "epoch": 0.28367054976497036, "grad_norm": 0.7530682786144776, "learning_rate": 3.979724249797243e-06, "loss": 0.7317, "step": 9716 }, { "epoch": 0.2836997459928177, "grad_norm": 0.6967201107341103, "learning_rate": 3.97956204379562e-06, "loss": 0.6348, "step": 9717 }, { "epoch": 0.2837289422206651, "grad_norm": 0.8400817136794336, "learning_rate": 3.979399837793998e-06, "loss": 0.8194, "step": 9718 }, { "epoch": 0.28375813844851244, "grad_norm": 0.7635945472703143, "learning_rate": 3.979237631792376e-06, "loss": 0.6286, "step": 9719 }, { "epoch": 0.2837873346763598, "grad_norm": 0.8785087577873544, "learning_rate": 3.979075425790754e-06, "loss": 0.678, "step": 9720 }, { "epoch": 0.28381653090420716, "grad_norm": 0.7285921911558682, "learning_rate": 3.978913219789132e-06, "loss": 0.7177, "step": 9721 }, { "epoch": 0.2838457271320545, "grad_norm": 0.9036929264264649, "learning_rate": 3.978751013787511e-06, "loss": 0.6838, "step": 9722 }, { "epoch": 0.2838749233599019, "grad_norm": 0.7217131498618042, "learning_rate": 3.978588807785888e-06, "loss": 0.6067, "step": 9723 }, { "epoch": 0.28390411958774925, "grad_norm": 0.7878419352868625, "learning_rate": 3.978426601784266e-06, "loss": 0.7538, "step": 9724 }, { "epoch": 0.2839333158155966, "grad_norm": 0.7315038694473853, "learning_rate": 3.978264395782644e-06, "loss": 0.6366, "step": 9725 }, { "epoch": 0.28396251204344397, "grad_norm": 0.7692953196594968, "learning_rate": 3.978102189781022e-06, "loss": 0.7014, "step": 9726 }, { "epoch": 0.28399170827129133, "grad_norm": 0.7033148737246226, "learning_rate": 3.9779399837794e-06, "loss": 0.6059, "step": 9727 }, { "epoch": 0.2840209044991387, "grad_norm": 0.8010411542952726, "learning_rate": 3.977777777777778e-06, "loss": 0.757, "step": 9728 }, { "epoch": 0.28405010072698605, "grad_norm": 0.7454738595924956, "learning_rate": 3.977615571776156e-06, "loss": 0.6645, "step": 9729 }, { "epoch": 0.2840792969548334, "grad_norm": 0.7314954980970613, "learning_rate": 3.977453365774534e-06, "loss": 0.6453, "step": 9730 }, { "epoch": 0.2841084931826808, "grad_norm": 0.8066493068326693, "learning_rate": 3.977291159772912e-06, "loss": 0.7759, "step": 9731 }, { "epoch": 0.28413768941052814, "grad_norm": 0.806352610447455, "learning_rate": 3.9771289537712895e-06, "loss": 0.7411, "step": 9732 }, { "epoch": 0.2841668856383755, "grad_norm": 0.7483225138835242, "learning_rate": 3.9769667477696675e-06, "loss": 0.7199, "step": 9733 }, { "epoch": 0.28419608186622286, "grad_norm": 0.7564338575601788, "learning_rate": 3.9768045417680455e-06, "loss": 0.6735, "step": 9734 }, { "epoch": 0.2842252780940702, "grad_norm": 0.6750927691349181, "learning_rate": 3.9766423357664235e-06, "loss": 0.549, "step": 9735 }, { "epoch": 0.2842544743219176, "grad_norm": 0.73279237243691, "learning_rate": 3.9764801297648015e-06, "loss": 0.5973, "step": 9736 }, { "epoch": 0.28428367054976494, "grad_norm": 0.7341434687662637, "learning_rate": 3.9763179237631795e-06, "loss": 0.6526, "step": 9737 }, { "epoch": 0.2843128667776123, "grad_norm": 0.7923384210527435, "learning_rate": 3.9761557177615575e-06, "loss": 0.7765, "step": 9738 }, { "epoch": 0.2843420630054597, "grad_norm": 0.7430778565926718, "learning_rate": 3.9759935117599355e-06, "loss": 0.6848, "step": 9739 }, { "epoch": 0.2843712592333071, "grad_norm": 0.7471034379287961, "learning_rate": 3.9758313057583135e-06, "loss": 0.6705, "step": 9740 }, { "epoch": 0.28440045546115444, "grad_norm": 0.7473560246742447, "learning_rate": 3.9756690997566915e-06, "loss": 0.665, "step": 9741 }, { "epoch": 0.2844296516890018, "grad_norm": 0.7760932374454375, "learning_rate": 3.9755068937550695e-06, "loss": 0.7497, "step": 9742 }, { "epoch": 0.28445884791684917, "grad_norm": 0.7247599381410368, "learning_rate": 3.9753446877534475e-06, "loss": 0.6477, "step": 9743 }, { "epoch": 0.28448804414469653, "grad_norm": 0.7587776872690525, "learning_rate": 3.9751824817518255e-06, "loss": 0.7526, "step": 9744 }, { "epoch": 0.2845172403725439, "grad_norm": 0.7983436512579848, "learning_rate": 3.9750202757502035e-06, "loss": 0.7587, "step": 9745 }, { "epoch": 0.28454643660039125, "grad_norm": 0.7489291450878622, "learning_rate": 3.974858069748581e-06, "loss": 0.6971, "step": 9746 }, { "epoch": 0.2845756328282386, "grad_norm": 0.8032588390871376, "learning_rate": 3.974695863746959e-06, "loss": 0.6264, "step": 9747 }, { "epoch": 0.284604829056086, "grad_norm": 0.755049515658867, "learning_rate": 3.974533657745337e-06, "loss": 0.7178, "step": 9748 }, { "epoch": 0.28463402528393333, "grad_norm": 0.7055899794575401, "learning_rate": 3.974371451743715e-06, "loss": 0.6353, "step": 9749 }, { "epoch": 0.2846632215117807, "grad_norm": 0.715518600200108, "learning_rate": 3.974209245742093e-06, "loss": 0.672, "step": 9750 }, { "epoch": 0.28469241773962806, "grad_norm": 0.8237386380897495, "learning_rate": 3.974047039740471e-06, "loss": 0.6686, "step": 9751 }, { "epoch": 0.2847216139674754, "grad_norm": 0.7321189305452368, "learning_rate": 3.973884833738849e-06, "loss": 0.6385, "step": 9752 }, { "epoch": 0.2847508101953228, "grad_norm": 0.7161966717428245, "learning_rate": 3.973722627737227e-06, "loss": 0.6635, "step": 9753 }, { "epoch": 0.28478000642317014, "grad_norm": 0.7834069294185856, "learning_rate": 3.973560421735605e-06, "loss": 0.6937, "step": 9754 }, { "epoch": 0.2848092026510175, "grad_norm": 0.6685700255868586, "learning_rate": 3.973398215733982e-06, "loss": 0.5794, "step": 9755 }, { "epoch": 0.28483839887886486, "grad_norm": 0.7429475855504039, "learning_rate": 3.97323600973236e-06, "loss": 0.6899, "step": 9756 }, { "epoch": 0.2848675951067122, "grad_norm": 0.7201610954256957, "learning_rate": 3.973073803730738e-06, "loss": 0.6305, "step": 9757 }, { "epoch": 0.2848967913345596, "grad_norm": 0.6880533808742855, "learning_rate": 3.972911597729116e-06, "loss": 0.558, "step": 9758 }, { "epoch": 0.28492598756240695, "grad_norm": 0.7329402315595518, "learning_rate": 3.972749391727495e-06, "loss": 0.6413, "step": 9759 }, { "epoch": 0.2849551837902543, "grad_norm": 0.7341910318681367, "learning_rate": 3.972587185725873e-06, "loss": 0.6245, "step": 9760 }, { "epoch": 0.28498438001810167, "grad_norm": 0.7208691240046157, "learning_rate": 3.97242497972425e-06, "loss": 0.6538, "step": 9761 }, { "epoch": 0.28501357624594903, "grad_norm": 0.7418558732667486, "learning_rate": 3.972262773722628e-06, "loss": 0.6547, "step": 9762 }, { "epoch": 0.2850427724737964, "grad_norm": 0.6581103622429757, "learning_rate": 3.972100567721006e-06, "loss": 0.5316, "step": 9763 }, { "epoch": 0.28507196870164375, "grad_norm": 1.2630901095304667, "learning_rate": 3.971938361719384e-06, "loss": 0.7015, "step": 9764 }, { "epoch": 0.2851011649294911, "grad_norm": 0.6881164159988248, "learning_rate": 3.971776155717762e-06, "loss": 0.5882, "step": 9765 }, { "epoch": 0.2851303611573385, "grad_norm": 0.7637171279601364, "learning_rate": 3.97161394971614e-06, "loss": 0.7347, "step": 9766 }, { "epoch": 0.28515955738518584, "grad_norm": 0.7133029620809113, "learning_rate": 3.971451743714518e-06, "loss": 0.582, "step": 9767 }, { "epoch": 0.2851887536130332, "grad_norm": 0.7741050922977611, "learning_rate": 3.971289537712896e-06, "loss": 0.671, "step": 9768 }, { "epoch": 0.28521794984088056, "grad_norm": 0.8308229989208706, "learning_rate": 3.971127331711274e-06, "loss": 0.7984, "step": 9769 }, { "epoch": 0.2852471460687279, "grad_norm": 0.7928524094960154, "learning_rate": 3.970965125709651e-06, "loss": 0.6731, "step": 9770 }, { "epoch": 0.2852763422965753, "grad_norm": 0.7401331366545301, "learning_rate": 3.970802919708029e-06, "loss": 0.6658, "step": 9771 }, { "epoch": 0.28530553852442264, "grad_norm": 0.6919702134400275, "learning_rate": 3.970640713706407e-06, "loss": 0.5903, "step": 9772 }, { "epoch": 0.28533473475227, "grad_norm": 0.7260246233513702, "learning_rate": 3.970478507704785e-06, "loss": 0.6094, "step": 9773 }, { "epoch": 0.28536393098011736, "grad_norm": 0.7249467012138413, "learning_rate": 3.970316301703163e-06, "loss": 0.6662, "step": 9774 }, { "epoch": 0.2853931272079647, "grad_norm": 0.695851258247137, "learning_rate": 3.970154095701541e-06, "loss": 0.5884, "step": 9775 }, { "epoch": 0.2854223234358121, "grad_norm": 0.7364942379784156, "learning_rate": 3.969991889699919e-06, "loss": 0.6709, "step": 9776 }, { "epoch": 0.28545151966365945, "grad_norm": 0.7269762688174908, "learning_rate": 3.969829683698297e-06, "loss": 0.6867, "step": 9777 }, { "epoch": 0.2854807158915068, "grad_norm": 0.7772115777731918, "learning_rate": 3.969667477696675e-06, "loss": 0.6895, "step": 9778 }, { "epoch": 0.28550991211935417, "grad_norm": 0.7594760391405698, "learning_rate": 3.969505271695053e-06, "loss": 0.7196, "step": 9779 }, { "epoch": 0.28553910834720153, "grad_norm": 0.7582997097309547, "learning_rate": 3.969343065693431e-06, "loss": 0.7257, "step": 9780 }, { "epoch": 0.2855683045750489, "grad_norm": 0.7329250938014945, "learning_rate": 3.969180859691809e-06, "loss": 0.6693, "step": 9781 }, { "epoch": 0.28559750080289625, "grad_norm": 0.6673436226681967, "learning_rate": 3.969018653690187e-06, "loss": 0.572, "step": 9782 }, { "epoch": 0.2856266970307436, "grad_norm": 0.7337230467204474, "learning_rate": 3.968856447688565e-06, "loss": 0.6884, "step": 9783 }, { "epoch": 0.285655893258591, "grad_norm": 0.7622261194540764, "learning_rate": 3.9686942416869424e-06, "loss": 0.6031, "step": 9784 }, { "epoch": 0.28568508948643834, "grad_norm": 0.7487773324841964, "learning_rate": 3.9685320356853204e-06, "loss": 0.6271, "step": 9785 }, { "epoch": 0.2857142857142857, "grad_norm": 0.7931397331380707, "learning_rate": 3.9683698296836984e-06, "loss": 0.7281, "step": 9786 }, { "epoch": 0.28574348194213306, "grad_norm": 0.7378944637115962, "learning_rate": 3.9682076236820764e-06, "loss": 0.6906, "step": 9787 }, { "epoch": 0.2857726781699804, "grad_norm": 0.7096022548004436, "learning_rate": 3.9680454176804545e-06, "loss": 0.6273, "step": 9788 }, { "epoch": 0.2858018743978278, "grad_norm": 0.7749367020187311, "learning_rate": 3.9678832116788325e-06, "loss": 0.7002, "step": 9789 }, { "epoch": 0.28583107062567514, "grad_norm": 0.7604358036922282, "learning_rate": 3.9677210056772105e-06, "loss": 0.6352, "step": 9790 }, { "epoch": 0.2858602668535225, "grad_norm": 0.7324518705506868, "learning_rate": 3.9675587996755885e-06, "loss": 0.6183, "step": 9791 }, { "epoch": 0.28588946308136987, "grad_norm": 0.7298599489680211, "learning_rate": 3.9673965936739665e-06, "loss": 0.6653, "step": 9792 }, { "epoch": 0.2859186593092172, "grad_norm": 0.6769638107278296, "learning_rate": 3.967234387672344e-06, "loss": 0.6001, "step": 9793 }, { "epoch": 0.2859478555370646, "grad_norm": 0.7900204937339487, "learning_rate": 3.967072181670722e-06, "loss": 0.6792, "step": 9794 }, { "epoch": 0.28597705176491195, "grad_norm": 0.7129258048976681, "learning_rate": 3.9669099756691e-06, "loss": 0.6443, "step": 9795 }, { "epoch": 0.2860062479927593, "grad_norm": 0.7041553590467335, "learning_rate": 3.966747769667478e-06, "loss": 0.6504, "step": 9796 }, { "epoch": 0.28603544422060667, "grad_norm": 0.7466578408580701, "learning_rate": 3.9665855636658565e-06, "loss": 0.6302, "step": 9797 }, { "epoch": 0.28606464044845403, "grad_norm": 0.7325657166836563, "learning_rate": 3.9664233576642345e-06, "loss": 0.6387, "step": 9798 }, { "epoch": 0.28609383667630145, "grad_norm": 0.833663906559625, "learning_rate": 3.966261151662612e-06, "loss": 0.6559, "step": 9799 }, { "epoch": 0.2861230329041488, "grad_norm": 0.7458320588290457, "learning_rate": 3.96609894566099e-06, "loss": 0.6528, "step": 9800 }, { "epoch": 0.28615222913199617, "grad_norm": 0.7379081900554297, "learning_rate": 3.965936739659368e-06, "loss": 0.6196, "step": 9801 }, { "epoch": 0.28618142535984353, "grad_norm": 0.6753476934910578, "learning_rate": 3.965774533657746e-06, "loss": 0.5763, "step": 9802 }, { "epoch": 0.2862106215876909, "grad_norm": 0.6945278564471843, "learning_rate": 3.965612327656124e-06, "loss": 0.6162, "step": 9803 }, { "epoch": 0.28623981781553826, "grad_norm": 0.7574473906260217, "learning_rate": 3.965450121654502e-06, "loss": 0.6585, "step": 9804 }, { "epoch": 0.2862690140433856, "grad_norm": 0.7218113275317597, "learning_rate": 3.96528791565288e-06, "loss": 0.6761, "step": 9805 }, { "epoch": 0.286298210271233, "grad_norm": 0.7454951158678375, "learning_rate": 3.965125709651258e-06, "loss": 0.7017, "step": 9806 }, { "epoch": 0.28632740649908034, "grad_norm": 0.7708403441496327, "learning_rate": 3.964963503649636e-06, "loss": 0.7258, "step": 9807 }, { "epoch": 0.2863566027269277, "grad_norm": 0.7191066160261353, "learning_rate": 3.964801297648013e-06, "loss": 0.6268, "step": 9808 }, { "epoch": 0.28638579895477506, "grad_norm": 0.723815164522391, "learning_rate": 3.964639091646391e-06, "loss": 0.6474, "step": 9809 }, { "epoch": 0.2864149951826224, "grad_norm": 0.7616363040874868, "learning_rate": 3.964476885644769e-06, "loss": 0.6583, "step": 9810 }, { "epoch": 0.2864441914104698, "grad_norm": 0.7166071982156785, "learning_rate": 3.964314679643147e-06, "loss": 0.6289, "step": 9811 }, { "epoch": 0.28647338763831715, "grad_norm": 0.7762823987162377, "learning_rate": 3.964152473641525e-06, "loss": 0.609, "step": 9812 }, { "epoch": 0.2865025838661645, "grad_norm": 0.7520538666535624, "learning_rate": 3.963990267639903e-06, "loss": 0.687, "step": 9813 }, { "epoch": 0.28653178009401187, "grad_norm": 0.6816132683257233, "learning_rate": 3.963828061638281e-06, "loss": 0.6324, "step": 9814 }, { "epoch": 0.28656097632185923, "grad_norm": 0.7255276819695181, "learning_rate": 3.963665855636659e-06, "loss": 0.6068, "step": 9815 }, { "epoch": 0.2865901725497066, "grad_norm": 0.7231972056659531, "learning_rate": 3.963503649635037e-06, "loss": 0.6618, "step": 9816 }, { "epoch": 0.28661936877755395, "grad_norm": 0.7549411173135951, "learning_rate": 3.963341443633415e-06, "loss": 0.7383, "step": 9817 }, { "epoch": 0.2866485650054013, "grad_norm": 0.726226327664989, "learning_rate": 3.963179237631793e-06, "loss": 0.6317, "step": 9818 }, { "epoch": 0.2866777612332487, "grad_norm": 0.7350642110604937, "learning_rate": 3.963017031630171e-06, "loss": 0.6928, "step": 9819 }, { "epoch": 0.28670695746109603, "grad_norm": 0.7738750602748388, "learning_rate": 3.962854825628549e-06, "loss": 0.7667, "step": 9820 }, { "epoch": 0.2867361536889434, "grad_norm": 0.7605390238226208, "learning_rate": 3.962692619626927e-06, "loss": 0.6593, "step": 9821 }, { "epoch": 0.28676534991679076, "grad_norm": 0.705252530246588, "learning_rate": 3.962530413625304e-06, "loss": 0.6323, "step": 9822 }, { "epoch": 0.2867945461446381, "grad_norm": 0.6978830549750681, "learning_rate": 3.962368207623682e-06, "loss": 0.612, "step": 9823 }, { "epoch": 0.2868237423724855, "grad_norm": 0.7672738030202971, "learning_rate": 3.96220600162206e-06, "loss": 0.6541, "step": 9824 }, { "epoch": 0.28685293860033284, "grad_norm": 0.740228879187946, "learning_rate": 3.962043795620438e-06, "loss": 0.627, "step": 9825 }, { "epoch": 0.2868821348281802, "grad_norm": 0.8078667554495056, "learning_rate": 3.961881589618816e-06, "loss": 0.6588, "step": 9826 }, { "epoch": 0.28691133105602756, "grad_norm": 0.7514889380723336, "learning_rate": 3.961719383617194e-06, "loss": 0.6714, "step": 9827 }, { "epoch": 0.2869405272838749, "grad_norm": 0.7507192520817519, "learning_rate": 3.961557177615572e-06, "loss": 0.7196, "step": 9828 }, { "epoch": 0.2869697235117223, "grad_norm": 0.7653990610066287, "learning_rate": 3.96139497161395e-06, "loss": 0.6333, "step": 9829 }, { "epoch": 0.28699891973956965, "grad_norm": 0.7767566606353652, "learning_rate": 3.961232765612328e-06, "loss": 0.6413, "step": 9830 }, { "epoch": 0.287028115967417, "grad_norm": 0.7935556779814663, "learning_rate": 3.961070559610705e-06, "loss": 0.7017, "step": 9831 }, { "epoch": 0.28705731219526437, "grad_norm": 0.8241208733384894, "learning_rate": 3.960908353609083e-06, "loss": 0.6915, "step": 9832 }, { "epoch": 0.28708650842311173, "grad_norm": 0.7194531852852316, "learning_rate": 3.960746147607461e-06, "loss": 0.6646, "step": 9833 }, { "epoch": 0.2871157046509591, "grad_norm": 0.7246356923834029, "learning_rate": 3.960583941605839e-06, "loss": 0.6413, "step": 9834 }, { "epoch": 0.28714490087880645, "grad_norm": 0.7721129465271233, "learning_rate": 3.960421735604218e-06, "loss": 0.715, "step": 9835 }, { "epoch": 0.2871740971066538, "grad_norm": 0.7119791773882912, "learning_rate": 3.960259529602596e-06, "loss": 0.6433, "step": 9836 }, { "epoch": 0.2872032933345012, "grad_norm": 0.7339057226597913, "learning_rate": 3.960097323600973e-06, "loss": 0.681, "step": 9837 }, { "epoch": 0.28723248956234854, "grad_norm": 0.9717039702629158, "learning_rate": 3.959935117599351e-06, "loss": 0.6777, "step": 9838 }, { "epoch": 0.2872616857901959, "grad_norm": 0.7300182452447183, "learning_rate": 3.959772911597729e-06, "loss": 0.6632, "step": 9839 }, { "epoch": 0.28729088201804326, "grad_norm": 0.8705420956912964, "learning_rate": 3.959610705596107e-06, "loss": 0.7207, "step": 9840 }, { "epoch": 0.2873200782458906, "grad_norm": 0.805115379787809, "learning_rate": 3.9594484995944854e-06, "loss": 0.7181, "step": 9841 }, { "epoch": 0.287349274473738, "grad_norm": 0.7724435668969352, "learning_rate": 3.9592862935928634e-06, "loss": 0.7382, "step": 9842 }, { "epoch": 0.28737847070158534, "grad_norm": 0.7563526736144031, "learning_rate": 3.9591240875912414e-06, "loss": 0.6482, "step": 9843 }, { "epoch": 0.2874076669294327, "grad_norm": 0.8709883405118772, "learning_rate": 3.9589618815896194e-06, "loss": 0.6749, "step": 9844 }, { "epoch": 0.28743686315728006, "grad_norm": 0.6605770924243685, "learning_rate": 3.9587996755879975e-06, "loss": 0.5738, "step": 9845 }, { "epoch": 0.2874660593851274, "grad_norm": 0.7224550154850938, "learning_rate": 3.958637469586375e-06, "loss": 0.6379, "step": 9846 }, { "epoch": 0.2874952556129748, "grad_norm": 0.7689861019072276, "learning_rate": 3.958475263584753e-06, "loss": 0.5815, "step": 9847 }, { "epoch": 0.28752445184082215, "grad_norm": 0.7972123732605937, "learning_rate": 3.958313057583131e-06, "loss": 0.6663, "step": 9848 }, { "epoch": 0.2875536480686695, "grad_norm": 0.7435348037326356, "learning_rate": 3.958150851581509e-06, "loss": 0.697, "step": 9849 }, { "epoch": 0.28758284429651687, "grad_norm": 0.706481096749488, "learning_rate": 3.957988645579887e-06, "loss": 0.6138, "step": 9850 }, { "epoch": 0.28761204052436423, "grad_norm": 0.7665029302206664, "learning_rate": 3.957826439578265e-06, "loss": 0.739, "step": 9851 }, { "epoch": 0.2876412367522116, "grad_norm": 0.6809685773345493, "learning_rate": 3.957664233576643e-06, "loss": 0.5521, "step": 9852 }, { "epoch": 0.28767043298005895, "grad_norm": 0.7039612333993499, "learning_rate": 3.957502027575021e-06, "loss": 0.5985, "step": 9853 }, { "epoch": 0.2876996292079063, "grad_norm": 0.7478356461722723, "learning_rate": 3.957339821573399e-06, "loss": 0.6749, "step": 9854 }, { "epoch": 0.2877288254357537, "grad_norm": 0.7499189003069288, "learning_rate": 3.957177615571777e-06, "loss": 0.671, "step": 9855 }, { "epoch": 0.28775802166360104, "grad_norm": 0.8561092895802366, "learning_rate": 3.957015409570155e-06, "loss": 0.5894, "step": 9856 }, { "epoch": 0.2877872178914484, "grad_norm": 0.8362608313439643, "learning_rate": 3.956853203568533e-06, "loss": 0.8279, "step": 9857 }, { "epoch": 0.28781641411929576, "grad_norm": 0.6747054386910935, "learning_rate": 3.956690997566911e-06, "loss": 0.5967, "step": 9858 }, { "epoch": 0.2878456103471431, "grad_norm": 0.7376886398832011, "learning_rate": 3.956528791565289e-06, "loss": 0.6202, "step": 9859 }, { "epoch": 0.28787480657499054, "grad_norm": 0.7439167154791304, "learning_rate": 3.956366585563666e-06, "loss": 0.6947, "step": 9860 }, { "epoch": 0.2879040028028379, "grad_norm": 0.7420786130558309, "learning_rate": 3.956204379562044e-06, "loss": 0.6755, "step": 9861 }, { "epoch": 0.28793319903068526, "grad_norm": 0.8366822107325846, "learning_rate": 3.956042173560422e-06, "loss": 0.7272, "step": 9862 }, { "epoch": 0.2879623952585326, "grad_norm": 0.6864177112578734, "learning_rate": 3.9558799675588e-06, "loss": 0.5503, "step": 9863 }, { "epoch": 0.28799159148638, "grad_norm": 0.7183900736152112, "learning_rate": 3.955717761557178e-06, "loss": 0.669, "step": 9864 }, { "epoch": 0.28802078771422734, "grad_norm": 0.7511494501951856, "learning_rate": 3.955555555555556e-06, "loss": 0.6698, "step": 9865 }, { "epoch": 0.2880499839420747, "grad_norm": 0.7432897474676607, "learning_rate": 3.955393349553934e-06, "loss": 0.6443, "step": 9866 }, { "epoch": 0.28807918016992207, "grad_norm": 0.7718991841439556, "learning_rate": 3.955231143552312e-06, "loss": 0.6648, "step": 9867 }, { "epoch": 0.28810837639776943, "grad_norm": 0.7133627233870131, "learning_rate": 3.95506893755069e-06, "loss": 0.6509, "step": 9868 }, { "epoch": 0.2881375726256168, "grad_norm": 0.7158985632141802, "learning_rate": 3.954906731549067e-06, "loss": 0.6577, "step": 9869 }, { "epoch": 0.28816676885346415, "grad_norm": 0.7316335268797289, "learning_rate": 3.954744525547445e-06, "loss": 0.662, "step": 9870 }, { "epoch": 0.2881959650813115, "grad_norm": 0.6584535771085014, "learning_rate": 3.954582319545823e-06, "loss": 0.5421, "step": 9871 }, { "epoch": 0.2882251613091589, "grad_norm": 0.747685766761715, "learning_rate": 3.954420113544201e-06, "loss": 0.68, "step": 9872 }, { "epoch": 0.28825435753700623, "grad_norm": 29.83639398561694, "learning_rate": 3.95425790754258e-06, "loss": 1.0372, "step": 9873 }, { "epoch": 0.2882835537648536, "grad_norm": 0.7176012454137576, "learning_rate": 3.954095701540958e-06, "loss": 0.6611, "step": 9874 }, { "epoch": 0.28831274999270096, "grad_norm": 0.7596194576835875, "learning_rate": 3.953933495539335e-06, "loss": 0.7019, "step": 9875 }, { "epoch": 0.2883419462205483, "grad_norm": 0.7383179429331312, "learning_rate": 3.953771289537713e-06, "loss": 0.6956, "step": 9876 }, { "epoch": 0.2883711424483957, "grad_norm": 0.7132808315011011, "learning_rate": 3.953609083536091e-06, "loss": 0.6554, "step": 9877 }, { "epoch": 0.28840033867624304, "grad_norm": 0.7419123077582426, "learning_rate": 3.953446877534469e-06, "loss": 0.6536, "step": 9878 }, { "epoch": 0.2884295349040904, "grad_norm": 0.8315253181363871, "learning_rate": 3.953284671532847e-06, "loss": 0.7023, "step": 9879 }, { "epoch": 0.28845873113193776, "grad_norm": 0.8905693156674731, "learning_rate": 3.953122465531225e-06, "loss": 0.7798, "step": 9880 }, { "epoch": 0.2884879273597851, "grad_norm": 0.7116778816485791, "learning_rate": 3.952960259529603e-06, "loss": 0.6029, "step": 9881 }, { "epoch": 0.2885171235876325, "grad_norm": 0.6926027984843095, "learning_rate": 3.952798053527981e-06, "loss": 0.577, "step": 9882 }, { "epoch": 0.28854631981547985, "grad_norm": 0.684430379309759, "learning_rate": 3.952635847526359e-06, "loss": 0.5944, "step": 9883 }, { "epoch": 0.2885755160433272, "grad_norm": 0.7818070861099098, "learning_rate": 3.952473641524736e-06, "loss": 0.7346, "step": 9884 }, { "epoch": 0.28860471227117457, "grad_norm": 0.7994457927447896, "learning_rate": 3.952311435523114e-06, "loss": 0.6854, "step": 9885 }, { "epoch": 0.28863390849902193, "grad_norm": 0.8293191419100042, "learning_rate": 3.952149229521492e-06, "loss": 0.7137, "step": 9886 }, { "epoch": 0.2886631047268693, "grad_norm": 0.7520445407889332, "learning_rate": 3.95198702351987e-06, "loss": 0.7207, "step": 9887 }, { "epoch": 0.28869230095471665, "grad_norm": 0.7256786599078201, "learning_rate": 3.951824817518248e-06, "loss": 0.6018, "step": 9888 }, { "epoch": 0.288721497182564, "grad_norm": 0.7335800914474898, "learning_rate": 3.951662611516626e-06, "loss": 0.6846, "step": 9889 }, { "epoch": 0.2887506934104114, "grad_norm": 0.7215009563310728, "learning_rate": 3.951500405515004e-06, "loss": 0.6218, "step": 9890 }, { "epoch": 0.28877988963825874, "grad_norm": 0.7417266319047243, "learning_rate": 3.951338199513382e-06, "loss": 0.7001, "step": 9891 }, { "epoch": 0.2888090858661061, "grad_norm": 0.7509463473662475, "learning_rate": 3.95117599351176e-06, "loss": 0.7078, "step": 9892 }, { "epoch": 0.28883828209395346, "grad_norm": 0.7197747409772864, "learning_rate": 3.951013787510138e-06, "loss": 0.5613, "step": 9893 }, { "epoch": 0.2888674783218008, "grad_norm": 0.7372233244150485, "learning_rate": 3.950851581508516e-06, "loss": 0.6468, "step": 9894 }, { "epoch": 0.2888966745496482, "grad_norm": 0.7606970909643967, "learning_rate": 3.950689375506894e-06, "loss": 0.6719, "step": 9895 }, { "epoch": 0.28892587077749554, "grad_norm": 0.7452878706322956, "learning_rate": 3.950527169505272e-06, "loss": 0.6806, "step": 9896 }, { "epoch": 0.2889550670053429, "grad_norm": 0.7383132178283086, "learning_rate": 3.95036496350365e-06, "loss": 0.7427, "step": 9897 }, { "epoch": 0.28898426323319026, "grad_norm": 0.8393053371008891, "learning_rate": 3.950202757502028e-06, "loss": 0.7743, "step": 9898 }, { "epoch": 0.2890134594610376, "grad_norm": 0.7282734890132206, "learning_rate": 3.950040551500406e-06, "loss": 0.6924, "step": 9899 }, { "epoch": 0.289042655688885, "grad_norm": 0.7738345775933432, "learning_rate": 3.949878345498784e-06, "loss": 0.7683, "step": 9900 }, { "epoch": 0.28907185191673235, "grad_norm": 0.8023110872332139, "learning_rate": 3.949716139497162e-06, "loss": 0.7013, "step": 9901 }, { "epoch": 0.2891010481445797, "grad_norm": 0.7178963754123193, "learning_rate": 3.94955393349554e-06, "loss": 0.5912, "step": 9902 }, { "epoch": 0.28913024437242707, "grad_norm": 0.7218933673321832, "learning_rate": 3.949391727493918e-06, "loss": 0.6903, "step": 9903 }, { "epoch": 0.28915944060027443, "grad_norm": 0.7138769538231297, "learning_rate": 3.949229521492296e-06, "loss": 0.6455, "step": 9904 }, { "epoch": 0.2891886368281218, "grad_norm": 0.7478099246541106, "learning_rate": 3.949067315490674e-06, "loss": 0.6424, "step": 9905 }, { "epoch": 0.28921783305596915, "grad_norm": 0.8747165423890828, "learning_rate": 3.948905109489052e-06, "loss": 0.7054, "step": 9906 }, { "epoch": 0.2892470292838165, "grad_norm": 0.7920356587149449, "learning_rate": 3.948742903487429e-06, "loss": 0.7019, "step": 9907 }, { "epoch": 0.2892762255116639, "grad_norm": 0.7750490149079273, "learning_rate": 3.948580697485807e-06, "loss": 0.7631, "step": 9908 }, { "epoch": 0.28930542173951124, "grad_norm": 0.7659308176233749, "learning_rate": 3.948418491484185e-06, "loss": 0.6584, "step": 9909 }, { "epoch": 0.2893346179673586, "grad_norm": 0.7618597234585361, "learning_rate": 3.948256285482563e-06, "loss": 0.7227, "step": 9910 }, { "epoch": 0.28936381419520596, "grad_norm": 0.8093303000077543, "learning_rate": 3.948094079480942e-06, "loss": 0.7664, "step": 9911 }, { "epoch": 0.2893930104230533, "grad_norm": 0.7551728433678773, "learning_rate": 3.94793187347932e-06, "loss": 0.6877, "step": 9912 }, { "epoch": 0.2894222066509007, "grad_norm": 0.7197434975226548, "learning_rate": 3.947769667477697e-06, "loss": 0.5915, "step": 9913 }, { "epoch": 0.28945140287874804, "grad_norm": 0.6958902150647914, "learning_rate": 3.947607461476075e-06, "loss": 0.6215, "step": 9914 }, { "epoch": 0.2894805991065954, "grad_norm": 0.7297028796437689, "learning_rate": 3.947445255474453e-06, "loss": 0.6721, "step": 9915 }, { "epoch": 0.28950979533444277, "grad_norm": 0.8095426415448571, "learning_rate": 3.947283049472831e-06, "loss": 0.7119, "step": 9916 }, { "epoch": 0.2895389915622901, "grad_norm": 0.6790404640834017, "learning_rate": 3.947120843471209e-06, "loss": 0.5817, "step": 9917 }, { "epoch": 0.2895681877901375, "grad_norm": 0.7465857212058484, "learning_rate": 3.946958637469587e-06, "loss": 0.6014, "step": 9918 }, { "epoch": 0.28959738401798485, "grad_norm": 0.7466941854665582, "learning_rate": 3.946796431467965e-06, "loss": 0.6706, "step": 9919 }, { "epoch": 0.28962658024583227, "grad_norm": 0.796033136260107, "learning_rate": 3.946634225466343e-06, "loss": 0.7802, "step": 9920 }, { "epoch": 0.2896557764736796, "grad_norm": 0.7456103481342139, "learning_rate": 3.946472019464721e-06, "loss": 0.6581, "step": 9921 }, { "epoch": 0.289684972701527, "grad_norm": 0.7258934805842642, "learning_rate": 3.946309813463098e-06, "loss": 0.6306, "step": 9922 }, { "epoch": 0.28971416892937435, "grad_norm": 0.7561082903799279, "learning_rate": 3.946147607461476e-06, "loss": 0.6621, "step": 9923 }, { "epoch": 0.2897433651572217, "grad_norm": 0.7228257049632715, "learning_rate": 3.945985401459854e-06, "loss": 0.6136, "step": 9924 }, { "epoch": 0.28977256138506907, "grad_norm": 0.6690215894572343, "learning_rate": 3.945823195458232e-06, "loss": 0.5921, "step": 9925 }, { "epoch": 0.28980175761291643, "grad_norm": 0.7211398612855713, "learning_rate": 3.94566098945661e-06, "loss": 0.6728, "step": 9926 }, { "epoch": 0.2898309538407638, "grad_norm": 0.8023795764169687, "learning_rate": 3.945498783454988e-06, "loss": 0.692, "step": 9927 }, { "epoch": 0.28986015006861116, "grad_norm": 0.8931565724965188, "learning_rate": 3.945336577453366e-06, "loss": 0.7352, "step": 9928 }, { "epoch": 0.2898893462964585, "grad_norm": 0.7263588160218272, "learning_rate": 3.945174371451744e-06, "loss": 0.5961, "step": 9929 }, { "epoch": 0.2899185425243059, "grad_norm": 0.742372663462477, "learning_rate": 3.945012165450122e-06, "loss": 0.6828, "step": 9930 }, { "epoch": 0.28994773875215324, "grad_norm": 0.7107849539340526, "learning_rate": 3.9448499594485e-06, "loss": 0.6437, "step": 9931 }, { "epoch": 0.2899769349800006, "grad_norm": 0.7746258535630944, "learning_rate": 3.944687753446878e-06, "loss": 0.6849, "step": 9932 }, { "epoch": 0.29000613120784796, "grad_norm": 0.8103544179874188, "learning_rate": 3.944525547445256e-06, "loss": 0.7261, "step": 9933 }, { "epoch": 0.2900353274356953, "grad_norm": 0.6846179577884992, "learning_rate": 3.944363341443634e-06, "loss": 0.5871, "step": 9934 }, { "epoch": 0.2900645236635427, "grad_norm": 0.7394869822903194, "learning_rate": 3.944201135442012e-06, "loss": 0.6816, "step": 9935 }, { "epoch": 0.29009371989139004, "grad_norm": 0.7162117224232607, "learning_rate": 3.944038929440389e-06, "loss": 0.6888, "step": 9936 }, { "epoch": 0.2901229161192374, "grad_norm": 0.73540594722094, "learning_rate": 3.943876723438767e-06, "loss": 0.6324, "step": 9937 }, { "epoch": 0.29015211234708477, "grad_norm": 0.7627204012166734, "learning_rate": 3.943714517437145e-06, "loss": 0.7112, "step": 9938 }, { "epoch": 0.29018130857493213, "grad_norm": 0.7617364437923688, "learning_rate": 3.943552311435523e-06, "loss": 0.6286, "step": 9939 }, { "epoch": 0.2902105048027795, "grad_norm": 0.7571012668747025, "learning_rate": 3.943390105433901e-06, "loss": 0.7312, "step": 9940 }, { "epoch": 0.29023970103062685, "grad_norm": 0.8063072203505864, "learning_rate": 3.943227899432279e-06, "loss": 0.7908, "step": 9941 }, { "epoch": 0.2902688972584742, "grad_norm": 0.7351339719364479, "learning_rate": 3.943065693430657e-06, "loss": 0.6965, "step": 9942 }, { "epoch": 0.2902980934863216, "grad_norm": 0.783256445973496, "learning_rate": 3.942903487429035e-06, "loss": 0.6626, "step": 9943 }, { "epoch": 0.29032728971416893, "grad_norm": 0.7403783563239097, "learning_rate": 3.942741281427413e-06, "loss": 0.6558, "step": 9944 }, { "epoch": 0.2903564859420163, "grad_norm": 0.7613569788451576, "learning_rate": 3.9425790754257905e-06, "loss": 0.6815, "step": 9945 }, { "epoch": 0.29038568216986366, "grad_norm": 0.7598469874142173, "learning_rate": 3.9424168694241685e-06, "loss": 0.6697, "step": 9946 }, { "epoch": 0.290414878397711, "grad_norm": 0.7953528186588902, "learning_rate": 3.9422546634225465e-06, "loss": 0.6825, "step": 9947 }, { "epoch": 0.2904440746255584, "grad_norm": 0.7414672131345263, "learning_rate": 3.942092457420925e-06, "loss": 0.6604, "step": 9948 }, { "epoch": 0.29047327085340574, "grad_norm": 0.8043001302309862, "learning_rate": 3.941930251419303e-06, "loss": 0.7748, "step": 9949 }, { "epoch": 0.2905024670812531, "grad_norm": 0.7263654920257996, "learning_rate": 3.941768045417681e-06, "loss": 0.6519, "step": 9950 }, { "epoch": 0.29053166330910046, "grad_norm": 0.7626081390189482, "learning_rate": 3.9416058394160585e-06, "loss": 0.6653, "step": 9951 }, { "epoch": 0.2905608595369478, "grad_norm": 0.7419323499641238, "learning_rate": 3.9414436334144366e-06, "loss": 0.6422, "step": 9952 }, { "epoch": 0.2905900557647952, "grad_norm": 0.7932675578209034, "learning_rate": 3.9412814274128146e-06, "loss": 0.7335, "step": 9953 }, { "epoch": 0.29061925199264255, "grad_norm": 0.7285539393232051, "learning_rate": 3.9411192214111926e-06, "loss": 0.6285, "step": 9954 }, { "epoch": 0.2906484482204899, "grad_norm": 0.7590111562025489, "learning_rate": 3.940957015409571e-06, "loss": 0.6413, "step": 9955 }, { "epoch": 0.29067764444833727, "grad_norm": 0.7038571242225188, "learning_rate": 3.940794809407949e-06, "loss": 0.5816, "step": 9956 }, { "epoch": 0.29070684067618463, "grad_norm": 0.6776920285198398, "learning_rate": 3.940632603406327e-06, "loss": 0.5881, "step": 9957 }, { "epoch": 0.290736036904032, "grad_norm": 0.7733942499683181, "learning_rate": 3.940470397404705e-06, "loss": 0.6891, "step": 9958 }, { "epoch": 0.29076523313187935, "grad_norm": 0.7285752866688394, "learning_rate": 3.940308191403083e-06, "loss": 0.6424, "step": 9959 }, { "epoch": 0.2907944293597267, "grad_norm": 0.8313534396405036, "learning_rate": 3.94014598540146e-06, "loss": 0.7918, "step": 9960 }, { "epoch": 0.2908236255875741, "grad_norm": 0.7561328592740284, "learning_rate": 3.939983779399838e-06, "loss": 0.6856, "step": 9961 }, { "epoch": 0.29085282181542144, "grad_norm": 0.7838155962095353, "learning_rate": 3.939821573398216e-06, "loss": 0.721, "step": 9962 }, { "epoch": 0.2908820180432688, "grad_norm": 0.750104314983632, "learning_rate": 3.939659367396594e-06, "loss": 0.6439, "step": 9963 }, { "epoch": 0.29091121427111616, "grad_norm": 0.7155266568251768, "learning_rate": 3.939497161394972e-06, "loss": 0.6447, "step": 9964 }, { "epoch": 0.2909404104989635, "grad_norm": 0.7232752599085894, "learning_rate": 3.93933495539335e-06, "loss": 0.6368, "step": 9965 }, { "epoch": 0.2909696067268109, "grad_norm": 0.7233940430233172, "learning_rate": 3.939172749391728e-06, "loss": 0.6212, "step": 9966 }, { "epoch": 0.29099880295465824, "grad_norm": 0.9371563545368033, "learning_rate": 3.939010543390106e-06, "loss": 0.6811, "step": 9967 }, { "epoch": 0.2910279991825056, "grad_norm": 0.7933054119908267, "learning_rate": 3.938848337388484e-06, "loss": 0.6185, "step": 9968 }, { "epoch": 0.29105719541035296, "grad_norm": 0.8067010436133315, "learning_rate": 3.938686131386862e-06, "loss": 0.7457, "step": 9969 }, { "epoch": 0.2910863916382003, "grad_norm": 0.6907020002387264, "learning_rate": 3.93852392538524e-06, "loss": 0.6034, "step": 9970 }, { "epoch": 0.2911155878660477, "grad_norm": 0.7733221620837114, "learning_rate": 3.938361719383618e-06, "loss": 0.684, "step": 9971 }, { "epoch": 0.29114478409389505, "grad_norm": 0.7952988310492012, "learning_rate": 3.938199513381996e-06, "loss": 0.7905, "step": 9972 }, { "epoch": 0.2911739803217424, "grad_norm": 0.6991309434473397, "learning_rate": 3.938037307380374e-06, "loss": 0.6643, "step": 9973 }, { "epoch": 0.29120317654958977, "grad_norm": 0.7726539458319703, "learning_rate": 3.937875101378751e-06, "loss": 0.7257, "step": 9974 }, { "epoch": 0.29123237277743713, "grad_norm": 0.6914709928997613, "learning_rate": 3.937712895377129e-06, "loss": 0.6409, "step": 9975 }, { "epoch": 0.2912615690052845, "grad_norm": 0.6898327754990425, "learning_rate": 3.937550689375507e-06, "loss": 0.5765, "step": 9976 }, { "epoch": 0.29129076523313185, "grad_norm": 0.7432064919710821, "learning_rate": 3.937388483373885e-06, "loss": 0.7381, "step": 9977 }, { "epoch": 0.2913199614609792, "grad_norm": 0.8026840322697548, "learning_rate": 3.937226277372263e-06, "loss": 0.718, "step": 9978 }, { "epoch": 0.2913491576888266, "grad_norm": 0.7671047544437134, "learning_rate": 3.937064071370641e-06, "loss": 0.7304, "step": 9979 }, { "epoch": 0.291378353916674, "grad_norm": 0.6994213719427177, "learning_rate": 3.936901865369019e-06, "loss": 0.6098, "step": 9980 }, { "epoch": 0.29140755014452135, "grad_norm": 0.7278145726152039, "learning_rate": 3.936739659367397e-06, "loss": 0.5907, "step": 9981 }, { "epoch": 0.2914367463723687, "grad_norm": 0.7692676185179703, "learning_rate": 3.936577453365775e-06, "loss": 0.6503, "step": 9982 }, { "epoch": 0.2914659426002161, "grad_norm": 0.7528378902337683, "learning_rate": 3.936415247364152e-06, "loss": 0.7147, "step": 9983 }, { "epoch": 0.29149513882806344, "grad_norm": 0.7500693714396212, "learning_rate": 3.93625304136253e-06, "loss": 0.6874, "step": 9984 }, { "epoch": 0.2915243350559108, "grad_norm": 0.7280575911881895, "learning_rate": 3.936090835360908e-06, "loss": 0.682, "step": 9985 }, { "epoch": 0.29155353128375816, "grad_norm": 0.7749130107666369, "learning_rate": 3.935928629359287e-06, "loss": 0.7016, "step": 9986 }, { "epoch": 0.2915827275116055, "grad_norm": 0.849375854809565, "learning_rate": 3.935766423357665e-06, "loss": 0.6926, "step": 9987 }, { "epoch": 0.2916119237394529, "grad_norm": 0.8421001732159208, "learning_rate": 3.935604217356043e-06, "loss": 0.7159, "step": 9988 }, { "epoch": 0.29164111996730024, "grad_norm": 0.7493719841672815, "learning_rate": 3.93544201135442e-06, "loss": 0.6956, "step": 9989 }, { "epoch": 0.2916703161951476, "grad_norm": 0.8312600352088261, "learning_rate": 3.935279805352798e-06, "loss": 0.7001, "step": 9990 }, { "epoch": 0.29169951242299497, "grad_norm": 0.7389635188543652, "learning_rate": 3.935117599351176e-06, "loss": 0.7048, "step": 9991 }, { "epoch": 0.2917287086508423, "grad_norm": 0.7414806553170404, "learning_rate": 3.934955393349554e-06, "loss": 0.6848, "step": 9992 }, { "epoch": 0.2917579048786897, "grad_norm": 0.7839479680671577, "learning_rate": 3.934793187347932e-06, "loss": 0.754, "step": 9993 }, { "epoch": 0.29178710110653705, "grad_norm": 0.7571798802349783, "learning_rate": 3.93463098134631e-06, "loss": 0.6726, "step": 9994 }, { "epoch": 0.2918162973343844, "grad_norm": 0.7887370125707712, "learning_rate": 3.934468775344688e-06, "loss": 0.6716, "step": 9995 }, { "epoch": 0.2918454935622318, "grad_norm": 1.1276606261904543, "learning_rate": 3.934306569343066e-06, "loss": 0.7595, "step": 9996 }, { "epoch": 0.29187468979007913, "grad_norm": 0.7380529842418813, "learning_rate": 3.9341443633414435e-06, "loss": 0.6217, "step": 9997 }, { "epoch": 0.2919038860179265, "grad_norm": 0.7830120601182511, "learning_rate": 3.9339821573398215e-06, "loss": 0.7218, "step": 9998 }, { "epoch": 0.29193308224577386, "grad_norm": 0.714930245672852, "learning_rate": 3.9338199513381995e-06, "loss": 0.6347, "step": 9999 }, { "epoch": 0.2919622784736212, "grad_norm": 0.7745765326216957, "learning_rate": 3.9336577453365775e-06, "loss": 0.7259, "step": 10000 }, { "epoch": 0.2919914747014686, "grad_norm": 0.7352966339488078, "learning_rate": 3.9334955393349555e-06, "loss": 0.5852, "step": 10001 }, { "epoch": 0.29202067092931594, "grad_norm": 0.6906194030064559, "learning_rate": 3.9333333333333335e-06, "loss": 0.6029, "step": 10002 }, { "epoch": 0.2920498671571633, "grad_norm": 0.7846890112856101, "learning_rate": 3.9331711273317115e-06, "loss": 0.772, "step": 10003 }, { "epoch": 0.29207906338501066, "grad_norm": 0.740018632158368, "learning_rate": 3.9330089213300895e-06, "loss": 0.696, "step": 10004 }, { "epoch": 0.292108259612858, "grad_norm": 0.7533345293185844, "learning_rate": 3.9328467153284675e-06, "loss": 0.6817, "step": 10005 }, { "epoch": 0.2921374558407054, "grad_norm": 0.6567906529605517, "learning_rate": 3.9326845093268455e-06, "loss": 0.5734, "step": 10006 }, { "epoch": 0.29216665206855275, "grad_norm": 0.7122144057058946, "learning_rate": 3.9325223033252235e-06, "loss": 0.6307, "step": 10007 }, { "epoch": 0.2921958482964001, "grad_norm": 0.7544539046029772, "learning_rate": 3.9323600973236015e-06, "loss": 0.7216, "step": 10008 }, { "epoch": 0.29222504452424747, "grad_norm": 0.7268472214995213, "learning_rate": 3.9321978913219796e-06, "loss": 0.651, "step": 10009 }, { "epoch": 0.29225424075209483, "grad_norm": 0.7163622042867942, "learning_rate": 3.9320356853203576e-06, "loss": 0.6376, "step": 10010 }, { "epoch": 0.2922834369799422, "grad_norm": 0.6768644306961139, "learning_rate": 3.9318734793187356e-06, "loss": 0.5888, "step": 10011 }, { "epoch": 0.29231263320778955, "grad_norm": 0.739545330875565, "learning_rate": 3.931711273317113e-06, "loss": 0.6485, "step": 10012 }, { "epoch": 0.2923418294356369, "grad_norm": 0.7712553542649856, "learning_rate": 3.931549067315491e-06, "loss": 0.6944, "step": 10013 }, { "epoch": 0.2923710256634843, "grad_norm": 0.7443247759719683, "learning_rate": 3.931386861313869e-06, "loss": 0.623, "step": 10014 }, { "epoch": 0.29240022189133164, "grad_norm": 0.6924752405498971, "learning_rate": 3.931224655312247e-06, "loss": 0.5687, "step": 10015 }, { "epoch": 0.292429418119179, "grad_norm": 0.732372853975277, "learning_rate": 3.931062449310625e-06, "loss": 0.6645, "step": 10016 }, { "epoch": 0.29245861434702636, "grad_norm": 0.6740509403200924, "learning_rate": 3.930900243309003e-06, "loss": 0.6147, "step": 10017 }, { "epoch": 0.2924878105748737, "grad_norm": 0.7651492648497376, "learning_rate": 3.930738037307381e-06, "loss": 0.7369, "step": 10018 }, { "epoch": 0.2925170068027211, "grad_norm": 0.7342991406586434, "learning_rate": 3.930575831305759e-06, "loss": 0.675, "step": 10019 }, { "epoch": 0.29254620303056844, "grad_norm": 0.7348765465172723, "learning_rate": 3.930413625304137e-06, "loss": 0.6927, "step": 10020 }, { "epoch": 0.2925753992584158, "grad_norm": 0.8158739071301487, "learning_rate": 3.930251419302514e-06, "loss": 0.7155, "step": 10021 }, { "epoch": 0.29260459548626316, "grad_norm": 0.7315217675605119, "learning_rate": 3.930089213300892e-06, "loss": 0.682, "step": 10022 }, { "epoch": 0.2926337917141105, "grad_norm": 0.7742392688353774, "learning_rate": 3.92992700729927e-06, "loss": 0.682, "step": 10023 }, { "epoch": 0.2926629879419579, "grad_norm": 0.7513156379951318, "learning_rate": 3.929764801297649e-06, "loss": 0.7228, "step": 10024 }, { "epoch": 0.29269218416980525, "grad_norm": 0.7612572666914937, "learning_rate": 3.929602595296027e-06, "loss": 0.6049, "step": 10025 }, { "epoch": 0.2927213803976526, "grad_norm": 0.7605509703312838, "learning_rate": 3.929440389294405e-06, "loss": 0.6305, "step": 10026 }, { "epoch": 0.29275057662549997, "grad_norm": 0.7580119207437946, "learning_rate": 3.929278183292782e-06, "loss": 0.6835, "step": 10027 }, { "epoch": 0.29277977285334733, "grad_norm": 0.7475619867074222, "learning_rate": 3.92911597729116e-06, "loss": 0.6909, "step": 10028 }, { "epoch": 0.2928089690811947, "grad_norm": 0.8667584693749418, "learning_rate": 3.928953771289538e-06, "loss": 0.7178, "step": 10029 }, { "epoch": 0.29283816530904205, "grad_norm": 0.7432733940548227, "learning_rate": 3.928791565287916e-06, "loss": 0.6526, "step": 10030 }, { "epoch": 0.2928673615368894, "grad_norm": 0.7290401855753896, "learning_rate": 3.928629359286294e-06, "loss": 0.5511, "step": 10031 }, { "epoch": 0.2928965577647368, "grad_norm": 0.7466129199498236, "learning_rate": 3.928467153284672e-06, "loss": 0.6664, "step": 10032 }, { "epoch": 0.29292575399258414, "grad_norm": 0.6908562956366511, "learning_rate": 3.92830494728305e-06, "loss": 0.5863, "step": 10033 }, { "epoch": 0.2929549502204315, "grad_norm": 0.6975709312671869, "learning_rate": 3.928142741281428e-06, "loss": 0.6014, "step": 10034 }, { "epoch": 0.29298414644827886, "grad_norm": 0.7462588218817862, "learning_rate": 3.927980535279805e-06, "loss": 0.5422, "step": 10035 }, { "epoch": 0.2930133426761262, "grad_norm": 0.67923738175269, "learning_rate": 3.927818329278183e-06, "loss": 0.5901, "step": 10036 }, { "epoch": 0.2930425389039736, "grad_norm": 0.7594240460408377, "learning_rate": 3.927656123276561e-06, "loss": 0.7189, "step": 10037 }, { "epoch": 0.29307173513182094, "grad_norm": 0.7810771182226326, "learning_rate": 3.927493917274939e-06, "loss": 0.6953, "step": 10038 }, { "epoch": 0.2931009313596683, "grad_norm": 0.728882462568224, "learning_rate": 3.927331711273317e-06, "loss": 0.6618, "step": 10039 }, { "epoch": 0.29313012758751567, "grad_norm": 0.8392995171515862, "learning_rate": 3.927169505271695e-06, "loss": 0.7789, "step": 10040 }, { "epoch": 0.2931593238153631, "grad_norm": 0.7514862743484045, "learning_rate": 3.927007299270073e-06, "loss": 0.7008, "step": 10041 }, { "epoch": 0.29318852004321044, "grad_norm": 0.6952373446477507, "learning_rate": 3.926845093268451e-06, "loss": 0.6238, "step": 10042 }, { "epoch": 0.2932177162710578, "grad_norm": 0.877809283331115, "learning_rate": 3.926682887266829e-06, "loss": 0.8233, "step": 10043 }, { "epoch": 0.29324691249890517, "grad_norm": 0.7724880659995892, "learning_rate": 3.926520681265207e-06, "loss": 0.7481, "step": 10044 }, { "epoch": 0.2932761087267525, "grad_norm": 0.704271036833273, "learning_rate": 3.926358475263585e-06, "loss": 0.6381, "step": 10045 }, { "epoch": 0.2933053049545999, "grad_norm": 0.7605497665465342, "learning_rate": 3.926196269261963e-06, "loss": 0.7228, "step": 10046 }, { "epoch": 0.29333450118244725, "grad_norm": 0.7493955004197216, "learning_rate": 3.926034063260341e-06, "loss": 0.6889, "step": 10047 }, { "epoch": 0.2933636974102946, "grad_norm": 0.772087631078114, "learning_rate": 3.925871857258719e-06, "loss": 0.6939, "step": 10048 }, { "epoch": 0.29339289363814197, "grad_norm": 0.7971735259597982, "learning_rate": 3.925709651257097e-06, "loss": 0.7055, "step": 10049 }, { "epoch": 0.29342208986598933, "grad_norm": 0.7450471030806076, "learning_rate": 3.9255474452554744e-06, "loss": 0.6716, "step": 10050 }, { "epoch": 0.2934512860938367, "grad_norm": 0.7144377056686783, "learning_rate": 3.9253852392538525e-06, "loss": 0.6539, "step": 10051 }, { "epoch": 0.29348048232168406, "grad_norm": 0.7261719635039152, "learning_rate": 3.9252230332522305e-06, "loss": 0.6236, "step": 10052 }, { "epoch": 0.2935096785495314, "grad_norm": 0.7396554575198775, "learning_rate": 3.9250608272506085e-06, "loss": 0.6866, "step": 10053 }, { "epoch": 0.2935388747773788, "grad_norm": 0.7346934773610074, "learning_rate": 3.9248986212489865e-06, "loss": 0.6196, "step": 10054 }, { "epoch": 0.29356807100522614, "grad_norm": 0.7452716778753665, "learning_rate": 3.9247364152473645e-06, "loss": 0.7159, "step": 10055 }, { "epoch": 0.2935972672330735, "grad_norm": 0.6943557294135235, "learning_rate": 3.9245742092457425e-06, "loss": 0.5791, "step": 10056 }, { "epoch": 0.29362646346092086, "grad_norm": 0.7372697316386897, "learning_rate": 3.9244120032441205e-06, "loss": 0.6698, "step": 10057 }, { "epoch": 0.2936556596887682, "grad_norm": 0.7373789465192027, "learning_rate": 3.9242497972424985e-06, "loss": 0.6438, "step": 10058 }, { "epoch": 0.2936848559166156, "grad_norm": 0.7502677674414876, "learning_rate": 3.924087591240876e-06, "loss": 0.6634, "step": 10059 }, { "epoch": 0.29371405214446294, "grad_norm": 0.8395215380696751, "learning_rate": 3.923925385239254e-06, "loss": 0.7314, "step": 10060 }, { "epoch": 0.2937432483723103, "grad_norm": 0.712674072922879, "learning_rate": 3.923763179237632e-06, "loss": 0.6052, "step": 10061 }, { "epoch": 0.29377244460015767, "grad_norm": 0.7848002456998296, "learning_rate": 3.9236009732360105e-06, "loss": 0.6756, "step": 10062 }, { "epoch": 0.29380164082800503, "grad_norm": 0.7207665619698238, "learning_rate": 3.9234387672343885e-06, "loss": 0.6474, "step": 10063 }, { "epoch": 0.2938308370558524, "grad_norm": 0.6839852290330947, "learning_rate": 3.9232765612327665e-06, "loss": 0.6431, "step": 10064 }, { "epoch": 0.29386003328369975, "grad_norm": 0.8642586356682089, "learning_rate": 3.923114355231144e-06, "loss": 0.6676, "step": 10065 }, { "epoch": 0.2938892295115471, "grad_norm": 0.7921468629207469, "learning_rate": 3.922952149229522e-06, "loss": 0.6575, "step": 10066 }, { "epoch": 0.2939184257393945, "grad_norm": 0.8315551699917202, "learning_rate": 3.9227899432279e-06, "loss": 0.7327, "step": 10067 }, { "epoch": 0.29394762196724183, "grad_norm": 0.7777394878723918, "learning_rate": 3.922627737226278e-06, "loss": 0.7195, "step": 10068 }, { "epoch": 0.2939768181950892, "grad_norm": 0.7127250915488246, "learning_rate": 3.922465531224656e-06, "loss": 0.608, "step": 10069 }, { "epoch": 0.29400601442293656, "grad_norm": 0.7467142407985696, "learning_rate": 3.922303325223034e-06, "loss": 0.6841, "step": 10070 }, { "epoch": 0.2940352106507839, "grad_norm": 0.7703216283124379, "learning_rate": 3.922141119221412e-06, "loss": 0.7188, "step": 10071 }, { "epoch": 0.2940644068786313, "grad_norm": 0.8290647257435281, "learning_rate": 3.92197891321979e-06, "loss": 0.7445, "step": 10072 }, { "epoch": 0.29409360310647864, "grad_norm": 0.8509826091575114, "learning_rate": 3.921816707218167e-06, "loss": 0.7961, "step": 10073 }, { "epoch": 0.294122799334326, "grad_norm": 0.705558365233627, "learning_rate": 3.921654501216545e-06, "loss": 0.6254, "step": 10074 }, { "epoch": 0.29415199556217336, "grad_norm": 0.6889473822130289, "learning_rate": 3.921492295214923e-06, "loss": 0.6042, "step": 10075 }, { "epoch": 0.2941811917900207, "grad_norm": 0.6775641234569751, "learning_rate": 3.921330089213301e-06, "loss": 0.5798, "step": 10076 }, { "epoch": 0.2942103880178681, "grad_norm": 0.7488878155887049, "learning_rate": 3.921167883211679e-06, "loss": 0.6774, "step": 10077 }, { "epoch": 0.29423958424571545, "grad_norm": 0.7555056455446872, "learning_rate": 3.921005677210057e-06, "loss": 0.6847, "step": 10078 }, { "epoch": 0.2942687804735628, "grad_norm": 0.7356664434147798, "learning_rate": 3.920843471208435e-06, "loss": 0.6553, "step": 10079 }, { "epoch": 0.29429797670141017, "grad_norm": 0.7058624549828524, "learning_rate": 3.920681265206813e-06, "loss": 0.6382, "step": 10080 }, { "epoch": 0.29432717292925753, "grad_norm": 0.7800697468396899, "learning_rate": 3.920519059205191e-06, "loss": 0.6581, "step": 10081 }, { "epoch": 0.2943563691571049, "grad_norm": 0.8809522057300839, "learning_rate": 3.920356853203569e-06, "loss": 0.6843, "step": 10082 }, { "epoch": 0.29438556538495225, "grad_norm": 0.9183325731167965, "learning_rate": 3.920194647201947e-06, "loss": 0.8529, "step": 10083 }, { "epoch": 0.2944147616127996, "grad_norm": 0.6815548965855902, "learning_rate": 3.920032441200325e-06, "loss": 0.6244, "step": 10084 }, { "epoch": 0.294443957840647, "grad_norm": 0.6821586308574574, "learning_rate": 3.919870235198703e-06, "loss": 0.6455, "step": 10085 }, { "epoch": 0.29447315406849434, "grad_norm": 0.7438088040885553, "learning_rate": 3.919708029197081e-06, "loss": 0.6459, "step": 10086 }, { "epoch": 0.2945023502963417, "grad_norm": 1.0341171293347673, "learning_rate": 3.919545823195459e-06, "loss": 0.7388, "step": 10087 }, { "epoch": 0.29453154652418906, "grad_norm": 0.8613105103863928, "learning_rate": 3.919383617193836e-06, "loss": 0.7134, "step": 10088 }, { "epoch": 0.2945607427520364, "grad_norm": 0.6993053764749021, "learning_rate": 3.919221411192214e-06, "loss": 0.6031, "step": 10089 }, { "epoch": 0.2945899389798838, "grad_norm": 0.751320161144973, "learning_rate": 3.919059205190592e-06, "loss": 0.6655, "step": 10090 }, { "epoch": 0.29461913520773114, "grad_norm": 0.7507513575086209, "learning_rate": 3.91889699918897e-06, "loss": 0.7394, "step": 10091 }, { "epoch": 0.2946483314355785, "grad_norm": 0.7328906330675036, "learning_rate": 3.918734793187348e-06, "loss": 0.6623, "step": 10092 }, { "epoch": 0.29467752766342586, "grad_norm": 0.707988483451734, "learning_rate": 3.918572587185726e-06, "loss": 0.6237, "step": 10093 }, { "epoch": 0.2947067238912732, "grad_norm": 0.7329620056189159, "learning_rate": 3.918410381184104e-06, "loss": 0.7, "step": 10094 }, { "epoch": 0.2947359201191206, "grad_norm": 0.7291530766850424, "learning_rate": 3.918248175182482e-06, "loss": 0.6355, "step": 10095 }, { "epoch": 0.29476511634696795, "grad_norm": 0.7161524784219239, "learning_rate": 3.91808596918086e-06, "loss": 0.6336, "step": 10096 }, { "epoch": 0.2947943125748153, "grad_norm": 0.8003147918462159, "learning_rate": 3.917923763179237e-06, "loss": 0.6881, "step": 10097 }, { "epoch": 0.29482350880266267, "grad_norm": 0.7260853707765279, "learning_rate": 3.917761557177615e-06, "loss": 0.6124, "step": 10098 }, { "epoch": 0.29485270503051003, "grad_norm": 0.6755145720694736, "learning_rate": 3.917599351175994e-06, "loss": 0.5963, "step": 10099 }, { "epoch": 0.2948819012583574, "grad_norm": 0.7878327306554137, "learning_rate": 3.917437145174372e-06, "loss": 0.726, "step": 10100 }, { "epoch": 0.2949110974862048, "grad_norm": 0.8204726679434562, "learning_rate": 3.91727493917275e-06, "loss": 0.7738, "step": 10101 }, { "epoch": 0.29494029371405217, "grad_norm": 0.6982629331201904, "learning_rate": 3.917112733171127e-06, "loss": 0.6283, "step": 10102 }, { "epoch": 0.29496948994189953, "grad_norm": 0.7393797576557757, "learning_rate": 3.916950527169505e-06, "loss": 0.6873, "step": 10103 }, { "epoch": 0.2949986861697469, "grad_norm": 0.8319938531196677, "learning_rate": 3.9167883211678834e-06, "loss": 0.722, "step": 10104 }, { "epoch": 0.29502788239759425, "grad_norm": 0.7483859358959658, "learning_rate": 3.9166261151662614e-06, "loss": 0.7236, "step": 10105 }, { "epoch": 0.2950570786254416, "grad_norm": 0.7703472360933027, "learning_rate": 3.9164639091646394e-06, "loss": 0.7075, "step": 10106 }, { "epoch": 0.295086274853289, "grad_norm": 0.7863588958793276, "learning_rate": 3.9163017031630174e-06, "loss": 0.7007, "step": 10107 }, { "epoch": 0.29511547108113634, "grad_norm": 0.7804595932544395, "learning_rate": 3.9161394971613955e-06, "loss": 0.6234, "step": 10108 }, { "epoch": 0.2951446673089837, "grad_norm": 0.7973349068404504, "learning_rate": 3.9159772911597735e-06, "loss": 0.7456, "step": 10109 }, { "epoch": 0.29517386353683106, "grad_norm": 0.8010456290983032, "learning_rate": 3.9158150851581515e-06, "loss": 0.6887, "step": 10110 }, { "epoch": 0.2952030597646784, "grad_norm": 0.6834570440971878, "learning_rate": 3.915652879156529e-06, "loss": 0.5971, "step": 10111 }, { "epoch": 0.2952322559925258, "grad_norm": 0.839006527045544, "learning_rate": 3.915490673154907e-06, "loss": 0.6941, "step": 10112 }, { "epoch": 0.29526145222037314, "grad_norm": 0.8271033047420943, "learning_rate": 3.915328467153285e-06, "loss": 0.6361, "step": 10113 }, { "epoch": 0.2952906484482205, "grad_norm": 0.7327467866371579, "learning_rate": 3.915166261151663e-06, "loss": 0.692, "step": 10114 }, { "epoch": 0.29531984467606787, "grad_norm": 0.7509390834223666, "learning_rate": 3.915004055150041e-06, "loss": 0.6153, "step": 10115 }, { "epoch": 0.2953490409039152, "grad_norm": 0.6938387440352476, "learning_rate": 3.914841849148419e-06, "loss": 0.5903, "step": 10116 }, { "epoch": 0.2953782371317626, "grad_norm": 0.727983101570506, "learning_rate": 3.914679643146797e-06, "loss": 0.7061, "step": 10117 }, { "epoch": 0.29540743335960995, "grad_norm": 0.7364630068699863, "learning_rate": 3.914517437145175e-06, "loss": 0.6465, "step": 10118 }, { "epoch": 0.2954366295874573, "grad_norm": 0.7213098557030445, "learning_rate": 3.914355231143553e-06, "loss": 0.6584, "step": 10119 }, { "epoch": 0.2954658258153047, "grad_norm": 0.7631263674358925, "learning_rate": 3.914193025141931e-06, "loss": 0.6853, "step": 10120 }, { "epoch": 0.29549502204315203, "grad_norm": 0.7034168913493813, "learning_rate": 3.914030819140309e-06, "loss": 0.5966, "step": 10121 }, { "epoch": 0.2955242182709994, "grad_norm": 0.7583911195209886, "learning_rate": 3.913868613138687e-06, "loss": 0.73, "step": 10122 }, { "epoch": 0.29555341449884676, "grad_norm": 0.7563018389654464, "learning_rate": 3.913706407137065e-06, "loss": 0.682, "step": 10123 }, { "epoch": 0.2955826107266941, "grad_norm": 0.800691000685294, "learning_rate": 3.913544201135443e-06, "loss": 0.7515, "step": 10124 }, { "epoch": 0.2956118069545415, "grad_norm": 0.683322949772453, "learning_rate": 3.913381995133821e-06, "loss": 0.613, "step": 10125 }, { "epoch": 0.29564100318238884, "grad_norm": 0.7024726107075869, "learning_rate": 3.913219789132198e-06, "loss": 0.6648, "step": 10126 }, { "epoch": 0.2956701994102362, "grad_norm": 0.7091946418398667, "learning_rate": 3.913057583130576e-06, "loss": 0.6312, "step": 10127 }, { "epoch": 0.29569939563808356, "grad_norm": 0.8107674114218754, "learning_rate": 3.912895377128954e-06, "loss": 0.7487, "step": 10128 }, { "epoch": 0.2957285918659309, "grad_norm": 0.826476393643825, "learning_rate": 3.912733171127332e-06, "loss": 0.729, "step": 10129 }, { "epoch": 0.2957577880937783, "grad_norm": 0.7520444928333292, "learning_rate": 3.91257096512571e-06, "loss": 0.6921, "step": 10130 }, { "epoch": 0.29578698432162565, "grad_norm": 0.7046287101428943, "learning_rate": 3.912408759124088e-06, "loss": 0.5792, "step": 10131 }, { "epoch": 0.295816180549473, "grad_norm": 0.7874448458276813, "learning_rate": 3.912246553122466e-06, "loss": 0.7333, "step": 10132 }, { "epoch": 0.29584537677732037, "grad_norm": 0.7294471319808625, "learning_rate": 3.912084347120844e-06, "loss": 0.647, "step": 10133 }, { "epoch": 0.29587457300516773, "grad_norm": 0.7663188149090912, "learning_rate": 3.911922141119222e-06, "loss": 0.6929, "step": 10134 }, { "epoch": 0.2959037692330151, "grad_norm": 0.7032666334795832, "learning_rate": 3.911759935117599e-06, "loss": 0.6105, "step": 10135 }, { "epoch": 0.29593296546086245, "grad_norm": 0.6586556479689982, "learning_rate": 3.911597729115977e-06, "loss": 0.5401, "step": 10136 }, { "epoch": 0.2959621616887098, "grad_norm": 0.7902901118026444, "learning_rate": 3.911435523114356e-06, "loss": 0.6825, "step": 10137 }, { "epoch": 0.2959913579165572, "grad_norm": 0.8344070341204853, "learning_rate": 3.911273317112734e-06, "loss": 0.6962, "step": 10138 }, { "epoch": 0.29602055414440454, "grad_norm": 0.7414554085039021, "learning_rate": 3.911111111111112e-06, "loss": 0.6977, "step": 10139 }, { "epoch": 0.2960497503722519, "grad_norm": 0.8110336289855878, "learning_rate": 3.910948905109489e-06, "loss": 0.706, "step": 10140 }, { "epoch": 0.29607894660009926, "grad_norm": 0.7108256560335838, "learning_rate": 3.910786699107867e-06, "loss": 0.6622, "step": 10141 }, { "epoch": 0.2961081428279466, "grad_norm": 0.8591716033919854, "learning_rate": 3.910624493106245e-06, "loss": 0.7448, "step": 10142 }, { "epoch": 0.296137339055794, "grad_norm": 0.798996583187554, "learning_rate": 3.910462287104623e-06, "loss": 0.6894, "step": 10143 }, { "epoch": 0.29616653528364134, "grad_norm": 0.7703111934831826, "learning_rate": 3.910300081103001e-06, "loss": 0.6476, "step": 10144 }, { "epoch": 0.2961957315114887, "grad_norm": 0.6917434417847567, "learning_rate": 3.910137875101379e-06, "loss": 0.6169, "step": 10145 }, { "epoch": 0.29622492773933606, "grad_norm": 1.0178272201500818, "learning_rate": 3.909975669099757e-06, "loss": 0.6828, "step": 10146 }, { "epoch": 0.2962541239671834, "grad_norm": 0.8024142362224707, "learning_rate": 3.909813463098135e-06, "loss": 0.8567, "step": 10147 }, { "epoch": 0.2962833201950308, "grad_norm": 0.775131165322802, "learning_rate": 3.909651257096513e-06, "loss": 0.7105, "step": 10148 }, { "epoch": 0.29631251642287815, "grad_norm": 0.7043187601230615, "learning_rate": 3.90948905109489e-06, "loss": 0.6304, "step": 10149 }, { "epoch": 0.2963417126507255, "grad_norm": 0.7284902376043519, "learning_rate": 3.909326845093268e-06, "loss": 0.653, "step": 10150 }, { "epoch": 0.29637090887857287, "grad_norm": 0.7410737526313544, "learning_rate": 3.909164639091646e-06, "loss": 0.7033, "step": 10151 }, { "epoch": 0.29640010510642023, "grad_norm": 0.7769330312460279, "learning_rate": 3.909002433090024e-06, "loss": 0.6059, "step": 10152 }, { "epoch": 0.2964293013342676, "grad_norm": 0.7117508740101012, "learning_rate": 3.908840227088402e-06, "loss": 0.6195, "step": 10153 }, { "epoch": 0.29645849756211495, "grad_norm": 0.6886706917847509, "learning_rate": 3.90867802108678e-06, "loss": 0.5739, "step": 10154 }, { "epoch": 0.2964876937899623, "grad_norm": 0.7348557040250875, "learning_rate": 3.908515815085158e-06, "loss": 0.651, "step": 10155 }, { "epoch": 0.2965168900178097, "grad_norm": 0.7646300027570798, "learning_rate": 3.908353609083536e-06, "loss": 0.6538, "step": 10156 }, { "epoch": 0.29654608624565704, "grad_norm": 0.7888845692267391, "learning_rate": 3.908191403081914e-06, "loss": 0.7035, "step": 10157 }, { "epoch": 0.2965752824735044, "grad_norm": 0.7376517090618437, "learning_rate": 3.908029197080292e-06, "loss": 0.6567, "step": 10158 }, { "epoch": 0.29660447870135176, "grad_norm": 0.7517837072509667, "learning_rate": 3.90786699107867e-06, "loss": 0.6621, "step": 10159 }, { "epoch": 0.2966336749291991, "grad_norm": 0.7653660377651756, "learning_rate": 3.907704785077048e-06, "loss": 0.7305, "step": 10160 }, { "epoch": 0.29666287115704654, "grad_norm": 0.9807247001704141, "learning_rate": 3.9075425790754264e-06, "loss": 0.6916, "step": 10161 }, { "epoch": 0.2966920673848939, "grad_norm": 0.8870126677707331, "learning_rate": 3.9073803730738044e-06, "loss": 0.6761, "step": 10162 }, { "epoch": 0.29672126361274126, "grad_norm": 0.7176342563876261, "learning_rate": 3.9072181670721824e-06, "loss": 0.6057, "step": 10163 }, { "epoch": 0.2967504598405886, "grad_norm": 0.7657448776854876, "learning_rate": 3.90705596107056e-06, "loss": 0.7197, "step": 10164 }, { "epoch": 0.296779656068436, "grad_norm": 0.7599005929634001, "learning_rate": 3.906893755068938e-06, "loss": 0.6137, "step": 10165 }, { "epoch": 0.29680885229628334, "grad_norm": 0.8336186254876158, "learning_rate": 3.906731549067316e-06, "loss": 0.7343, "step": 10166 }, { "epoch": 0.2968380485241307, "grad_norm": 0.7494807903105795, "learning_rate": 3.906569343065694e-06, "loss": 0.6201, "step": 10167 }, { "epoch": 0.29686724475197807, "grad_norm": 0.7693048685685362, "learning_rate": 3.906407137064072e-06, "loss": 0.6836, "step": 10168 }, { "epoch": 0.2968964409798254, "grad_norm": 0.8794063881572997, "learning_rate": 3.90624493106245e-06, "loss": 0.8692, "step": 10169 }, { "epoch": 0.2969256372076728, "grad_norm": 0.757560538002414, "learning_rate": 3.906082725060828e-06, "loss": 0.6631, "step": 10170 }, { "epoch": 0.29695483343552015, "grad_norm": 0.7406991651107491, "learning_rate": 3.905920519059206e-06, "loss": 0.7032, "step": 10171 }, { "epoch": 0.2969840296633675, "grad_norm": 0.7570340046060867, "learning_rate": 3.905758313057584e-06, "loss": 0.6536, "step": 10172 }, { "epoch": 0.29701322589121487, "grad_norm": 0.6825708191896508, "learning_rate": 3.905596107055961e-06, "loss": 0.5719, "step": 10173 }, { "epoch": 0.29704242211906223, "grad_norm": 0.8725675551965096, "learning_rate": 3.905433901054339e-06, "loss": 0.6674, "step": 10174 }, { "epoch": 0.2970716183469096, "grad_norm": 0.7600127967337973, "learning_rate": 3.905271695052718e-06, "loss": 0.7635, "step": 10175 }, { "epoch": 0.29710081457475696, "grad_norm": 0.6828841265708202, "learning_rate": 3.905109489051096e-06, "loss": 0.6361, "step": 10176 }, { "epoch": 0.2971300108026043, "grad_norm": 0.7941685954078519, "learning_rate": 3.904947283049474e-06, "loss": 0.8231, "step": 10177 }, { "epoch": 0.2971592070304517, "grad_norm": 0.7722181631890316, "learning_rate": 3.904785077047851e-06, "loss": 0.6798, "step": 10178 }, { "epoch": 0.29718840325829904, "grad_norm": 0.7485516813922484, "learning_rate": 3.904622871046229e-06, "loss": 0.7181, "step": 10179 }, { "epoch": 0.2972175994861464, "grad_norm": 0.7060485277297184, "learning_rate": 3.904460665044607e-06, "loss": 0.6565, "step": 10180 }, { "epoch": 0.29724679571399376, "grad_norm": 0.7260742251147798, "learning_rate": 3.904298459042985e-06, "loss": 0.6647, "step": 10181 }, { "epoch": 0.2972759919418411, "grad_norm": 0.8323055478713579, "learning_rate": 3.904136253041363e-06, "loss": 0.7522, "step": 10182 }, { "epoch": 0.2973051881696885, "grad_norm": 0.8054461789634553, "learning_rate": 3.903974047039741e-06, "loss": 0.7829, "step": 10183 }, { "epoch": 0.29733438439753584, "grad_norm": 0.7708870444654864, "learning_rate": 3.903811841038119e-06, "loss": 0.7395, "step": 10184 }, { "epoch": 0.2973635806253832, "grad_norm": 0.7612050079808456, "learning_rate": 3.903649635036497e-06, "loss": 0.697, "step": 10185 }, { "epoch": 0.29739277685323057, "grad_norm": 0.6976413244499242, "learning_rate": 3.903487429034875e-06, "loss": 0.6262, "step": 10186 }, { "epoch": 0.29742197308107793, "grad_norm": 0.701816771311847, "learning_rate": 3.903325223033252e-06, "loss": 0.6648, "step": 10187 }, { "epoch": 0.2974511693089253, "grad_norm": 0.731770537006021, "learning_rate": 3.90316301703163e-06, "loss": 0.6987, "step": 10188 }, { "epoch": 0.29748036553677265, "grad_norm": 1.0152330797273623, "learning_rate": 3.903000811030008e-06, "loss": 0.7568, "step": 10189 }, { "epoch": 0.29750956176462, "grad_norm": 0.7307318885853763, "learning_rate": 3.902838605028386e-06, "loss": 0.6227, "step": 10190 }, { "epoch": 0.2975387579924674, "grad_norm": 0.7395338494238649, "learning_rate": 3.902676399026764e-06, "loss": 0.6095, "step": 10191 }, { "epoch": 0.29756795422031473, "grad_norm": 0.8588760814824078, "learning_rate": 3.902514193025142e-06, "loss": 0.6757, "step": 10192 }, { "epoch": 0.2975971504481621, "grad_norm": 0.7333326596060281, "learning_rate": 3.90235198702352e-06, "loss": 0.7026, "step": 10193 }, { "epoch": 0.29762634667600946, "grad_norm": 0.7467970221752877, "learning_rate": 3.902189781021898e-06, "loss": 0.7054, "step": 10194 }, { "epoch": 0.2976555429038568, "grad_norm": 0.801679537614842, "learning_rate": 3.902027575020276e-06, "loss": 0.8142, "step": 10195 }, { "epoch": 0.2976847391317042, "grad_norm": 0.9522852640203165, "learning_rate": 3.901865369018654e-06, "loss": 0.7233, "step": 10196 }, { "epoch": 0.29771393535955154, "grad_norm": 0.7321710930200566, "learning_rate": 3.901703163017032e-06, "loss": 0.6817, "step": 10197 }, { "epoch": 0.2977431315873989, "grad_norm": 0.8140406850637344, "learning_rate": 3.90154095701541e-06, "loss": 0.7167, "step": 10198 }, { "epoch": 0.29777232781524626, "grad_norm": 0.8718772143315321, "learning_rate": 3.901378751013788e-06, "loss": 0.6966, "step": 10199 }, { "epoch": 0.2978015240430936, "grad_norm": 0.76738344450339, "learning_rate": 3.901216545012166e-06, "loss": 0.6532, "step": 10200 }, { "epoch": 0.297830720270941, "grad_norm": 0.7842653447386284, "learning_rate": 3.901054339010544e-06, "loss": 0.6638, "step": 10201 }, { "epoch": 0.29785991649878835, "grad_norm": 0.7079305929173662, "learning_rate": 3.900892133008921e-06, "loss": 0.5875, "step": 10202 }, { "epoch": 0.2978891127266357, "grad_norm": 0.81353960381143, "learning_rate": 3.900729927007299e-06, "loss": 0.6805, "step": 10203 }, { "epoch": 0.29791830895448307, "grad_norm": 0.7661076461505394, "learning_rate": 3.900567721005677e-06, "loss": 0.6862, "step": 10204 }, { "epoch": 0.29794750518233043, "grad_norm": 0.7158078128725663, "learning_rate": 3.900405515004055e-06, "loss": 0.631, "step": 10205 }, { "epoch": 0.2979767014101778, "grad_norm": 0.7069704211200044, "learning_rate": 3.900243309002433e-06, "loss": 0.6488, "step": 10206 }, { "epoch": 0.29800589763802515, "grad_norm": 0.7282219583831591, "learning_rate": 3.900081103000811e-06, "loss": 0.6834, "step": 10207 }, { "epoch": 0.2980350938658725, "grad_norm": 0.771732464611479, "learning_rate": 3.899918896999189e-06, "loss": 0.6568, "step": 10208 }, { "epoch": 0.2980642900937199, "grad_norm": 0.7364965441187007, "learning_rate": 3.899756690997567e-06, "loss": 0.6355, "step": 10209 }, { "epoch": 0.29809348632156724, "grad_norm": 0.7996993886510407, "learning_rate": 3.899594484995945e-06, "loss": 0.7128, "step": 10210 }, { "epoch": 0.2981226825494146, "grad_norm": 0.7182155891080284, "learning_rate": 3.8994322789943225e-06, "loss": 0.5841, "step": 10211 }, { "epoch": 0.29815187877726196, "grad_norm": 0.7098259575445645, "learning_rate": 3.8992700729927005e-06, "loss": 0.6071, "step": 10212 }, { "epoch": 0.2981810750051093, "grad_norm": 0.6873904202536526, "learning_rate": 3.899107866991079e-06, "loss": 0.5721, "step": 10213 }, { "epoch": 0.2982102712329567, "grad_norm": 0.7323289573180608, "learning_rate": 3.898945660989457e-06, "loss": 0.6605, "step": 10214 }, { "epoch": 0.29823946746080404, "grad_norm": 0.7919647183840187, "learning_rate": 3.898783454987835e-06, "loss": 0.6502, "step": 10215 }, { "epoch": 0.2982686636886514, "grad_norm": 0.7074394912468265, "learning_rate": 3.8986212489862126e-06, "loss": 0.6339, "step": 10216 }, { "epoch": 0.29829785991649876, "grad_norm": 0.8044282264608594, "learning_rate": 3.8984590429845906e-06, "loss": 0.7668, "step": 10217 }, { "epoch": 0.2983270561443461, "grad_norm": 0.7203889293325748, "learning_rate": 3.898296836982969e-06, "loss": 0.6351, "step": 10218 }, { "epoch": 0.2983562523721935, "grad_norm": 0.710528910506092, "learning_rate": 3.898134630981347e-06, "loss": 0.6183, "step": 10219 }, { "epoch": 0.29838544860004085, "grad_norm": 0.7098005489734753, "learning_rate": 3.897972424979725e-06, "loss": 0.6381, "step": 10220 }, { "epoch": 0.2984146448278882, "grad_norm": 0.744363599234143, "learning_rate": 3.897810218978103e-06, "loss": 0.6349, "step": 10221 }, { "epoch": 0.2984438410557356, "grad_norm": 0.7067793803931615, "learning_rate": 3.897648012976481e-06, "loss": 0.6134, "step": 10222 }, { "epoch": 0.298473037283583, "grad_norm": 0.7033598383172309, "learning_rate": 3.897485806974859e-06, "loss": 0.5694, "step": 10223 }, { "epoch": 0.29850223351143035, "grad_norm": 0.6697391252356407, "learning_rate": 3.897323600973237e-06, "loss": 0.5675, "step": 10224 }, { "epoch": 0.2985314297392777, "grad_norm": 0.9196225064202449, "learning_rate": 3.897161394971614e-06, "loss": 0.7337, "step": 10225 }, { "epoch": 0.29856062596712507, "grad_norm": 0.7247008230794735, "learning_rate": 3.896999188969992e-06, "loss": 0.6939, "step": 10226 }, { "epoch": 0.29858982219497243, "grad_norm": 0.7176919496527726, "learning_rate": 3.89683698296837e-06, "loss": 0.6237, "step": 10227 }, { "epoch": 0.2986190184228198, "grad_norm": 0.7197547742555301, "learning_rate": 3.896674776966748e-06, "loss": 0.6285, "step": 10228 }, { "epoch": 0.29864821465066715, "grad_norm": 0.6638073799173463, "learning_rate": 3.896512570965126e-06, "loss": 0.587, "step": 10229 }, { "epoch": 0.2986774108785145, "grad_norm": 0.7120450492428158, "learning_rate": 3.896350364963504e-06, "loss": 0.6097, "step": 10230 }, { "epoch": 0.2987066071063619, "grad_norm": 0.7519890673373536, "learning_rate": 3.896188158961882e-06, "loss": 0.7159, "step": 10231 }, { "epoch": 0.29873580333420924, "grad_norm": 0.7666073216705855, "learning_rate": 3.89602595296026e-06, "loss": 0.7431, "step": 10232 }, { "epoch": 0.2987649995620566, "grad_norm": 0.83185160778556, "learning_rate": 3.895863746958638e-06, "loss": 0.7527, "step": 10233 }, { "epoch": 0.29879419578990396, "grad_norm": 0.7828512742659707, "learning_rate": 3.895701540957016e-06, "loss": 0.7463, "step": 10234 }, { "epoch": 0.2988233920177513, "grad_norm": 0.6848043948432524, "learning_rate": 3.895539334955394e-06, "loss": 0.582, "step": 10235 }, { "epoch": 0.2988525882455987, "grad_norm": 0.7285225589124438, "learning_rate": 3.895377128953772e-06, "loss": 0.6213, "step": 10236 }, { "epoch": 0.29888178447344604, "grad_norm": 0.6579870928482592, "learning_rate": 3.89521492295215e-06, "loss": 0.5275, "step": 10237 }, { "epoch": 0.2989109807012934, "grad_norm": 0.711241163563435, "learning_rate": 3.895052716950528e-06, "loss": 0.599, "step": 10238 }, { "epoch": 0.29894017692914077, "grad_norm": 0.7857649278699137, "learning_rate": 3.894890510948906e-06, "loss": 0.7474, "step": 10239 }, { "epoch": 0.2989693731569881, "grad_norm": 0.7537778375835712, "learning_rate": 3.894728304947283e-06, "loss": 0.6841, "step": 10240 }, { "epoch": 0.2989985693848355, "grad_norm": 0.8942029634770264, "learning_rate": 3.894566098945661e-06, "loss": 0.7494, "step": 10241 }, { "epoch": 0.29902776561268285, "grad_norm": 0.7023473326135627, "learning_rate": 3.894403892944039e-06, "loss": 0.5761, "step": 10242 }, { "epoch": 0.2990569618405302, "grad_norm": 0.8107743413199926, "learning_rate": 3.894241686942417e-06, "loss": 0.6748, "step": 10243 }, { "epoch": 0.29908615806837757, "grad_norm": 0.7371393980886201, "learning_rate": 3.894079480940795e-06, "loss": 0.6781, "step": 10244 }, { "epoch": 0.29911535429622493, "grad_norm": 0.7432297584361675, "learning_rate": 3.893917274939173e-06, "loss": 0.6689, "step": 10245 }, { "epoch": 0.2991445505240723, "grad_norm": 0.8174144207022359, "learning_rate": 3.893755068937551e-06, "loss": 0.6811, "step": 10246 }, { "epoch": 0.29917374675191966, "grad_norm": 0.680276568450712, "learning_rate": 3.893592862935929e-06, "loss": 0.5649, "step": 10247 }, { "epoch": 0.299202942979767, "grad_norm": 0.7458854312648117, "learning_rate": 3.893430656934306e-06, "loss": 0.6376, "step": 10248 }, { "epoch": 0.2992321392076144, "grad_norm": 0.7669033778037605, "learning_rate": 3.893268450932684e-06, "loss": 0.6509, "step": 10249 }, { "epoch": 0.29926133543546174, "grad_norm": 0.7642962234952357, "learning_rate": 3.893106244931063e-06, "loss": 0.6956, "step": 10250 }, { "epoch": 0.2992905316633091, "grad_norm": 0.874161375880455, "learning_rate": 3.892944038929441e-06, "loss": 0.6949, "step": 10251 }, { "epoch": 0.29931972789115646, "grad_norm": 0.7332131651518392, "learning_rate": 3.892781832927819e-06, "loss": 0.6765, "step": 10252 }, { "epoch": 0.2993489241190038, "grad_norm": 0.6699000599362495, "learning_rate": 3.892619626926197e-06, "loss": 0.5405, "step": 10253 }, { "epoch": 0.2993781203468512, "grad_norm": 0.9359378403209566, "learning_rate": 3.892457420924574e-06, "loss": 0.645, "step": 10254 }, { "epoch": 0.29940731657469855, "grad_norm": 0.7971194170868573, "learning_rate": 3.892295214922952e-06, "loss": 0.7163, "step": 10255 }, { "epoch": 0.2994365128025459, "grad_norm": 0.7709969468654772, "learning_rate": 3.89213300892133e-06, "loss": 0.7036, "step": 10256 }, { "epoch": 0.29946570903039327, "grad_norm": 0.7232382950067089, "learning_rate": 3.891970802919708e-06, "loss": 0.6421, "step": 10257 }, { "epoch": 0.29949490525824063, "grad_norm": 0.7578724168173905, "learning_rate": 3.891808596918086e-06, "loss": 0.5891, "step": 10258 }, { "epoch": 0.299524101486088, "grad_norm": 0.7927795277475345, "learning_rate": 3.891646390916464e-06, "loss": 0.7146, "step": 10259 }, { "epoch": 0.29955329771393535, "grad_norm": 0.7244106289011428, "learning_rate": 3.891484184914842e-06, "loss": 0.6388, "step": 10260 }, { "epoch": 0.2995824939417827, "grad_norm": 0.7314397781279859, "learning_rate": 3.89132197891322e-06, "loss": 0.6884, "step": 10261 }, { "epoch": 0.2996116901696301, "grad_norm": 0.7607219686356838, "learning_rate": 3.891159772911598e-06, "loss": 0.7013, "step": 10262 }, { "epoch": 0.29964088639747744, "grad_norm": 0.7643987860886913, "learning_rate": 3.8909975669099755e-06, "loss": 0.5589, "step": 10263 }, { "epoch": 0.2996700826253248, "grad_norm": 0.7887266909094316, "learning_rate": 3.8908353609083535e-06, "loss": 0.6847, "step": 10264 }, { "epoch": 0.29969927885317216, "grad_norm": 0.7321861179384429, "learning_rate": 3.8906731549067315e-06, "loss": 0.6278, "step": 10265 }, { "epoch": 0.2997284750810195, "grad_norm": 0.7286078955971721, "learning_rate": 3.8905109489051095e-06, "loss": 0.6923, "step": 10266 }, { "epoch": 0.2997576713088669, "grad_norm": 0.7843905292002008, "learning_rate": 3.8903487429034875e-06, "loss": 0.752, "step": 10267 }, { "epoch": 0.29978686753671424, "grad_norm": 0.748168175420559, "learning_rate": 3.8901865369018655e-06, "loss": 0.6982, "step": 10268 }, { "epoch": 0.2998160637645616, "grad_norm": 0.9572080386287487, "learning_rate": 3.8900243309002435e-06, "loss": 0.6768, "step": 10269 }, { "epoch": 0.29984525999240896, "grad_norm": 0.7539504541870521, "learning_rate": 3.8898621248986215e-06, "loss": 0.672, "step": 10270 }, { "epoch": 0.2998744562202563, "grad_norm": 0.7386531604252196, "learning_rate": 3.8896999188969995e-06, "loss": 0.6695, "step": 10271 }, { "epoch": 0.2999036524481037, "grad_norm": 0.7484109041673137, "learning_rate": 3.8895377128953776e-06, "loss": 0.6997, "step": 10272 }, { "epoch": 0.29993284867595105, "grad_norm": 0.7789804865505962, "learning_rate": 3.8893755068937556e-06, "loss": 0.7251, "step": 10273 }, { "epoch": 0.2999620449037984, "grad_norm": 0.7330075328320788, "learning_rate": 3.8892133008921336e-06, "loss": 0.6233, "step": 10274 }, { "epoch": 0.29999124113164577, "grad_norm": 0.6916233528370545, "learning_rate": 3.889051094890512e-06, "loss": 0.5353, "step": 10275 }, { "epoch": 0.30002043735949313, "grad_norm": 0.6890644907883262, "learning_rate": 3.88888888888889e-06, "loss": 0.606, "step": 10276 }, { "epoch": 0.3000496335873405, "grad_norm": 0.8129947920202801, "learning_rate": 3.888726682887268e-06, "loss": 0.7025, "step": 10277 }, { "epoch": 0.30007882981518785, "grad_norm": 0.7730664506781141, "learning_rate": 3.888564476885645e-06, "loss": 0.7591, "step": 10278 }, { "epoch": 0.3001080260430352, "grad_norm": 0.6909842541874572, "learning_rate": 3.888402270884023e-06, "loss": 0.5855, "step": 10279 }, { "epoch": 0.3001372222708826, "grad_norm": 0.782226811401294, "learning_rate": 3.888240064882401e-06, "loss": 0.6514, "step": 10280 }, { "epoch": 0.30016641849872994, "grad_norm": 0.8180705522333794, "learning_rate": 3.888077858880779e-06, "loss": 0.7275, "step": 10281 }, { "epoch": 0.30019561472657735, "grad_norm": 0.7255705171829635, "learning_rate": 3.887915652879157e-06, "loss": 0.6686, "step": 10282 }, { "epoch": 0.3002248109544247, "grad_norm": 0.7478580145191486, "learning_rate": 3.887753446877535e-06, "loss": 0.694, "step": 10283 }, { "epoch": 0.3002540071822721, "grad_norm": 0.7406120583261625, "learning_rate": 3.887591240875913e-06, "loss": 0.7305, "step": 10284 }, { "epoch": 0.30028320341011944, "grad_norm": 0.7321428523264417, "learning_rate": 3.887429034874291e-06, "loss": 0.6806, "step": 10285 }, { "epoch": 0.3003123996379668, "grad_norm": 0.7710596095528899, "learning_rate": 3.887266828872668e-06, "loss": 0.678, "step": 10286 }, { "epoch": 0.30034159586581416, "grad_norm": 0.6929123936190777, "learning_rate": 3.887104622871046e-06, "loss": 0.5719, "step": 10287 }, { "epoch": 0.3003707920936615, "grad_norm": 0.7605803212196816, "learning_rate": 3.886942416869425e-06, "loss": 0.6944, "step": 10288 }, { "epoch": 0.3003999883215089, "grad_norm": 0.7509222503655367, "learning_rate": 3.886780210867803e-06, "loss": 0.7371, "step": 10289 }, { "epoch": 0.30042918454935624, "grad_norm": 0.705948741270431, "learning_rate": 3.886618004866181e-06, "loss": 0.6268, "step": 10290 }, { "epoch": 0.3004583807772036, "grad_norm": 0.812046032308862, "learning_rate": 3.886455798864559e-06, "loss": 0.6829, "step": 10291 }, { "epoch": 0.30048757700505097, "grad_norm": 0.7926045816342434, "learning_rate": 3.886293592862936e-06, "loss": 0.6609, "step": 10292 }, { "epoch": 0.3005167732328983, "grad_norm": 0.731954636335183, "learning_rate": 3.886131386861314e-06, "loss": 0.7059, "step": 10293 }, { "epoch": 0.3005459694607457, "grad_norm": 0.7837509061044154, "learning_rate": 3.885969180859692e-06, "loss": 0.7297, "step": 10294 }, { "epoch": 0.30057516568859305, "grad_norm": 0.9100898177870443, "learning_rate": 3.88580697485807e-06, "loss": 0.6956, "step": 10295 }, { "epoch": 0.3006043619164404, "grad_norm": 0.7448026940048514, "learning_rate": 3.885644768856448e-06, "loss": 0.6727, "step": 10296 }, { "epoch": 0.30063355814428777, "grad_norm": 0.7521449557218922, "learning_rate": 3.885482562854826e-06, "loss": 0.7017, "step": 10297 }, { "epoch": 0.30066275437213513, "grad_norm": 0.7265080872809455, "learning_rate": 3.885320356853204e-06, "loss": 0.6305, "step": 10298 }, { "epoch": 0.3006919505999825, "grad_norm": 0.750011408647148, "learning_rate": 3.885158150851582e-06, "loss": 0.7053, "step": 10299 }, { "epoch": 0.30072114682782985, "grad_norm": 0.742660479667654, "learning_rate": 3.88499594484996e-06, "loss": 0.6994, "step": 10300 }, { "epoch": 0.3007503430556772, "grad_norm": 0.7394731466857425, "learning_rate": 3.884833738848337e-06, "loss": 0.6575, "step": 10301 }, { "epoch": 0.3007795392835246, "grad_norm": 0.7342221775398726, "learning_rate": 3.884671532846715e-06, "loss": 0.6147, "step": 10302 }, { "epoch": 0.30080873551137194, "grad_norm": 0.7211423249402009, "learning_rate": 3.884509326845093e-06, "loss": 0.6241, "step": 10303 }, { "epoch": 0.3008379317392193, "grad_norm": 0.7114570614900889, "learning_rate": 3.884347120843471e-06, "loss": 0.6397, "step": 10304 }, { "epoch": 0.30086712796706666, "grad_norm": 0.7906924058632732, "learning_rate": 3.884184914841849e-06, "loss": 0.667, "step": 10305 }, { "epoch": 0.300896324194914, "grad_norm": 0.7715645488449918, "learning_rate": 3.884022708840227e-06, "loss": 0.6744, "step": 10306 }, { "epoch": 0.3009255204227614, "grad_norm": 0.7681598422088453, "learning_rate": 3.883860502838605e-06, "loss": 0.5986, "step": 10307 }, { "epoch": 0.30095471665060874, "grad_norm": 0.7501601519641529, "learning_rate": 3.883698296836983e-06, "loss": 0.6528, "step": 10308 }, { "epoch": 0.3009839128784561, "grad_norm": 0.6978016704466213, "learning_rate": 3.883536090835361e-06, "loss": 0.6308, "step": 10309 }, { "epoch": 0.30101310910630347, "grad_norm": 0.9632715336094554, "learning_rate": 3.883373884833739e-06, "loss": 0.7343, "step": 10310 }, { "epoch": 0.30104230533415083, "grad_norm": 0.7185059898894586, "learning_rate": 3.883211678832117e-06, "loss": 0.6363, "step": 10311 }, { "epoch": 0.3010715015619982, "grad_norm": 0.7557540981636807, "learning_rate": 3.883049472830495e-06, "loss": 0.6599, "step": 10312 }, { "epoch": 0.30110069778984555, "grad_norm": 0.74319381699955, "learning_rate": 3.882887266828873e-06, "loss": 0.663, "step": 10313 }, { "epoch": 0.3011298940176929, "grad_norm": 0.7498073554165305, "learning_rate": 3.882725060827251e-06, "loss": 0.7106, "step": 10314 }, { "epoch": 0.3011590902455403, "grad_norm": 0.7500497233630192, "learning_rate": 3.882562854825629e-06, "loss": 0.6224, "step": 10315 }, { "epoch": 0.30118828647338763, "grad_norm": 0.7850749391775743, "learning_rate": 3.8824006488240065e-06, "loss": 0.6726, "step": 10316 }, { "epoch": 0.301217482701235, "grad_norm": 0.7274159970789372, "learning_rate": 3.8822384428223845e-06, "loss": 0.6747, "step": 10317 }, { "epoch": 0.30124667892908236, "grad_norm": 0.7777200639691698, "learning_rate": 3.8820762368207625e-06, "loss": 0.7295, "step": 10318 }, { "epoch": 0.3012758751569297, "grad_norm": 0.9601073519200573, "learning_rate": 3.8819140308191405e-06, "loss": 0.7083, "step": 10319 }, { "epoch": 0.3013050713847771, "grad_norm": 0.6957202538429782, "learning_rate": 3.8817518248175185e-06, "loss": 0.6261, "step": 10320 }, { "epoch": 0.30133426761262444, "grad_norm": 0.8146179904862877, "learning_rate": 3.8815896188158965e-06, "loss": 0.7845, "step": 10321 }, { "epoch": 0.3013634638404718, "grad_norm": 0.8657253573191303, "learning_rate": 3.8814274128142745e-06, "loss": 0.7467, "step": 10322 }, { "epoch": 0.30139266006831916, "grad_norm": 0.7332492255133113, "learning_rate": 3.8812652068126525e-06, "loss": 0.6421, "step": 10323 }, { "epoch": 0.3014218562961665, "grad_norm": 0.7142427439084971, "learning_rate": 3.88110300081103e-06, "loss": 0.6372, "step": 10324 }, { "epoch": 0.3014510525240139, "grad_norm": 0.7014076829477148, "learning_rate": 3.880940794809408e-06, "loss": 0.6343, "step": 10325 }, { "epoch": 0.30148024875186125, "grad_norm": 0.7391227512449995, "learning_rate": 3.8807785888077865e-06, "loss": 0.6974, "step": 10326 }, { "epoch": 0.3015094449797086, "grad_norm": 0.8507211232096228, "learning_rate": 3.8806163828061645e-06, "loss": 0.698, "step": 10327 }, { "epoch": 0.30153864120755597, "grad_norm": 0.7242810742684246, "learning_rate": 3.8804541768045425e-06, "loss": 0.6255, "step": 10328 }, { "epoch": 0.30156783743540333, "grad_norm": 0.7902076074770041, "learning_rate": 3.8802919708029206e-06, "loss": 0.7033, "step": 10329 }, { "epoch": 0.3015970336632507, "grad_norm": 0.8253098583115642, "learning_rate": 3.880129764801298e-06, "loss": 0.7183, "step": 10330 }, { "epoch": 0.30162622989109805, "grad_norm": 0.7042441572038496, "learning_rate": 3.879967558799676e-06, "loss": 0.6368, "step": 10331 }, { "epoch": 0.3016554261189454, "grad_norm": 0.739465221304971, "learning_rate": 3.879805352798054e-06, "loss": 0.6753, "step": 10332 }, { "epoch": 0.3016846223467928, "grad_norm": 0.7474615583132855, "learning_rate": 3.879643146796432e-06, "loss": 0.6922, "step": 10333 }, { "epoch": 0.30171381857464014, "grad_norm": 0.8490272089829549, "learning_rate": 3.87948094079481e-06, "loss": 0.7146, "step": 10334 }, { "epoch": 0.3017430148024875, "grad_norm": 0.729273604665576, "learning_rate": 3.879318734793188e-06, "loss": 0.6426, "step": 10335 }, { "epoch": 0.30177221103033486, "grad_norm": 0.8083614508100928, "learning_rate": 3.879156528791566e-06, "loss": 0.7656, "step": 10336 }, { "epoch": 0.3018014072581822, "grad_norm": 0.7723994450924845, "learning_rate": 3.878994322789944e-06, "loss": 0.7228, "step": 10337 }, { "epoch": 0.3018306034860296, "grad_norm": 0.7601493950989424, "learning_rate": 3.878832116788322e-06, "loss": 0.7264, "step": 10338 }, { "epoch": 0.30185979971387694, "grad_norm": 0.8061146556100058, "learning_rate": 3.878669910786699e-06, "loss": 0.7195, "step": 10339 }, { "epoch": 0.3018889959417243, "grad_norm": 0.7702908888045544, "learning_rate": 3.878507704785077e-06, "loss": 0.6361, "step": 10340 }, { "epoch": 0.30191819216957166, "grad_norm": 0.7009255073140493, "learning_rate": 3.878345498783455e-06, "loss": 0.6112, "step": 10341 }, { "epoch": 0.3019473883974191, "grad_norm": 0.7923637640244686, "learning_rate": 3.878183292781833e-06, "loss": 0.7231, "step": 10342 }, { "epoch": 0.30197658462526644, "grad_norm": 0.8537496701679399, "learning_rate": 3.878021086780211e-06, "loss": 0.6935, "step": 10343 }, { "epoch": 0.3020057808531138, "grad_norm": 0.6760493762575082, "learning_rate": 3.877858880778589e-06, "loss": 0.6035, "step": 10344 }, { "epoch": 0.30203497708096116, "grad_norm": 0.7911148900267894, "learning_rate": 3.877696674776967e-06, "loss": 0.8235, "step": 10345 }, { "epoch": 0.3020641733088085, "grad_norm": 0.7459880759402711, "learning_rate": 3.877534468775345e-06, "loss": 0.7373, "step": 10346 }, { "epoch": 0.3020933695366559, "grad_norm": 0.7251772351941124, "learning_rate": 3.877372262773723e-06, "loss": 0.5889, "step": 10347 }, { "epoch": 0.30212256576450325, "grad_norm": 0.7256956444212663, "learning_rate": 3.877210056772101e-06, "loss": 0.6244, "step": 10348 }, { "epoch": 0.3021517619923506, "grad_norm": 0.7327884286400789, "learning_rate": 3.877047850770479e-06, "loss": 0.6536, "step": 10349 }, { "epoch": 0.30218095822019797, "grad_norm": 0.7309310434634854, "learning_rate": 3.876885644768857e-06, "loss": 0.629, "step": 10350 }, { "epoch": 0.30221015444804533, "grad_norm": 0.6865112099876234, "learning_rate": 3.876723438767235e-06, "loss": 0.5889, "step": 10351 }, { "epoch": 0.3022393506758927, "grad_norm": 0.7885946067512639, "learning_rate": 3.876561232765613e-06, "loss": 0.6524, "step": 10352 }, { "epoch": 0.30226854690374005, "grad_norm": 0.6968515297364436, "learning_rate": 3.876399026763991e-06, "loss": 0.612, "step": 10353 }, { "epoch": 0.3022977431315874, "grad_norm": 0.7983442339983812, "learning_rate": 3.876236820762368e-06, "loss": 0.6566, "step": 10354 }, { "epoch": 0.3023269393594348, "grad_norm": 0.7507700223307143, "learning_rate": 3.876074614760746e-06, "loss": 0.7053, "step": 10355 }, { "epoch": 0.30235613558728214, "grad_norm": 0.746862863971519, "learning_rate": 3.875912408759124e-06, "loss": 0.6845, "step": 10356 }, { "epoch": 0.3023853318151295, "grad_norm": 0.787022542291878, "learning_rate": 3.875750202757502e-06, "loss": 0.7452, "step": 10357 }, { "epoch": 0.30241452804297686, "grad_norm": 0.7924437874331334, "learning_rate": 3.87558799675588e-06, "loss": 0.6707, "step": 10358 }, { "epoch": 0.3024437242708242, "grad_norm": 0.7203736189013372, "learning_rate": 3.875425790754258e-06, "loss": 0.6781, "step": 10359 }, { "epoch": 0.3024729204986716, "grad_norm": 0.7250402736577357, "learning_rate": 3.875263584752636e-06, "loss": 0.6046, "step": 10360 }, { "epoch": 0.30250211672651894, "grad_norm": 0.7295455484455241, "learning_rate": 3.875101378751014e-06, "loss": 0.6354, "step": 10361 }, { "epoch": 0.3025313129543663, "grad_norm": 0.7277763016108355, "learning_rate": 3.874939172749391e-06, "loss": 0.6415, "step": 10362 }, { "epoch": 0.30256050918221367, "grad_norm": 0.7493929640089935, "learning_rate": 3.874776966747769e-06, "loss": 0.6613, "step": 10363 }, { "epoch": 0.302589705410061, "grad_norm": 0.9989185658232755, "learning_rate": 3.874614760746148e-06, "loss": 0.7582, "step": 10364 }, { "epoch": 0.3026189016379084, "grad_norm": 0.7102467473065648, "learning_rate": 3.874452554744526e-06, "loss": 0.6556, "step": 10365 }, { "epoch": 0.30264809786575575, "grad_norm": 0.7682753761737697, "learning_rate": 3.874290348742904e-06, "loss": 0.7838, "step": 10366 }, { "epoch": 0.3026772940936031, "grad_norm": 0.7449749432277496, "learning_rate": 3.874128142741282e-06, "loss": 0.6318, "step": 10367 }, { "epoch": 0.30270649032145047, "grad_norm": 0.7499571126000578, "learning_rate": 3.8739659367396594e-06, "loss": 0.6695, "step": 10368 }, { "epoch": 0.30273568654929783, "grad_norm": 0.6799726572478777, "learning_rate": 3.8738037307380374e-06, "loss": 0.5726, "step": 10369 }, { "epoch": 0.3027648827771452, "grad_norm": 0.6766985222171678, "learning_rate": 3.8736415247364154e-06, "loss": 0.5589, "step": 10370 }, { "epoch": 0.30279407900499256, "grad_norm": 1.00115964285325, "learning_rate": 3.8734793187347935e-06, "loss": 0.6634, "step": 10371 }, { "epoch": 0.3028232752328399, "grad_norm": 0.744972690454089, "learning_rate": 3.8733171127331715e-06, "loss": 0.7301, "step": 10372 }, { "epoch": 0.3028524714606873, "grad_norm": 0.7676467268704107, "learning_rate": 3.8731549067315495e-06, "loss": 0.7561, "step": 10373 }, { "epoch": 0.30288166768853464, "grad_norm": 0.787023597111024, "learning_rate": 3.8729927007299275e-06, "loss": 0.7524, "step": 10374 }, { "epoch": 0.302910863916382, "grad_norm": 0.8237287440780903, "learning_rate": 3.8728304947283055e-06, "loss": 0.7266, "step": 10375 }, { "epoch": 0.30294006014422936, "grad_norm": 0.7563165021979661, "learning_rate": 3.8726682887266835e-06, "loss": 0.6825, "step": 10376 }, { "epoch": 0.3029692563720767, "grad_norm": 0.70502431903248, "learning_rate": 3.872506082725061e-06, "loss": 0.5776, "step": 10377 }, { "epoch": 0.3029984525999241, "grad_norm": 0.802170401685247, "learning_rate": 3.872343876723439e-06, "loss": 0.7181, "step": 10378 }, { "epoch": 0.30302764882777145, "grad_norm": 0.7628318794129003, "learning_rate": 3.872181670721817e-06, "loss": 0.6917, "step": 10379 }, { "epoch": 0.3030568450556188, "grad_norm": 0.7204306889224137, "learning_rate": 3.872019464720195e-06, "loss": 0.6654, "step": 10380 }, { "epoch": 0.30308604128346617, "grad_norm": 0.739825637201744, "learning_rate": 3.871857258718573e-06, "loss": 0.6747, "step": 10381 }, { "epoch": 0.30311523751131353, "grad_norm": 0.7546184996319145, "learning_rate": 3.8716950527169515e-06, "loss": 0.7586, "step": 10382 }, { "epoch": 0.3031444337391609, "grad_norm": 0.9288468643998761, "learning_rate": 3.871532846715329e-06, "loss": 0.737, "step": 10383 }, { "epoch": 0.30317362996700825, "grad_norm": 0.804302580553744, "learning_rate": 3.871370640713707e-06, "loss": 0.6109, "step": 10384 }, { "epoch": 0.3032028261948556, "grad_norm": 0.8326512175952191, "learning_rate": 3.871208434712085e-06, "loss": 0.6932, "step": 10385 }, { "epoch": 0.303232022422703, "grad_norm": 0.8293768319513971, "learning_rate": 3.871046228710463e-06, "loss": 0.7122, "step": 10386 }, { "epoch": 0.30326121865055033, "grad_norm": 0.7536403548700819, "learning_rate": 3.870884022708841e-06, "loss": 0.5459, "step": 10387 }, { "epoch": 0.3032904148783977, "grad_norm": 0.7675254563251804, "learning_rate": 3.870721816707219e-06, "loss": 0.5694, "step": 10388 }, { "epoch": 0.30331961110624506, "grad_norm": 0.7577677525580528, "learning_rate": 3.870559610705597e-06, "loss": 0.6764, "step": 10389 }, { "epoch": 0.3033488073340924, "grad_norm": 0.7863656867635127, "learning_rate": 3.870397404703975e-06, "loss": 0.7318, "step": 10390 }, { "epoch": 0.3033780035619398, "grad_norm": 0.9070032709677629, "learning_rate": 3.870235198702352e-06, "loss": 0.7038, "step": 10391 }, { "epoch": 0.30340719978978714, "grad_norm": 0.7409784289609759, "learning_rate": 3.87007299270073e-06, "loss": 0.6554, "step": 10392 }, { "epoch": 0.3034363960176345, "grad_norm": 0.7101583025922444, "learning_rate": 3.869910786699108e-06, "loss": 0.6768, "step": 10393 }, { "epoch": 0.30346559224548186, "grad_norm": 0.7077857608498871, "learning_rate": 3.869748580697486e-06, "loss": 0.5994, "step": 10394 }, { "epoch": 0.3034947884733292, "grad_norm": 0.7879449317326663, "learning_rate": 3.869586374695864e-06, "loss": 0.7248, "step": 10395 }, { "epoch": 0.3035239847011766, "grad_norm": 0.7939026822141958, "learning_rate": 3.869424168694242e-06, "loss": 0.7881, "step": 10396 }, { "epoch": 0.30355318092902395, "grad_norm": 0.6896786627447179, "learning_rate": 3.86926196269262e-06, "loss": 0.6314, "step": 10397 }, { "epoch": 0.3035823771568713, "grad_norm": 0.7205686235018062, "learning_rate": 3.869099756690998e-06, "loss": 0.626, "step": 10398 }, { "epoch": 0.30361157338471867, "grad_norm": 0.815162387295659, "learning_rate": 3.868937550689376e-06, "loss": 0.6696, "step": 10399 }, { "epoch": 0.30364076961256603, "grad_norm": 0.6811938278807459, "learning_rate": 3.868775344687753e-06, "loss": 0.5719, "step": 10400 }, { "epoch": 0.3036699658404134, "grad_norm": 0.7424019703118746, "learning_rate": 3.868613138686132e-06, "loss": 0.6601, "step": 10401 }, { "epoch": 0.3036991620682608, "grad_norm": 0.8181172634308711, "learning_rate": 3.86845093268451e-06, "loss": 0.7422, "step": 10402 }, { "epoch": 0.30372835829610817, "grad_norm": 0.8729968437473337, "learning_rate": 3.868288726682888e-06, "loss": 0.6942, "step": 10403 }, { "epoch": 0.30375755452395553, "grad_norm": 0.7601361252052314, "learning_rate": 3.868126520681266e-06, "loss": 0.6987, "step": 10404 }, { "epoch": 0.3037867507518029, "grad_norm": 0.8870644423767216, "learning_rate": 3.867964314679644e-06, "loss": 0.6695, "step": 10405 }, { "epoch": 0.30381594697965025, "grad_norm": 0.7407218062257772, "learning_rate": 3.867802108678021e-06, "loss": 0.6921, "step": 10406 }, { "epoch": 0.3038451432074976, "grad_norm": 0.681910444563217, "learning_rate": 3.867639902676399e-06, "loss": 0.6374, "step": 10407 }, { "epoch": 0.303874339435345, "grad_norm": 1.242022829094162, "learning_rate": 3.867477696674777e-06, "loss": 0.6754, "step": 10408 }, { "epoch": 0.30390353566319234, "grad_norm": 0.7373393096801701, "learning_rate": 3.867315490673155e-06, "loss": 0.6066, "step": 10409 }, { "epoch": 0.3039327318910397, "grad_norm": 0.7533125140401055, "learning_rate": 3.867153284671533e-06, "loss": 0.6921, "step": 10410 }, { "epoch": 0.30396192811888706, "grad_norm": 0.8316428411760355, "learning_rate": 3.866991078669911e-06, "loss": 0.7071, "step": 10411 }, { "epoch": 0.3039911243467344, "grad_norm": 0.7711939000191605, "learning_rate": 3.866828872668289e-06, "loss": 0.7815, "step": 10412 }, { "epoch": 0.3040203205745818, "grad_norm": 0.8041205754557794, "learning_rate": 3.866666666666667e-06, "loss": 0.7476, "step": 10413 }, { "epoch": 0.30404951680242914, "grad_norm": 0.7770003585869739, "learning_rate": 3.866504460665045e-06, "loss": 0.7367, "step": 10414 }, { "epoch": 0.3040787130302765, "grad_norm": 0.666138886509599, "learning_rate": 3.866342254663422e-06, "loss": 0.583, "step": 10415 }, { "epoch": 0.30410790925812387, "grad_norm": 0.7684698067346415, "learning_rate": 3.8661800486618e-06, "loss": 0.6646, "step": 10416 }, { "epoch": 0.3041371054859712, "grad_norm": 0.6741084218220411, "learning_rate": 3.866017842660178e-06, "loss": 0.5909, "step": 10417 }, { "epoch": 0.3041663017138186, "grad_norm": 0.711513999020629, "learning_rate": 3.865855636658556e-06, "loss": 0.6049, "step": 10418 }, { "epoch": 0.30419549794166595, "grad_norm": 0.7286545940908146, "learning_rate": 3.865693430656934e-06, "loss": 0.7008, "step": 10419 }, { "epoch": 0.3042246941695133, "grad_norm": 0.7380623753253643, "learning_rate": 3.865531224655313e-06, "loss": 0.6154, "step": 10420 }, { "epoch": 0.30425389039736067, "grad_norm": 0.7959751458231386, "learning_rate": 3.86536901865369e-06, "loss": 0.6931, "step": 10421 }, { "epoch": 0.30428308662520803, "grad_norm": 0.6694347366761256, "learning_rate": 3.865206812652068e-06, "loss": 0.603, "step": 10422 }, { "epoch": 0.3043122828530554, "grad_norm": 0.727105988964675, "learning_rate": 3.865044606650446e-06, "loss": 0.6093, "step": 10423 }, { "epoch": 0.30434147908090275, "grad_norm": 0.7416496667062579, "learning_rate": 3.8648824006488244e-06, "loss": 0.7018, "step": 10424 }, { "epoch": 0.3043706753087501, "grad_norm": 0.7596329840255931, "learning_rate": 3.8647201946472024e-06, "loss": 0.7051, "step": 10425 }, { "epoch": 0.3043998715365975, "grad_norm": 0.7570780613953889, "learning_rate": 3.8645579886455804e-06, "loss": 0.6801, "step": 10426 }, { "epoch": 0.30442906776444484, "grad_norm": 0.6952268421849329, "learning_rate": 3.8643957826439584e-06, "loss": 0.5746, "step": 10427 }, { "epoch": 0.3044582639922922, "grad_norm": 1.5159573499296919, "learning_rate": 3.8642335766423365e-06, "loss": 0.7087, "step": 10428 }, { "epoch": 0.30448746022013956, "grad_norm": 0.77208479366301, "learning_rate": 3.864071370640714e-06, "loss": 0.6975, "step": 10429 }, { "epoch": 0.3045166564479869, "grad_norm": 0.7186527837343221, "learning_rate": 3.863909164639092e-06, "loss": 0.601, "step": 10430 }, { "epoch": 0.3045458526758343, "grad_norm": 0.7396447578873752, "learning_rate": 3.86374695863747e-06, "loss": 0.6815, "step": 10431 }, { "epoch": 0.30457504890368164, "grad_norm": 0.7557180871508107, "learning_rate": 3.863584752635848e-06, "loss": 0.6413, "step": 10432 }, { "epoch": 0.304604245131529, "grad_norm": 0.7604921012769746, "learning_rate": 3.863422546634226e-06, "loss": 0.7065, "step": 10433 }, { "epoch": 0.30463344135937637, "grad_norm": 0.7774975936753759, "learning_rate": 3.863260340632604e-06, "loss": 0.7363, "step": 10434 }, { "epoch": 0.30466263758722373, "grad_norm": 0.7389115076325861, "learning_rate": 3.863098134630982e-06, "loss": 0.6983, "step": 10435 }, { "epoch": 0.3046918338150711, "grad_norm": 0.9262392548463572, "learning_rate": 3.86293592862936e-06, "loss": 0.8449, "step": 10436 }, { "epoch": 0.30472103004291845, "grad_norm": 0.8151014875735801, "learning_rate": 3.862773722627738e-06, "loss": 0.654, "step": 10437 }, { "epoch": 0.3047502262707658, "grad_norm": 0.8053358848445259, "learning_rate": 3.862611516626115e-06, "loss": 0.7349, "step": 10438 }, { "epoch": 0.3047794224986132, "grad_norm": 0.7089838879597232, "learning_rate": 3.862449310624494e-06, "loss": 0.5383, "step": 10439 }, { "epoch": 0.30480861872646053, "grad_norm": 0.7926707284238325, "learning_rate": 3.862287104622872e-06, "loss": 0.7397, "step": 10440 }, { "epoch": 0.3048378149543079, "grad_norm": 0.69684746467784, "learning_rate": 3.86212489862125e-06, "loss": 0.6336, "step": 10441 }, { "epoch": 0.30486701118215526, "grad_norm": 0.7480678990145133, "learning_rate": 3.861962692619628e-06, "loss": 0.6675, "step": 10442 }, { "epoch": 0.3048962074100026, "grad_norm": 0.7464016990052349, "learning_rate": 3.861800486618006e-06, "loss": 0.6852, "step": 10443 }, { "epoch": 0.30492540363785, "grad_norm": 0.7215762874963259, "learning_rate": 3.861638280616383e-06, "loss": 0.6281, "step": 10444 }, { "epoch": 0.30495459986569734, "grad_norm": 0.7134152040836216, "learning_rate": 3.861476074614761e-06, "loss": 0.6117, "step": 10445 }, { "epoch": 0.3049837960935447, "grad_norm": 0.7348056268380055, "learning_rate": 3.861313868613139e-06, "loss": 0.6511, "step": 10446 }, { "epoch": 0.30501299232139206, "grad_norm": 0.7434121375943107, "learning_rate": 3.861151662611517e-06, "loss": 0.6865, "step": 10447 }, { "epoch": 0.3050421885492394, "grad_norm": 0.720753855843977, "learning_rate": 3.860989456609895e-06, "loss": 0.6602, "step": 10448 }, { "epoch": 0.3050713847770868, "grad_norm": 0.6938628953909141, "learning_rate": 3.860827250608273e-06, "loss": 0.6169, "step": 10449 }, { "epoch": 0.30510058100493415, "grad_norm": 0.692352725053397, "learning_rate": 3.860665044606651e-06, "loss": 0.6389, "step": 10450 }, { "epoch": 0.3051297772327815, "grad_norm": 0.7567751972746057, "learning_rate": 3.860502838605029e-06, "loss": 0.6393, "step": 10451 }, { "epoch": 0.30515897346062887, "grad_norm": 0.7353665563110525, "learning_rate": 3.860340632603407e-06, "loss": 0.6523, "step": 10452 }, { "epoch": 0.30518816968847623, "grad_norm": 0.7086442650252158, "learning_rate": 3.860178426601784e-06, "loss": 0.6534, "step": 10453 }, { "epoch": 0.3052173659163236, "grad_norm": 0.7055160752242229, "learning_rate": 3.860016220600162e-06, "loss": 0.618, "step": 10454 }, { "epoch": 0.30524656214417095, "grad_norm": 0.7723830633174733, "learning_rate": 3.85985401459854e-06, "loss": 0.7185, "step": 10455 }, { "epoch": 0.3052757583720183, "grad_norm": 0.7553088308872745, "learning_rate": 3.859691808596918e-06, "loss": 0.7011, "step": 10456 }, { "epoch": 0.3053049545998657, "grad_norm": 0.7413074320300872, "learning_rate": 3.859529602595296e-06, "loss": 0.6308, "step": 10457 }, { "epoch": 0.30533415082771304, "grad_norm": 0.6956813905278832, "learning_rate": 3.859367396593675e-06, "loss": 0.5964, "step": 10458 }, { "epoch": 0.3053633470555604, "grad_norm": 0.7541422764591998, "learning_rate": 3.859205190592052e-06, "loss": 0.698, "step": 10459 }, { "epoch": 0.30539254328340776, "grad_norm": 0.7011047908758661, "learning_rate": 3.85904298459043e-06, "loss": 0.6114, "step": 10460 }, { "epoch": 0.3054217395112551, "grad_norm": 0.6686703335102328, "learning_rate": 3.858880778588808e-06, "loss": 0.533, "step": 10461 }, { "epoch": 0.3054509357391025, "grad_norm": 0.7390491724906684, "learning_rate": 3.858718572587186e-06, "loss": 0.6363, "step": 10462 }, { "epoch": 0.3054801319669499, "grad_norm": 0.7128228104436282, "learning_rate": 3.858556366585564e-06, "loss": 0.6349, "step": 10463 }, { "epoch": 0.30550932819479726, "grad_norm": 0.75083739085922, "learning_rate": 3.858394160583942e-06, "loss": 0.7032, "step": 10464 }, { "epoch": 0.3055385244226446, "grad_norm": 0.711535028498859, "learning_rate": 3.85823195458232e-06, "loss": 0.6481, "step": 10465 }, { "epoch": 0.305567720650492, "grad_norm": 0.7790959362958234, "learning_rate": 3.858069748580698e-06, "loss": 0.7358, "step": 10466 }, { "epoch": 0.30559691687833934, "grad_norm": 0.7566909792966334, "learning_rate": 3.857907542579075e-06, "loss": 0.7423, "step": 10467 }, { "epoch": 0.3056261131061867, "grad_norm": 0.7215639427687279, "learning_rate": 3.857745336577453e-06, "loss": 0.6475, "step": 10468 }, { "epoch": 0.30565530933403406, "grad_norm": 0.7447406225387438, "learning_rate": 3.857583130575831e-06, "loss": 0.6493, "step": 10469 }, { "epoch": 0.3056845055618814, "grad_norm": 0.7553435939189254, "learning_rate": 3.857420924574209e-06, "loss": 0.7159, "step": 10470 }, { "epoch": 0.3057137017897288, "grad_norm": 0.7723849298813147, "learning_rate": 3.857258718572587e-06, "loss": 0.7582, "step": 10471 }, { "epoch": 0.30574289801757615, "grad_norm": 0.6931753215376008, "learning_rate": 3.857096512570965e-06, "loss": 0.5965, "step": 10472 }, { "epoch": 0.3057720942454235, "grad_norm": 0.7222362831321258, "learning_rate": 3.856934306569343e-06, "loss": 0.6862, "step": 10473 }, { "epoch": 0.30580129047327087, "grad_norm": 0.719415281980838, "learning_rate": 3.856772100567721e-06, "loss": 0.6296, "step": 10474 }, { "epoch": 0.30583048670111823, "grad_norm": 0.7524485036982249, "learning_rate": 3.856609894566099e-06, "loss": 0.6798, "step": 10475 }, { "epoch": 0.3058596829289656, "grad_norm": 0.7643314221760672, "learning_rate": 3.8564476885644765e-06, "loss": 0.7334, "step": 10476 }, { "epoch": 0.30588887915681295, "grad_norm": 0.7199866780149597, "learning_rate": 3.856285482562855e-06, "loss": 0.6609, "step": 10477 }, { "epoch": 0.3059180753846603, "grad_norm": 0.7804394886771392, "learning_rate": 3.856123276561233e-06, "loss": 0.6548, "step": 10478 }, { "epoch": 0.3059472716125077, "grad_norm": 0.7281825126471052, "learning_rate": 3.855961070559611e-06, "loss": 0.661, "step": 10479 }, { "epoch": 0.30597646784035504, "grad_norm": 0.6468459075160191, "learning_rate": 3.855798864557989e-06, "loss": 0.557, "step": 10480 }, { "epoch": 0.3060056640682024, "grad_norm": 0.6814880941304755, "learning_rate": 3.8556366585563674e-06, "loss": 0.5923, "step": 10481 }, { "epoch": 0.30603486029604976, "grad_norm": 0.749021633762924, "learning_rate": 3.855474452554745e-06, "loss": 0.7004, "step": 10482 }, { "epoch": 0.3060640565238971, "grad_norm": 0.7214551358331144, "learning_rate": 3.855312246553123e-06, "loss": 0.6299, "step": 10483 }, { "epoch": 0.3060932527517445, "grad_norm": 0.7199790770781325, "learning_rate": 3.855150040551501e-06, "loss": 0.6378, "step": 10484 }, { "epoch": 0.30612244897959184, "grad_norm": 0.7500086029370636, "learning_rate": 3.854987834549879e-06, "loss": 0.7329, "step": 10485 }, { "epoch": 0.3061516452074392, "grad_norm": 0.6998544025516156, "learning_rate": 3.854825628548257e-06, "loss": 0.6174, "step": 10486 }, { "epoch": 0.30618084143528657, "grad_norm": 0.7274906955906963, "learning_rate": 3.854663422546635e-06, "loss": 0.6031, "step": 10487 }, { "epoch": 0.3062100376631339, "grad_norm": 0.7020350174492777, "learning_rate": 3.854501216545013e-06, "loss": 0.599, "step": 10488 }, { "epoch": 0.3062392338909813, "grad_norm": 0.7409442002302564, "learning_rate": 3.854339010543391e-06, "loss": 0.6113, "step": 10489 }, { "epoch": 0.30626843011882865, "grad_norm": 0.6979945157032276, "learning_rate": 3.854176804541769e-06, "loss": 0.6387, "step": 10490 }, { "epoch": 0.306297626346676, "grad_norm": 0.7863652510721993, "learning_rate": 3.854014598540146e-06, "loss": 0.6898, "step": 10491 }, { "epoch": 0.30632682257452337, "grad_norm": 0.735584435522913, "learning_rate": 3.853852392538524e-06, "loss": 0.7153, "step": 10492 }, { "epoch": 0.30635601880237073, "grad_norm": 0.7606627761590719, "learning_rate": 3.853690186536902e-06, "loss": 0.7105, "step": 10493 }, { "epoch": 0.3063852150302181, "grad_norm": 0.7088559455225608, "learning_rate": 3.85352798053528e-06, "loss": 0.6261, "step": 10494 }, { "epoch": 0.30641441125806546, "grad_norm": 0.7617741769026559, "learning_rate": 3.853365774533658e-06, "loss": 0.6721, "step": 10495 }, { "epoch": 0.3064436074859128, "grad_norm": 0.7585207024111386, "learning_rate": 3.853203568532036e-06, "loss": 0.7123, "step": 10496 }, { "epoch": 0.3064728037137602, "grad_norm": 0.7589661853441184, "learning_rate": 3.853041362530414e-06, "loss": 0.6749, "step": 10497 }, { "epoch": 0.30650199994160754, "grad_norm": 0.7830009388546388, "learning_rate": 3.852879156528792e-06, "loss": 0.7114, "step": 10498 }, { "epoch": 0.3065311961694549, "grad_norm": 0.7402451773716866, "learning_rate": 3.85271695052717e-06, "loss": 0.6584, "step": 10499 }, { "epoch": 0.30656039239730226, "grad_norm": 0.7257100650804398, "learning_rate": 3.852554744525548e-06, "loss": 0.6772, "step": 10500 }, { "epoch": 0.3065895886251496, "grad_norm": 0.7390161376194208, "learning_rate": 3.852392538523926e-06, "loss": 0.6337, "step": 10501 }, { "epoch": 0.306618784852997, "grad_norm": 0.7285573440438257, "learning_rate": 3.852230332522304e-06, "loss": 0.6069, "step": 10502 }, { "epoch": 0.30664798108084435, "grad_norm": 0.797364726852773, "learning_rate": 3.852068126520682e-06, "loss": 0.6442, "step": 10503 }, { "epoch": 0.3066771773086917, "grad_norm": 0.6702139195094265, "learning_rate": 3.85190592051906e-06, "loss": 0.5844, "step": 10504 }, { "epoch": 0.30670637353653907, "grad_norm": 0.7616496599820368, "learning_rate": 3.851743714517437e-06, "loss": 0.5951, "step": 10505 }, { "epoch": 0.30673556976438643, "grad_norm": 0.707804609109208, "learning_rate": 3.851581508515815e-06, "loss": 0.6242, "step": 10506 }, { "epoch": 0.3067647659922338, "grad_norm": 0.7177151844080195, "learning_rate": 3.851419302514193e-06, "loss": 0.5986, "step": 10507 }, { "epoch": 0.30679396222008115, "grad_norm": 0.7456895980210542, "learning_rate": 3.851257096512571e-06, "loss": 0.6982, "step": 10508 }, { "epoch": 0.3068231584479285, "grad_norm": 0.746520526223586, "learning_rate": 3.851094890510949e-06, "loss": 0.6461, "step": 10509 }, { "epoch": 0.3068523546757759, "grad_norm": 0.8324563236185951, "learning_rate": 3.850932684509327e-06, "loss": 0.7124, "step": 10510 }, { "epoch": 0.30688155090362323, "grad_norm": 0.7777344224117155, "learning_rate": 3.850770478507705e-06, "loss": 0.6648, "step": 10511 }, { "epoch": 0.3069107471314706, "grad_norm": 0.7457358690448147, "learning_rate": 3.850608272506083e-06, "loss": 0.6971, "step": 10512 }, { "epoch": 0.30693994335931796, "grad_norm": 0.8112982879506426, "learning_rate": 3.850446066504461e-06, "loss": 0.6789, "step": 10513 }, { "epoch": 0.3069691395871653, "grad_norm": 0.7711258174719281, "learning_rate": 3.850283860502838e-06, "loss": 0.7294, "step": 10514 }, { "epoch": 0.3069983358150127, "grad_norm": 0.7583413088074579, "learning_rate": 3.850121654501217e-06, "loss": 0.6861, "step": 10515 }, { "epoch": 0.30702753204286004, "grad_norm": 0.7132107874554339, "learning_rate": 3.849959448499595e-06, "loss": 0.6076, "step": 10516 }, { "epoch": 0.3070567282707074, "grad_norm": 0.8088728372461456, "learning_rate": 3.849797242497973e-06, "loss": 0.7659, "step": 10517 }, { "epoch": 0.30708592449855476, "grad_norm": 0.7319664764199109, "learning_rate": 3.849635036496351e-06, "loss": 0.5739, "step": 10518 }, { "epoch": 0.3071151207264021, "grad_norm": 0.6938127752428986, "learning_rate": 3.849472830494729e-06, "loss": 0.6245, "step": 10519 }, { "epoch": 0.3071443169542495, "grad_norm": 0.678680015222823, "learning_rate": 3.849310624493106e-06, "loss": 0.5647, "step": 10520 }, { "epoch": 0.30717351318209685, "grad_norm": 0.8111741424310995, "learning_rate": 3.849148418491484e-06, "loss": 0.7039, "step": 10521 }, { "epoch": 0.3072027094099442, "grad_norm": 0.762028941395305, "learning_rate": 3.848986212489862e-06, "loss": 0.7091, "step": 10522 }, { "epoch": 0.3072319056377916, "grad_norm": 0.7461480448654146, "learning_rate": 3.84882400648824e-06, "loss": 0.6797, "step": 10523 }, { "epoch": 0.307261101865639, "grad_norm": 0.7635447754850285, "learning_rate": 3.848661800486618e-06, "loss": 0.7087, "step": 10524 }, { "epoch": 0.30729029809348635, "grad_norm": 0.7848727399632563, "learning_rate": 3.848499594484996e-06, "loss": 0.7162, "step": 10525 }, { "epoch": 0.3073194943213337, "grad_norm": 0.8003918310392713, "learning_rate": 3.848337388483374e-06, "loss": 0.7447, "step": 10526 }, { "epoch": 0.30734869054918107, "grad_norm": 0.7230959904589651, "learning_rate": 3.848175182481752e-06, "loss": 0.6394, "step": 10527 }, { "epoch": 0.30737788677702843, "grad_norm": 0.7930794102869977, "learning_rate": 3.84801297648013e-06, "loss": 0.7091, "step": 10528 }, { "epoch": 0.3074070830048758, "grad_norm": 0.7896549912218641, "learning_rate": 3.8478507704785075e-06, "loss": 0.6637, "step": 10529 }, { "epoch": 0.30743627923272315, "grad_norm": 0.7280381651274526, "learning_rate": 3.8476885644768855e-06, "loss": 0.6282, "step": 10530 }, { "epoch": 0.3074654754605705, "grad_norm": 0.8331082786990658, "learning_rate": 3.8475263584752635e-06, "loss": 0.6859, "step": 10531 }, { "epoch": 0.3074946716884179, "grad_norm": 0.842644689948815, "learning_rate": 3.8473641524736415e-06, "loss": 0.6428, "step": 10532 }, { "epoch": 0.30752386791626524, "grad_norm": 0.7468717476110482, "learning_rate": 3.84720194647202e-06, "loss": 0.6652, "step": 10533 }, { "epoch": 0.3075530641441126, "grad_norm": 0.7356595847678277, "learning_rate": 3.8470397404703975e-06, "loss": 0.6723, "step": 10534 }, { "epoch": 0.30758226037195996, "grad_norm": 0.8487974395923072, "learning_rate": 3.8468775344687756e-06, "loss": 0.7335, "step": 10535 }, { "epoch": 0.3076114565998073, "grad_norm": 0.759014892873694, "learning_rate": 3.8467153284671536e-06, "loss": 0.6909, "step": 10536 }, { "epoch": 0.3076406528276547, "grad_norm": 0.8213699989427548, "learning_rate": 3.8465531224655316e-06, "loss": 0.6437, "step": 10537 }, { "epoch": 0.30766984905550204, "grad_norm": 0.6999343063346231, "learning_rate": 3.84639091646391e-06, "loss": 0.6622, "step": 10538 }, { "epoch": 0.3076990452833494, "grad_norm": 0.8544159357432066, "learning_rate": 3.846228710462288e-06, "loss": 0.7094, "step": 10539 }, { "epoch": 0.30772824151119677, "grad_norm": 0.7379894600156298, "learning_rate": 3.846066504460666e-06, "loss": 0.6803, "step": 10540 }, { "epoch": 0.3077574377390441, "grad_norm": 0.7755531970457208, "learning_rate": 3.845904298459044e-06, "loss": 0.6833, "step": 10541 }, { "epoch": 0.3077866339668915, "grad_norm": 0.7266241355257886, "learning_rate": 3.845742092457422e-06, "loss": 0.6645, "step": 10542 }, { "epoch": 0.30781583019473885, "grad_norm": 0.778007847519062, "learning_rate": 3.845579886455799e-06, "loss": 0.6458, "step": 10543 }, { "epoch": 0.3078450264225862, "grad_norm": 0.9898123304640999, "learning_rate": 3.845417680454177e-06, "loss": 0.7079, "step": 10544 }, { "epoch": 0.30787422265043357, "grad_norm": 0.8107000163177894, "learning_rate": 3.845255474452555e-06, "loss": 0.7005, "step": 10545 }, { "epoch": 0.30790341887828093, "grad_norm": 0.8033508273694834, "learning_rate": 3.845093268450933e-06, "loss": 0.6575, "step": 10546 }, { "epoch": 0.3079326151061283, "grad_norm": 0.6867935434179294, "learning_rate": 3.844931062449311e-06, "loss": 0.6327, "step": 10547 }, { "epoch": 0.30796181133397565, "grad_norm": 0.7417110897835916, "learning_rate": 3.844768856447689e-06, "loss": 0.6947, "step": 10548 }, { "epoch": 0.307991007561823, "grad_norm": 0.7194577376690316, "learning_rate": 3.844606650446067e-06, "loss": 0.6508, "step": 10549 }, { "epoch": 0.3080202037896704, "grad_norm": 0.7583471899275734, "learning_rate": 3.844444444444445e-06, "loss": 0.6143, "step": 10550 }, { "epoch": 0.30804940001751774, "grad_norm": 0.7104580370700826, "learning_rate": 3.844282238442823e-06, "loss": 0.6656, "step": 10551 }, { "epoch": 0.3080785962453651, "grad_norm": 0.7943392189793128, "learning_rate": 3.844120032441201e-06, "loss": 0.7832, "step": 10552 }, { "epoch": 0.30810779247321246, "grad_norm": 0.6804355297333393, "learning_rate": 3.843957826439579e-06, "loss": 0.5536, "step": 10553 }, { "epoch": 0.3081369887010598, "grad_norm": 0.739447454582823, "learning_rate": 3.843795620437957e-06, "loss": 0.6805, "step": 10554 }, { "epoch": 0.3081661849289072, "grad_norm": 0.7397620881685825, "learning_rate": 3.843633414436335e-06, "loss": 0.6748, "step": 10555 }, { "epoch": 0.30819538115675454, "grad_norm": 0.7452533670465668, "learning_rate": 3.843471208434713e-06, "loss": 0.6245, "step": 10556 }, { "epoch": 0.3082245773846019, "grad_norm": 0.706046716399873, "learning_rate": 3.843309002433091e-06, "loss": 0.6186, "step": 10557 }, { "epoch": 0.30825377361244927, "grad_norm": 0.8100377438142382, "learning_rate": 3.843146796431468e-06, "loss": 0.7895, "step": 10558 }, { "epoch": 0.30828296984029663, "grad_norm": 0.7115237568393461, "learning_rate": 3.842984590429846e-06, "loss": 0.6051, "step": 10559 }, { "epoch": 0.308312166068144, "grad_norm": 0.7230948920728699, "learning_rate": 3.842822384428224e-06, "loss": 0.6804, "step": 10560 }, { "epoch": 0.30834136229599135, "grad_norm": 0.7423176689700725, "learning_rate": 3.842660178426602e-06, "loss": 0.7081, "step": 10561 }, { "epoch": 0.3083705585238387, "grad_norm": 0.6941529640610145, "learning_rate": 3.84249797242498e-06, "loss": 0.6374, "step": 10562 }, { "epoch": 0.3083997547516861, "grad_norm": 0.8100160576733091, "learning_rate": 3.842335766423358e-06, "loss": 0.7546, "step": 10563 }, { "epoch": 0.30842895097953343, "grad_norm": 0.7310379465949445, "learning_rate": 3.842173560421736e-06, "loss": 0.6786, "step": 10564 }, { "epoch": 0.3084581472073808, "grad_norm": 0.7681954785399363, "learning_rate": 3.842011354420114e-06, "loss": 0.7749, "step": 10565 }, { "epoch": 0.30848734343522816, "grad_norm": 0.7557302366556219, "learning_rate": 3.841849148418492e-06, "loss": 0.7356, "step": 10566 }, { "epoch": 0.3085165396630755, "grad_norm": 0.9689572714215944, "learning_rate": 3.841686942416869e-06, "loss": 0.7352, "step": 10567 }, { "epoch": 0.3085457358909229, "grad_norm": 0.6895436768704262, "learning_rate": 3.841524736415247e-06, "loss": 0.6041, "step": 10568 }, { "epoch": 0.30857493211877024, "grad_norm": 0.8021386516753255, "learning_rate": 3.841362530413625e-06, "loss": 0.7262, "step": 10569 }, { "epoch": 0.3086041283466176, "grad_norm": 0.7214961220483631, "learning_rate": 3.841200324412003e-06, "loss": 0.6636, "step": 10570 }, { "epoch": 0.30863332457446496, "grad_norm": 0.7051360405310897, "learning_rate": 3.841038118410382e-06, "loss": 0.6382, "step": 10571 }, { "epoch": 0.3086625208023123, "grad_norm": 0.7676106649145991, "learning_rate": 3.840875912408759e-06, "loss": 0.7541, "step": 10572 }, { "epoch": 0.3086917170301597, "grad_norm": 0.7708794322484775, "learning_rate": 3.840713706407137e-06, "loss": 0.7016, "step": 10573 }, { "epoch": 0.30872091325800705, "grad_norm": 0.6841545867645292, "learning_rate": 3.840551500405515e-06, "loss": 0.5366, "step": 10574 }, { "epoch": 0.3087501094858544, "grad_norm": 0.7033328208266266, "learning_rate": 3.840389294403893e-06, "loss": 0.6343, "step": 10575 }, { "epoch": 0.30877930571370177, "grad_norm": 0.6666694425540481, "learning_rate": 3.840227088402271e-06, "loss": 0.5471, "step": 10576 }, { "epoch": 0.30880850194154913, "grad_norm": 0.7394146991759303, "learning_rate": 3.840064882400649e-06, "loss": 0.6152, "step": 10577 }, { "epoch": 0.3088376981693965, "grad_norm": 0.8134895947660221, "learning_rate": 3.839902676399027e-06, "loss": 0.7193, "step": 10578 }, { "epoch": 0.30886689439724385, "grad_norm": 0.6583266857500004, "learning_rate": 3.839740470397405e-06, "loss": 0.5473, "step": 10579 }, { "epoch": 0.3088960906250912, "grad_norm": 0.7220785906840633, "learning_rate": 3.839578264395783e-06, "loss": 0.6066, "step": 10580 }, { "epoch": 0.3089252868529386, "grad_norm": 0.7286728133466238, "learning_rate": 3.8394160583941605e-06, "loss": 0.5702, "step": 10581 }, { "epoch": 0.30895448308078594, "grad_norm": 0.6633615336830734, "learning_rate": 3.8392538523925385e-06, "loss": 0.5747, "step": 10582 }, { "epoch": 0.30898367930863335, "grad_norm": 0.7832492749585627, "learning_rate": 3.8390916463909165e-06, "loss": 0.6955, "step": 10583 }, { "epoch": 0.3090128755364807, "grad_norm": 0.7439308959861407, "learning_rate": 3.8389294403892945e-06, "loss": 0.6538, "step": 10584 }, { "epoch": 0.3090420717643281, "grad_norm": 1.6498049798267358, "learning_rate": 3.8387672343876725e-06, "loss": 0.5738, "step": 10585 }, { "epoch": 0.30907126799217544, "grad_norm": 0.7309252106874956, "learning_rate": 3.8386050283860505e-06, "loss": 0.613, "step": 10586 }, { "epoch": 0.3091004642200228, "grad_norm": 0.7776259020689337, "learning_rate": 3.8384428223844285e-06, "loss": 0.769, "step": 10587 }, { "epoch": 0.30912966044787016, "grad_norm": 0.8665490659522729, "learning_rate": 3.8382806163828065e-06, "loss": 0.6263, "step": 10588 }, { "epoch": 0.3091588566757175, "grad_norm": 0.7558690198660061, "learning_rate": 3.8381184103811845e-06, "loss": 0.6827, "step": 10589 }, { "epoch": 0.3091880529035649, "grad_norm": 0.8335936511828022, "learning_rate": 3.8379562043795625e-06, "loss": 0.6692, "step": 10590 }, { "epoch": 0.30921724913141224, "grad_norm": 0.7570830674736855, "learning_rate": 3.8377939983779405e-06, "loss": 0.7287, "step": 10591 }, { "epoch": 0.3092464453592596, "grad_norm": 0.7498247811090734, "learning_rate": 3.8376317923763186e-06, "loss": 0.6949, "step": 10592 }, { "epoch": 0.30927564158710696, "grad_norm": 0.7438924884158635, "learning_rate": 3.8374695863746966e-06, "loss": 0.7175, "step": 10593 }, { "epoch": 0.3093048378149543, "grad_norm": 0.7521032900217736, "learning_rate": 3.8373073803730746e-06, "loss": 0.6439, "step": 10594 }, { "epoch": 0.3093340340428017, "grad_norm": 0.7473842330017251, "learning_rate": 3.837145174371453e-06, "loss": 0.6971, "step": 10595 }, { "epoch": 0.30936323027064905, "grad_norm": 0.6782657652459776, "learning_rate": 3.83698296836983e-06, "loss": 0.5726, "step": 10596 }, { "epoch": 0.3093924264984964, "grad_norm": 0.8492525646247825, "learning_rate": 3.836820762368208e-06, "loss": 0.7718, "step": 10597 }, { "epoch": 0.30942162272634377, "grad_norm": 0.7315428959963607, "learning_rate": 3.836658556366586e-06, "loss": 0.642, "step": 10598 }, { "epoch": 0.30945081895419113, "grad_norm": 0.7007563678233132, "learning_rate": 3.836496350364964e-06, "loss": 0.6136, "step": 10599 }, { "epoch": 0.3094800151820385, "grad_norm": 0.7451434033888881, "learning_rate": 3.836334144363342e-06, "loss": 0.665, "step": 10600 }, { "epoch": 0.30950921140988585, "grad_norm": 0.7722258862929913, "learning_rate": 3.83617193836172e-06, "loss": 0.7805, "step": 10601 }, { "epoch": 0.3095384076377332, "grad_norm": 0.7692951773377167, "learning_rate": 3.836009732360098e-06, "loss": 0.6658, "step": 10602 }, { "epoch": 0.3095676038655806, "grad_norm": 0.7587553043736562, "learning_rate": 3.835847526358476e-06, "loss": 0.6794, "step": 10603 }, { "epoch": 0.30959680009342794, "grad_norm": 0.747205081988962, "learning_rate": 3.835685320356854e-06, "loss": 0.625, "step": 10604 }, { "epoch": 0.3096259963212753, "grad_norm": 0.7390635211729357, "learning_rate": 3.835523114355231e-06, "loss": 0.6963, "step": 10605 }, { "epoch": 0.30965519254912266, "grad_norm": 0.7563052942883052, "learning_rate": 3.835360908353609e-06, "loss": 0.666, "step": 10606 }, { "epoch": 0.30968438877697, "grad_norm": 0.7595908666790191, "learning_rate": 3.835198702351987e-06, "loss": 0.6697, "step": 10607 }, { "epoch": 0.3097135850048174, "grad_norm": 0.6981185081013472, "learning_rate": 3.835036496350365e-06, "loss": 0.6055, "step": 10608 }, { "epoch": 0.30974278123266474, "grad_norm": 0.8154113736354116, "learning_rate": 3.834874290348744e-06, "loss": 0.6829, "step": 10609 }, { "epoch": 0.3097719774605121, "grad_norm": 2.2174760714311543, "learning_rate": 3.834712084347121e-06, "loss": 0.7569, "step": 10610 }, { "epoch": 0.30980117368835947, "grad_norm": 0.7555041073039257, "learning_rate": 3.834549878345499e-06, "loss": 0.7208, "step": 10611 }, { "epoch": 0.3098303699162068, "grad_norm": 0.7475379049194231, "learning_rate": 3.834387672343877e-06, "loss": 0.6075, "step": 10612 }, { "epoch": 0.3098595661440542, "grad_norm": 0.760608749501849, "learning_rate": 3.834225466342255e-06, "loss": 0.6501, "step": 10613 }, { "epoch": 0.30988876237190155, "grad_norm": 0.7614519227344995, "learning_rate": 3.834063260340633e-06, "loss": 0.7276, "step": 10614 }, { "epoch": 0.3099179585997489, "grad_norm": 0.7403725172152577, "learning_rate": 3.833901054339011e-06, "loss": 0.7112, "step": 10615 }, { "epoch": 0.30994715482759627, "grad_norm": 0.7673582231827807, "learning_rate": 3.833738848337389e-06, "loss": 0.6956, "step": 10616 }, { "epoch": 0.30997635105544363, "grad_norm": 0.7457847868304764, "learning_rate": 3.833576642335767e-06, "loss": 0.6757, "step": 10617 }, { "epoch": 0.310005547283291, "grad_norm": 0.7929247342720744, "learning_rate": 3.833414436334145e-06, "loss": 0.74, "step": 10618 }, { "epoch": 0.31003474351113836, "grad_norm": 0.7132408757958149, "learning_rate": 3.833252230332522e-06, "loss": 0.6516, "step": 10619 }, { "epoch": 0.3100639397389857, "grad_norm": 0.7071934061304592, "learning_rate": 3.8330900243309e-06, "loss": 0.6007, "step": 10620 }, { "epoch": 0.3100931359668331, "grad_norm": 0.8113003591917483, "learning_rate": 3.832927818329278e-06, "loss": 0.7662, "step": 10621 }, { "epoch": 0.31012233219468044, "grad_norm": 0.7107751013333751, "learning_rate": 3.832765612327656e-06, "loss": 0.6113, "step": 10622 }, { "epoch": 0.3101515284225278, "grad_norm": 0.8399688670431656, "learning_rate": 3.832603406326034e-06, "loss": 0.7985, "step": 10623 }, { "epoch": 0.31018072465037516, "grad_norm": 0.7495891568669487, "learning_rate": 3.832441200324412e-06, "loss": 0.6269, "step": 10624 }, { "epoch": 0.3102099208782225, "grad_norm": 0.7443998362103581, "learning_rate": 3.83227899432279e-06, "loss": 0.6364, "step": 10625 }, { "epoch": 0.3102391171060699, "grad_norm": 0.7292548320708118, "learning_rate": 3.832116788321168e-06, "loss": 0.6633, "step": 10626 }, { "epoch": 0.31026831333391724, "grad_norm": 0.7494259465967686, "learning_rate": 3.831954582319546e-06, "loss": 0.7422, "step": 10627 }, { "epoch": 0.3102975095617646, "grad_norm": 0.7268336740767004, "learning_rate": 3.831792376317924e-06, "loss": 0.6534, "step": 10628 }, { "epoch": 0.31032670578961197, "grad_norm": 0.7198405180485499, "learning_rate": 3.831630170316302e-06, "loss": 0.6174, "step": 10629 }, { "epoch": 0.31035590201745933, "grad_norm": 0.7519723660003924, "learning_rate": 3.83146796431468e-06, "loss": 0.6342, "step": 10630 }, { "epoch": 0.3103850982453067, "grad_norm": 0.8462819690221312, "learning_rate": 3.831305758313058e-06, "loss": 0.7182, "step": 10631 }, { "epoch": 0.31041429447315405, "grad_norm": 0.7219053620607947, "learning_rate": 3.831143552311436e-06, "loss": 0.6498, "step": 10632 }, { "epoch": 0.3104434907010014, "grad_norm": 0.7348660931084269, "learning_rate": 3.830981346309814e-06, "loss": 0.6748, "step": 10633 }, { "epoch": 0.3104726869288488, "grad_norm": 0.7376762160431448, "learning_rate": 3.8308191403081915e-06, "loss": 0.6512, "step": 10634 }, { "epoch": 0.31050188315669613, "grad_norm": 0.6845584457792041, "learning_rate": 3.8306569343065695e-06, "loss": 0.5931, "step": 10635 }, { "epoch": 0.3105310793845435, "grad_norm": 0.7185578304710761, "learning_rate": 3.8304947283049475e-06, "loss": 0.6388, "step": 10636 }, { "epoch": 0.31056027561239086, "grad_norm": 0.6967592059985404, "learning_rate": 3.8303325223033255e-06, "loss": 0.6346, "step": 10637 }, { "epoch": 0.3105894718402382, "grad_norm": 0.756664595269563, "learning_rate": 3.8301703163017035e-06, "loss": 0.6757, "step": 10638 }, { "epoch": 0.3106186680680856, "grad_norm": 0.7362736304175542, "learning_rate": 3.8300081103000815e-06, "loss": 0.6533, "step": 10639 }, { "epoch": 0.31064786429593294, "grad_norm": 0.8017672062641278, "learning_rate": 3.8298459042984595e-06, "loss": 0.6986, "step": 10640 }, { "epoch": 0.3106770605237803, "grad_norm": 0.756421802584212, "learning_rate": 3.8296836982968375e-06, "loss": 0.6748, "step": 10641 }, { "epoch": 0.31070625675162766, "grad_norm": 0.7787812478962277, "learning_rate": 3.829521492295215e-06, "loss": 0.7598, "step": 10642 }, { "epoch": 0.310735452979475, "grad_norm": 0.7096900378751517, "learning_rate": 3.829359286293593e-06, "loss": 0.6247, "step": 10643 }, { "epoch": 0.31076464920732244, "grad_norm": 0.7025132237662493, "learning_rate": 3.829197080291971e-06, "loss": 0.5756, "step": 10644 }, { "epoch": 0.3107938454351698, "grad_norm": 0.7326636040301944, "learning_rate": 3.829034874290349e-06, "loss": 0.6633, "step": 10645 }, { "epoch": 0.31082304166301716, "grad_norm": 0.711673454781256, "learning_rate": 3.828872668288727e-06, "loss": 0.6169, "step": 10646 }, { "epoch": 0.3108522378908645, "grad_norm": 0.8099189954390849, "learning_rate": 3.8287104622871055e-06, "loss": 0.6731, "step": 10647 }, { "epoch": 0.3108814341187119, "grad_norm": 0.7028752181183086, "learning_rate": 3.828548256285483e-06, "loss": 0.6296, "step": 10648 }, { "epoch": 0.31091063034655925, "grad_norm": 0.6947576223486336, "learning_rate": 3.828386050283861e-06, "loss": 0.5996, "step": 10649 }, { "epoch": 0.3109398265744066, "grad_norm": 0.7990869278616545, "learning_rate": 3.828223844282239e-06, "loss": 0.7192, "step": 10650 }, { "epoch": 0.31096902280225397, "grad_norm": 0.7668834874411595, "learning_rate": 3.828061638280617e-06, "loss": 0.7788, "step": 10651 }, { "epoch": 0.31099821903010133, "grad_norm": 0.7018768698248855, "learning_rate": 3.827899432278995e-06, "loss": 0.6114, "step": 10652 }, { "epoch": 0.3110274152579487, "grad_norm": 0.7780675425662065, "learning_rate": 3.827737226277373e-06, "loss": 0.7059, "step": 10653 }, { "epoch": 0.31105661148579605, "grad_norm": 0.6784446951372115, "learning_rate": 3.827575020275751e-06, "loss": 0.5878, "step": 10654 }, { "epoch": 0.3110858077136434, "grad_norm": 0.7570466575199486, "learning_rate": 3.827412814274129e-06, "loss": 0.7307, "step": 10655 }, { "epoch": 0.3111150039414908, "grad_norm": 0.7231003434697192, "learning_rate": 3.827250608272507e-06, "loss": 0.5745, "step": 10656 }, { "epoch": 0.31114420016933814, "grad_norm": 0.7346145916444927, "learning_rate": 3.827088402270884e-06, "loss": 0.673, "step": 10657 }, { "epoch": 0.3111733963971855, "grad_norm": 0.7939767735054383, "learning_rate": 3.826926196269262e-06, "loss": 0.6848, "step": 10658 }, { "epoch": 0.31120259262503286, "grad_norm": 0.738277987406183, "learning_rate": 3.82676399026764e-06, "loss": 0.6583, "step": 10659 }, { "epoch": 0.3112317888528802, "grad_norm": 0.7669911028315063, "learning_rate": 3.826601784266018e-06, "loss": 0.6291, "step": 10660 }, { "epoch": 0.3112609850807276, "grad_norm": 0.7308728902488827, "learning_rate": 3.826439578264396e-06, "loss": 0.6518, "step": 10661 }, { "epoch": 0.31129018130857494, "grad_norm": 0.8148578984492235, "learning_rate": 3.826277372262774e-06, "loss": 0.6994, "step": 10662 }, { "epoch": 0.3113193775364223, "grad_norm": 0.7396025143221109, "learning_rate": 3.826115166261152e-06, "loss": 0.6903, "step": 10663 }, { "epoch": 0.31134857376426966, "grad_norm": 0.8045775201535922, "learning_rate": 3.82595296025953e-06, "loss": 0.596, "step": 10664 }, { "epoch": 0.311377769992117, "grad_norm": 0.7585545357001452, "learning_rate": 3.825790754257908e-06, "loss": 0.673, "step": 10665 }, { "epoch": 0.3114069662199644, "grad_norm": 0.6836389597656176, "learning_rate": 3.825628548256286e-06, "loss": 0.5207, "step": 10666 }, { "epoch": 0.31143616244781175, "grad_norm": 0.781191585596761, "learning_rate": 3.825466342254664e-06, "loss": 0.697, "step": 10667 }, { "epoch": 0.3114653586756591, "grad_norm": 0.70953363403375, "learning_rate": 3.825304136253042e-06, "loss": 0.6673, "step": 10668 }, { "epoch": 0.31149455490350647, "grad_norm": 0.7722510868884152, "learning_rate": 3.82514193025142e-06, "loss": 0.7266, "step": 10669 }, { "epoch": 0.31152375113135383, "grad_norm": 0.6920268923846313, "learning_rate": 3.824979724249798e-06, "loss": 0.6034, "step": 10670 }, { "epoch": 0.3115529473592012, "grad_norm": 0.7923001389930145, "learning_rate": 3.824817518248176e-06, "loss": 0.7467, "step": 10671 }, { "epoch": 0.31158214358704855, "grad_norm": 0.7262515936495966, "learning_rate": 3.824655312246553e-06, "loss": 0.6978, "step": 10672 }, { "epoch": 0.3116113398148959, "grad_norm": 0.7128006822696669, "learning_rate": 3.824493106244931e-06, "loss": 0.6428, "step": 10673 }, { "epoch": 0.3116405360427433, "grad_norm": 0.6875848988478585, "learning_rate": 3.824330900243309e-06, "loss": 0.5914, "step": 10674 }, { "epoch": 0.31166973227059064, "grad_norm": 0.7158770938283061, "learning_rate": 3.824168694241687e-06, "loss": 0.6227, "step": 10675 }, { "epoch": 0.311698928498438, "grad_norm": 0.7500936113782142, "learning_rate": 3.824006488240065e-06, "loss": 0.6413, "step": 10676 }, { "epoch": 0.31172812472628536, "grad_norm": 0.718425964414778, "learning_rate": 3.823844282238443e-06, "loss": 0.6018, "step": 10677 }, { "epoch": 0.3117573209541327, "grad_norm": 0.7939289551995891, "learning_rate": 3.823682076236821e-06, "loss": 0.6603, "step": 10678 }, { "epoch": 0.3117865171819801, "grad_norm": 0.7339991134994732, "learning_rate": 3.823519870235199e-06, "loss": 0.6555, "step": 10679 }, { "epoch": 0.31181571340982744, "grad_norm": 0.7592057390739249, "learning_rate": 3.823357664233576e-06, "loss": 0.6409, "step": 10680 }, { "epoch": 0.3118449096376748, "grad_norm": 1.3849888857934798, "learning_rate": 3.823195458231954e-06, "loss": 0.6787, "step": 10681 }, { "epoch": 0.31187410586552217, "grad_norm": 0.7119595668356841, "learning_rate": 3.823033252230332e-06, "loss": 0.6167, "step": 10682 }, { "epoch": 0.3119033020933695, "grad_norm": 0.7801796280891171, "learning_rate": 3.82287104622871e-06, "loss": 0.7478, "step": 10683 }, { "epoch": 0.3119324983212169, "grad_norm": 0.7359096170679923, "learning_rate": 3.822708840227089e-06, "loss": 0.6623, "step": 10684 }, { "epoch": 0.31196169454906425, "grad_norm": 0.7212937395420025, "learning_rate": 3.822546634225467e-06, "loss": 0.6483, "step": 10685 }, { "epoch": 0.3119908907769116, "grad_norm": 0.7587046451999383, "learning_rate": 3.822384428223844e-06, "loss": 0.7013, "step": 10686 }, { "epoch": 0.312020087004759, "grad_norm": 0.7839579230582455, "learning_rate": 3.8222222222222224e-06, "loss": 0.6807, "step": 10687 }, { "epoch": 0.31204928323260633, "grad_norm": 0.6988698331792244, "learning_rate": 3.8220600162206004e-06, "loss": 0.6059, "step": 10688 }, { "epoch": 0.3120784794604537, "grad_norm": 0.7413623370564093, "learning_rate": 3.8218978102189784e-06, "loss": 0.6942, "step": 10689 }, { "epoch": 0.31210767568830106, "grad_norm": 0.7083735247482098, "learning_rate": 3.8217356042173564e-06, "loss": 0.6327, "step": 10690 }, { "epoch": 0.3121368719161484, "grad_norm": 0.7774396318073787, "learning_rate": 3.8215733982157345e-06, "loss": 0.6741, "step": 10691 }, { "epoch": 0.3121660681439958, "grad_norm": 0.7051947590377484, "learning_rate": 3.8214111922141125e-06, "loss": 0.5533, "step": 10692 }, { "epoch": 0.31219526437184314, "grad_norm": 0.7052918167728859, "learning_rate": 3.8212489862124905e-06, "loss": 0.61, "step": 10693 }, { "epoch": 0.3122244605996905, "grad_norm": 0.7303416887197924, "learning_rate": 3.8210867802108685e-06, "loss": 0.6508, "step": 10694 }, { "epoch": 0.31225365682753786, "grad_norm": 0.7181373638083837, "learning_rate": 3.820924574209246e-06, "loss": 0.6311, "step": 10695 }, { "epoch": 0.3122828530553852, "grad_norm": 0.7482600302820226, "learning_rate": 3.820762368207624e-06, "loss": 0.6722, "step": 10696 }, { "epoch": 0.3123120492832326, "grad_norm": 0.728289313316875, "learning_rate": 3.820600162206002e-06, "loss": 0.632, "step": 10697 }, { "epoch": 0.31234124551107995, "grad_norm": 0.7301275306231503, "learning_rate": 3.82043795620438e-06, "loss": 0.6425, "step": 10698 }, { "epoch": 0.3123704417389273, "grad_norm": 0.7521013983519504, "learning_rate": 3.820275750202758e-06, "loss": 0.7016, "step": 10699 }, { "epoch": 0.31239963796677467, "grad_norm": 0.8042843456778644, "learning_rate": 3.820113544201136e-06, "loss": 0.7232, "step": 10700 }, { "epoch": 0.31242883419462203, "grad_norm": 0.7109184034882833, "learning_rate": 3.819951338199514e-06, "loss": 0.6485, "step": 10701 }, { "epoch": 0.3124580304224694, "grad_norm": 0.747920924623022, "learning_rate": 3.819789132197892e-06, "loss": 0.656, "step": 10702 }, { "epoch": 0.31248722665031675, "grad_norm": 0.741015512892524, "learning_rate": 3.81962692619627e-06, "loss": 0.6914, "step": 10703 }, { "epoch": 0.31251642287816417, "grad_norm": 0.7342036754006566, "learning_rate": 3.819464720194648e-06, "loss": 0.654, "step": 10704 }, { "epoch": 0.31254561910601153, "grad_norm": 1.0163678289503413, "learning_rate": 3.819302514193026e-06, "loss": 0.7454, "step": 10705 }, { "epoch": 0.3125748153338589, "grad_norm": 0.6862209110096393, "learning_rate": 3.819140308191404e-06, "loss": 0.5796, "step": 10706 }, { "epoch": 0.31260401156170625, "grad_norm": 0.7072814486582578, "learning_rate": 3.818978102189782e-06, "loss": 0.6118, "step": 10707 }, { "epoch": 0.3126332077895536, "grad_norm": 0.7216055231861785, "learning_rate": 3.81881589618816e-06, "loss": 0.5981, "step": 10708 }, { "epoch": 0.312662404017401, "grad_norm": 0.7649536112100204, "learning_rate": 3.818653690186538e-06, "loss": 0.7621, "step": 10709 }, { "epoch": 0.31269160024524834, "grad_norm": 0.8318316242694166, "learning_rate": 3.818491484184915e-06, "loss": 0.716, "step": 10710 }, { "epoch": 0.3127207964730957, "grad_norm": 0.7102902734857033, "learning_rate": 3.818329278183293e-06, "loss": 0.6174, "step": 10711 }, { "epoch": 0.31274999270094306, "grad_norm": 0.7401893872582922, "learning_rate": 3.818167072181671e-06, "loss": 0.6941, "step": 10712 }, { "epoch": 0.3127791889287904, "grad_norm": 0.7415548946593077, "learning_rate": 3.818004866180049e-06, "loss": 0.6565, "step": 10713 }, { "epoch": 0.3128083851566378, "grad_norm": 0.767757471967682, "learning_rate": 3.817842660178427e-06, "loss": 0.6945, "step": 10714 }, { "epoch": 0.31283758138448514, "grad_norm": 0.7979044280085005, "learning_rate": 3.817680454176805e-06, "loss": 0.7112, "step": 10715 }, { "epoch": 0.3128667776123325, "grad_norm": 0.795074946630423, "learning_rate": 3.817518248175183e-06, "loss": 0.7452, "step": 10716 }, { "epoch": 0.31289597384017986, "grad_norm": 0.9263588810091732, "learning_rate": 3.817356042173561e-06, "loss": 0.6549, "step": 10717 }, { "epoch": 0.3129251700680272, "grad_norm": 0.7442783509634079, "learning_rate": 3.817193836171938e-06, "loss": 0.7153, "step": 10718 }, { "epoch": 0.3129543662958746, "grad_norm": 0.7293248069476385, "learning_rate": 3.817031630170316e-06, "loss": 0.6537, "step": 10719 }, { "epoch": 0.31298356252372195, "grad_norm": 0.7244003354402586, "learning_rate": 3.816869424168694e-06, "loss": 0.6804, "step": 10720 }, { "epoch": 0.3130127587515693, "grad_norm": 0.7400456039104822, "learning_rate": 3.816707218167072e-06, "loss": 0.6823, "step": 10721 }, { "epoch": 0.31304195497941667, "grad_norm": 0.719679316655015, "learning_rate": 3.816545012165451e-06, "loss": 0.6215, "step": 10722 }, { "epoch": 0.31307115120726403, "grad_norm": 0.8563502177684931, "learning_rate": 3.816382806163829e-06, "loss": 0.6457, "step": 10723 }, { "epoch": 0.3131003474351114, "grad_norm": 0.7296887509563963, "learning_rate": 3.816220600162206e-06, "loss": 0.6801, "step": 10724 }, { "epoch": 0.31312954366295875, "grad_norm": 0.7218365397247504, "learning_rate": 3.816058394160584e-06, "loss": 0.6344, "step": 10725 }, { "epoch": 0.3131587398908061, "grad_norm": 0.781987771820318, "learning_rate": 3.815896188158962e-06, "loss": 0.6216, "step": 10726 }, { "epoch": 0.3131879361186535, "grad_norm": 0.7267452821068952, "learning_rate": 3.81573398215734e-06, "loss": 0.6503, "step": 10727 }, { "epoch": 0.31321713234650084, "grad_norm": 0.7417016133502292, "learning_rate": 3.815571776155718e-06, "loss": 0.7196, "step": 10728 }, { "epoch": 0.3132463285743482, "grad_norm": 0.7511302431840767, "learning_rate": 3.815409570154096e-06, "loss": 0.6458, "step": 10729 }, { "epoch": 0.31327552480219556, "grad_norm": 0.6868887099454057, "learning_rate": 3.815247364152474e-06, "loss": 0.5359, "step": 10730 }, { "epoch": 0.3133047210300429, "grad_norm": 0.7257151498090518, "learning_rate": 3.815085158150852e-06, "loss": 0.6148, "step": 10731 }, { "epoch": 0.3133339172578903, "grad_norm": 0.722198977865615, "learning_rate": 3.81492295214923e-06, "loss": 0.6412, "step": 10732 }, { "epoch": 0.31336311348573764, "grad_norm": 0.8327277530722689, "learning_rate": 3.814760746147607e-06, "loss": 0.8155, "step": 10733 }, { "epoch": 0.313392309713585, "grad_norm": 0.7254605534402259, "learning_rate": 3.8145985401459858e-06, "loss": 0.6684, "step": 10734 }, { "epoch": 0.31342150594143237, "grad_norm": 0.743647812674643, "learning_rate": 3.8144363341443634e-06, "loss": 0.653, "step": 10735 }, { "epoch": 0.3134507021692797, "grad_norm": 0.7137984436650014, "learning_rate": 3.8142741281427414e-06, "loss": 0.6162, "step": 10736 }, { "epoch": 0.3134798983971271, "grad_norm": 0.751701556284825, "learning_rate": 3.8141119221411194e-06, "loss": 0.6708, "step": 10737 }, { "epoch": 0.31350909462497445, "grad_norm": 0.8108218656283831, "learning_rate": 3.8139497161394974e-06, "loss": 0.6706, "step": 10738 }, { "epoch": 0.3135382908528218, "grad_norm": 0.6813177744999162, "learning_rate": 3.8137875101378754e-06, "loss": 0.6248, "step": 10739 }, { "epoch": 0.31356748708066917, "grad_norm": 0.6975833812830545, "learning_rate": 3.813625304136253e-06, "loss": 0.6242, "step": 10740 }, { "epoch": 0.31359668330851653, "grad_norm": 0.7013645801483472, "learning_rate": 3.8134630981346314e-06, "loss": 0.5803, "step": 10741 }, { "epoch": 0.3136258795363639, "grad_norm": 0.7561212398141369, "learning_rate": 3.8133008921330094e-06, "loss": 0.6952, "step": 10742 }, { "epoch": 0.31365507576421126, "grad_norm": 0.7659051770066275, "learning_rate": 3.8131386861313874e-06, "loss": 0.6911, "step": 10743 }, { "epoch": 0.3136842719920586, "grad_norm": 0.7398523332915312, "learning_rate": 3.8129764801297654e-06, "loss": 0.7284, "step": 10744 }, { "epoch": 0.313713468219906, "grad_norm": 0.713508879354419, "learning_rate": 3.812814274128143e-06, "loss": 0.6553, "step": 10745 }, { "epoch": 0.31374266444775334, "grad_norm": 0.7626653922885511, "learning_rate": 3.812652068126521e-06, "loss": 0.6912, "step": 10746 }, { "epoch": 0.3137718606756007, "grad_norm": 0.6819022782882249, "learning_rate": 3.812489862124899e-06, "loss": 0.5617, "step": 10747 }, { "epoch": 0.31380105690344806, "grad_norm": 0.7822560921931215, "learning_rate": 3.812327656123277e-06, "loss": 0.6949, "step": 10748 }, { "epoch": 0.3138302531312954, "grad_norm": 0.7954387530459686, "learning_rate": 3.812165450121655e-06, "loss": 0.7605, "step": 10749 }, { "epoch": 0.3138594493591428, "grad_norm": 0.7352535729150382, "learning_rate": 3.8120032441200326e-06, "loss": 0.6548, "step": 10750 }, { "epoch": 0.31388864558699014, "grad_norm": 0.7056608565376357, "learning_rate": 3.8118410381184106e-06, "loss": 0.6704, "step": 10751 }, { "epoch": 0.3139178418148375, "grad_norm": 0.717707205516768, "learning_rate": 3.8116788321167886e-06, "loss": 0.6308, "step": 10752 }, { "epoch": 0.31394703804268487, "grad_norm": 0.8222314724467447, "learning_rate": 3.8115166261151666e-06, "loss": 0.7052, "step": 10753 }, { "epoch": 0.31397623427053223, "grad_norm": 0.9701707554212224, "learning_rate": 3.8113544201135442e-06, "loss": 0.6672, "step": 10754 }, { "epoch": 0.3140054304983796, "grad_norm": 0.7295528663384959, "learning_rate": 3.8111922141119222e-06, "loss": 0.7235, "step": 10755 }, { "epoch": 0.31403462672622695, "grad_norm": 0.7136311275329325, "learning_rate": 3.8110300081103002e-06, "loss": 0.6544, "step": 10756 }, { "epoch": 0.3140638229540743, "grad_norm": 0.6798285408012311, "learning_rate": 3.8108678021086782e-06, "loss": 0.5711, "step": 10757 }, { "epoch": 0.3140930191819217, "grad_norm": 0.7132089127424743, "learning_rate": 3.810705596107056e-06, "loss": 0.5831, "step": 10758 }, { "epoch": 0.31412221540976903, "grad_norm": 0.7391362741803431, "learning_rate": 3.810543390105434e-06, "loss": 0.6668, "step": 10759 }, { "epoch": 0.3141514116376164, "grad_norm": 0.7312304154960858, "learning_rate": 3.8103811841038123e-06, "loss": 0.6432, "step": 10760 }, { "epoch": 0.31418060786546376, "grad_norm": 0.7358306575183472, "learning_rate": 3.8102189781021903e-06, "loss": 0.6883, "step": 10761 }, { "epoch": 0.3142098040933111, "grad_norm": 0.7747898081020657, "learning_rate": 3.8100567721005683e-06, "loss": 0.677, "step": 10762 }, { "epoch": 0.3142390003211585, "grad_norm": 0.6639273854697282, "learning_rate": 3.8098945660989463e-06, "loss": 0.529, "step": 10763 }, { "epoch": 0.3142681965490059, "grad_norm": 0.6751299418712372, "learning_rate": 3.809732360097324e-06, "loss": 0.5849, "step": 10764 }, { "epoch": 0.31429739277685326, "grad_norm": 0.7624073621645721, "learning_rate": 3.809570154095702e-06, "loss": 0.7534, "step": 10765 }, { "epoch": 0.3143265890047006, "grad_norm": 0.7531011766674529, "learning_rate": 3.80940794809408e-06, "loss": 0.6333, "step": 10766 }, { "epoch": 0.314355785232548, "grad_norm": 0.6986983170582362, "learning_rate": 3.809245742092458e-06, "loss": 0.5998, "step": 10767 }, { "epoch": 0.31438498146039534, "grad_norm": 0.788267500868447, "learning_rate": 3.809083536090836e-06, "loss": 0.7693, "step": 10768 }, { "epoch": 0.3144141776882427, "grad_norm": 0.7815316670090761, "learning_rate": 3.8089213300892135e-06, "loss": 0.6825, "step": 10769 }, { "epoch": 0.31444337391609006, "grad_norm": 0.733197966208532, "learning_rate": 3.8087591240875915e-06, "loss": 0.6957, "step": 10770 }, { "epoch": 0.3144725701439374, "grad_norm": 0.8215642144163299, "learning_rate": 3.8085969180859695e-06, "loss": 0.6022, "step": 10771 }, { "epoch": 0.3145017663717848, "grad_norm": 0.7115611608356877, "learning_rate": 3.8084347120843475e-06, "loss": 0.6299, "step": 10772 }, { "epoch": 0.31453096259963215, "grad_norm": 0.6894945763367948, "learning_rate": 3.808272506082725e-06, "loss": 0.5928, "step": 10773 }, { "epoch": 0.3145601588274795, "grad_norm": 0.6902251610729981, "learning_rate": 3.808110300081103e-06, "loss": 0.6188, "step": 10774 }, { "epoch": 0.31458935505532687, "grad_norm": 0.7022465511820032, "learning_rate": 3.807948094079481e-06, "loss": 0.6407, "step": 10775 }, { "epoch": 0.31461855128317423, "grad_norm": 1.048546399633785, "learning_rate": 3.807785888077859e-06, "loss": 0.731, "step": 10776 }, { "epoch": 0.3146477475110216, "grad_norm": 0.7304799551513893, "learning_rate": 3.8076236820762367e-06, "loss": 0.6851, "step": 10777 }, { "epoch": 0.31467694373886895, "grad_norm": 0.738658968267197, "learning_rate": 3.8074614760746147e-06, "loss": 0.679, "step": 10778 }, { "epoch": 0.3147061399667163, "grad_norm": 0.7295723626084806, "learning_rate": 3.807299270072993e-06, "loss": 0.5932, "step": 10779 }, { "epoch": 0.3147353361945637, "grad_norm": 0.729271637917789, "learning_rate": 3.807137064071371e-06, "loss": 0.6382, "step": 10780 }, { "epoch": 0.31476453242241104, "grad_norm": 0.7512096335724683, "learning_rate": 3.806974858069749e-06, "loss": 0.6786, "step": 10781 }, { "epoch": 0.3147937286502584, "grad_norm": 0.7573957682407967, "learning_rate": 3.806812652068127e-06, "loss": 0.6736, "step": 10782 }, { "epoch": 0.31482292487810576, "grad_norm": 0.7373788007208681, "learning_rate": 3.8066504460665047e-06, "loss": 0.657, "step": 10783 }, { "epoch": 0.3148521211059531, "grad_norm": 0.8020279882979994, "learning_rate": 3.8064882400648827e-06, "loss": 0.8359, "step": 10784 }, { "epoch": 0.3148813173338005, "grad_norm": 0.8345681251380208, "learning_rate": 3.8063260340632607e-06, "loss": 0.7693, "step": 10785 }, { "epoch": 0.31491051356164784, "grad_norm": 0.8019539998617553, "learning_rate": 3.8061638280616387e-06, "loss": 0.7253, "step": 10786 }, { "epoch": 0.3149397097894952, "grad_norm": 0.7739079666744065, "learning_rate": 3.8060016220600168e-06, "loss": 0.6867, "step": 10787 }, { "epoch": 0.31496890601734256, "grad_norm": 0.7754285200040173, "learning_rate": 3.8058394160583943e-06, "loss": 0.7206, "step": 10788 }, { "epoch": 0.3149981022451899, "grad_norm": 0.7560446602018019, "learning_rate": 3.8056772100567723e-06, "loss": 0.6855, "step": 10789 }, { "epoch": 0.3150272984730373, "grad_norm": 0.7267019544656691, "learning_rate": 3.8055150040551503e-06, "loss": 0.6222, "step": 10790 }, { "epoch": 0.31505649470088465, "grad_norm": 0.7340272582483283, "learning_rate": 3.8053527980535284e-06, "loss": 0.6599, "step": 10791 }, { "epoch": 0.315085690928732, "grad_norm": 0.7300749977442388, "learning_rate": 3.805190592051906e-06, "loss": 0.6073, "step": 10792 }, { "epoch": 0.31511488715657937, "grad_norm": 0.7514755147106721, "learning_rate": 3.805028386050284e-06, "loss": 0.7245, "step": 10793 }, { "epoch": 0.31514408338442673, "grad_norm": 0.7605509261057082, "learning_rate": 3.804866180048662e-06, "loss": 0.6626, "step": 10794 }, { "epoch": 0.3151732796122741, "grad_norm": 0.772283080157237, "learning_rate": 3.80470397404704e-06, "loss": 0.7129, "step": 10795 }, { "epoch": 0.31520247584012145, "grad_norm": 0.7347581854459043, "learning_rate": 3.8045417680454175e-06, "loss": 0.6558, "step": 10796 }, { "epoch": 0.3152316720679688, "grad_norm": 0.7362664308308962, "learning_rate": 3.8043795620437956e-06, "loss": 0.6905, "step": 10797 }, { "epoch": 0.3152608682958162, "grad_norm": 0.6864462497598435, "learning_rate": 3.804217356042174e-06, "loss": 0.6305, "step": 10798 }, { "epoch": 0.31529006452366354, "grad_norm": 0.7718851911313486, "learning_rate": 3.804055150040552e-06, "loss": 0.697, "step": 10799 }, { "epoch": 0.3153192607515109, "grad_norm": 0.7923915892463123, "learning_rate": 3.80389294403893e-06, "loss": 0.6606, "step": 10800 }, { "epoch": 0.31534845697935826, "grad_norm": 0.754353720572768, "learning_rate": 3.803730738037308e-06, "loss": 0.7446, "step": 10801 }, { "epoch": 0.3153776532072056, "grad_norm": 0.7308003139657085, "learning_rate": 3.8035685320356856e-06, "loss": 0.6279, "step": 10802 }, { "epoch": 0.315406849435053, "grad_norm": 0.7846923553068588, "learning_rate": 3.8034063260340636e-06, "loss": 0.7291, "step": 10803 }, { "epoch": 0.31543604566290034, "grad_norm": 0.707083471444532, "learning_rate": 3.8032441200324416e-06, "loss": 0.614, "step": 10804 }, { "epoch": 0.3154652418907477, "grad_norm": 0.774062274386386, "learning_rate": 3.8030819140308196e-06, "loss": 0.7157, "step": 10805 }, { "epoch": 0.31549443811859507, "grad_norm": 0.7299712094762134, "learning_rate": 3.8029197080291976e-06, "loss": 0.6711, "step": 10806 }, { "epoch": 0.3155236343464424, "grad_norm": 0.7012957335540501, "learning_rate": 3.802757502027575e-06, "loss": 0.5873, "step": 10807 }, { "epoch": 0.3155528305742898, "grad_norm": 0.7237437037820378, "learning_rate": 3.802595296025953e-06, "loss": 0.6388, "step": 10808 }, { "epoch": 0.31558202680213715, "grad_norm": 0.7121461399868059, "learning_rate": 3.802433090024331e-06, "loss": 0.6571, "step": 10809 }, { "epoch": 0.3156112230299845, "grad_norm": 0.7323117161696395, "learning_rate": 3.8022708840227092e-06, "loss": 0.6073, "step": 10810 }, { "epoch": 0.3156404192578319, "grad_norm": 0.7652452308333034, "learning_rate": 3.802108678021087e-06, "loss": 0.7046, "step": 10811 }, { "epoch": 0.31566961548567923, "grad_norm": 0.8094390957135964, "learning_rate": 3.801946472019465e-06, "loss": 0.7171, "step": 10812 }, { "epoch": 0.3156988117135266, "grad_norm": 0.7524052578933556, "learning_rate": 3.801784266017843e-06, "loss": 0.6333, "step": 10813 }, { "epoch": 0.31572800794137396, "grad_norm": 0.7570388279630611, "learning_rate": 3.801622060016221e-06, "loss": 0.6419, "step": 10814 }, { "epoch": 0.3157572041692213, "grad_norm": 0.703835981181705, "learning_rate": 3.8014598540145984e-06, "loss": 0.6047, "step": 10815 }, { "epoch": 0.3157864003970687, "grad_norm": 0.7963407122096147, "learning_rate": 3.8012976480129764e-06, "loss": 0.7048, "step": 10816 }, { "epoch": 0.31581559662491604, "grad_norm": 0.8122334175265048, "learning_rate": 3.801135442011355e-06, "loss": 0.7106, "step": 10817 }, { "epoch": 0.3158447928527634, "grad_norm": 0.7191215641463288, "learning_rate": 3.800973236009733e-06, "loss": 0.6482, "step": 10818 }, { "epoch": 0.31587398908061076, "grad_norm": 0.7017013769108809, "learning_rate": 3.800811030008111e-06, "loss": 0.5702, "step": 10819 }, { "epoch": 0.3159031853084581, "grad_norm": 0.7767741461622437, "learning_rate": 3.800648824006489e-06, "loss": 0.7548, "step": 10820 }, { "epoch": 0.3159323815363055, "grad_norm": 0.7121061792391394, "learning_rate": 3.8004866180048664e-06, "loss": 0.5728, "step": 10821 }, { "epoch": 0.31596157776415285, "grad_norm": 0.7537759174559651, "learning_rate": 3.8003244120032444e-06, "loss": 0.6461, "step": 10822 }, { "epoch": 0.3159907739920002, "grad_norm": 0.7568169906298723, "learning_rate": 3.8001622060016225e-06, "loss": 0.6789, "step": 10823 }, { "epoch": 0.31601997021984757, "grad_norm": 0.763643681592369, "learning_rate": 3.8000000000000005e-06, "loss": 0.7254, "step": 10824 }, { "epoch": 0.316049166447695, "grad_norm": 0.7635147634176332, "learning_rate": 3.7998377939983785e-06, "loss": 0.6237, "step": 10825 }, { "epoch": 0.31607836267554235, "grad_norm": 0.8050240505754436, "learning_rate": 3.799675587996756e-06, "loss": 0.7266, "step": 10826 }, { "epoch": 0.3161075589033897, "grad_norm": 0.7284443622086498, "learning_rate": 3.799513381995134e-06, "loss": 0.6944, "step": 10827 }, { "epoch": 0.31613675513123707, "grad_norm": 0.7593047981209046, "learning_rate": 3.799351175993512e-06, "loss": 0.659, "step": 10828 }, { "epoch": 0.31616595135908443, "grad_norm": 0.7524577294448661, "learning_rate": 3.79918896999189e-06, "loss": 0.6763, "step": 10829 }, { "epoch": 0.3161951475869318, "grad_norm": 0.7989048980533444, "learning_rate": 3.7990267639902677e-06, "loss": 0.7819, "step": 10830 }, { "epoch": 0.31622434381477915, "grad_norm": 0.757986798659861, "learning_rate": 3.7988645579886457e-06, "loss": 0.6722, "step": 10831 }, { "epoch": 0.3162535400426265, "grad_norm": 0.7367182541702089, "learning_rate": 3.7987023519870237e-06, "loss": 0.613, "step": 10832 }, { "epoch": 0.3162827362704739, "grad_norm": 0.7661337519394671, "learning_rate": 3.7985401459854017e-06, "loss": 0.6622, "step": 10833 }, { "epoch": 0.31631193249832124, "grad_norm": 0.8026667176148583, "learning_rate": 3.7983779399837793e-06, "loss": 0.6946, "step": 10834 }, { "epoch": 0.3163411287261686, "grad_norm": 0.7904215844384097, "learning_rate": 3.798215733982158e-06, "loss": 0.5879, "step": 10835 }, { "epoch": 0.31637032495401596, "grad_norm": 0.6999720012874026, "learning_rate": 3.7980535279805357e-06, "loss": 0.6362, "step": 10836 }, { "epoch": 0.3163995211818633, "grad_norm": 0.7009140830464026, "learning_rate": 3.7978913219789137e-06, "loss": 0.6009, "step": 10837 }, { "epoch": 0.3164287174097107, "grad_norm": 0.8038857204402164, "learning_rate": 3.7977291159772917e-06, "loss": 0.7035, "step": 10838 }, { "epoch": 0.31645791363755804, "grad_norm": 0.8936659956726332, "learning_rate": 3.7975669099756697e-06, "loss": 0.644, "step": 10839 }, { "epoch": 0.3164871098654054, "grad_norm": 0.6805501352510956, "learning_rate": 3.7974047039740473e-06, "loss": 0.586, "step": 10840 }, { "epoch": 0.31651630609325276, "grad_norm": 0.7341924371771298, "learning_rate": 3.7972424979724253e-06, "loss": 0.7144, "step": 10841 }, { "epoch": 0.3165455023211001, "grad_norm": 0.8212748823557636, "learning_rate": 3.7970802919708033e-06, "loss": 0.6725, "step": 10842 }, { "epoch": 0.3165746985489475, "grad_norm": 0.7720520068521367, "learning_rate": 3.7969180859691813e-06, "loss": 0.7032, "step": 10843 }, { "epoch": 0.31660389477679485, "grad_norm": 0.712187106573125, "learning_rate": 3.7967558799675593e-06, "loss": 0.6442, "step": 10844 }, { "epoch": 0.3166330910046422, "grad_norm": 0.7373088414063182, "learning_rate": 3.796593673965937e-06, "loss": 0.6675, "step": 10845 }, { "epoch": 0.31666228723248957, "grad_norm": 0.761275181619865, "learning_rate": 3.796431467964315e-06, "loss": 0.6939, "step": 10846 }, { "epoch": 0.31669148346033693, "grad_norm": 0.6680276518008513, "learning_rate": 3.796269261962693e-06, "loss": 0.6277, "step": 10847 }, { "epoch": 0.3167206796881843, "grad_norm": 0.6908584688447844, "learning_rate": 3.796107055961071e-06, "loss": 0.6211, "step": 10848 }, { "epoch": 0.31674987591603165, "grad_norm": 0.7383237963146532, "learning_rate": 3.7959448499594485e-06, "loss": 0.6818, "step": 10849 }, { "epoch": 0.316779072143879, "grad_norm": 0.73851225423161, "learning_rate": 3.7957826439578265e-06, "loss": 0.7143, "step": 10850 }, { "epoch": 0.3168082683717264, "grad_norm": 0.7320675342613051, "learning_rate": 3.7956204379562045e-06, "loss": 0.6096, "step": 10851 }, { "epoch": 0.31683746459957374, "grad_norm": 0.8129611066969147, "learning_rate": 3.7954582319545825e-06, "loss": 0.6975, "step": 10852 }, { "epoch": 0.3168666608274211, "grad_norm": 0.7795459622604461, "learning_rate": 3.79529602595296e-06, "loss": 0.7216, "step": 10853 }, { "epoch": 0.31689585705526846, "grad_norm": 0.7399740261733587, "learning_rate": 3.795133819951339e-06, "loss": 0.6701, "step": 10854 }, { "epoch": 0.3169250532831158, "grad_norm": 0.762601685190365, "learning_rate": 3.7949716139497166e-06, "loss": 0.6445, "step": 10855 }, { "epoch": 0.3169542495109632, "grad_norm": 0.7300523561939981, "learning_rate": 3.7948094079480946e-06, "loss": 0.6485, "step": 10856 }, { "epoch": 0.31698344573881054, "grad_norm": 0.7861825378591413, "learning_rate": 3.7946472019464726e-06, "loss": 0.7987, "step": 10857 }, { "epoch": 0.3170126419666579, "grad_norm": 0.8066512715413128, "learning_rate": 3.7944849959448506e-06, "loss": 0.7456, "step": 10858 }, { "epoch": 0.31704183819450527, "grad_norm": 0.7114287692841876, "learning_rate": 3.794322789943228e-06, "loss": 0.6261, "step": 10859 }, { "epoch": 0.3170710344223526, "grad_norm": 0.7500955294625735, "learning_rate": 3.794160583941606e-06, "loss": 0.7003, "step": 10860 }, { "epoch": 0.3171002306502, "grad_norm": 0.7869238505025852, "learning_rate": 3.793998377939984e-06, "loss": 0.6742, "step": 10861 }, { "epoch": 0.31712942687804735, "grad_norm": 0.7004810967515152, "learning_rate": 3.793836171938362e-06, "loss": 0.5722, "step": 10862 }, { "epoch": 0.3171586231058947, "grad_norm": 0.7324375418253518, "learning_rate": 3.79367396593674e-06, "loss": 0.6302, "step": 10863 }, { "epoch": 0.31718781933374207, "grad_norm": 0.7360458234038153, "learning_rate": 3.7935117599351178e-06, "loss": 0.6682, "step": 10864 }, { "epoch": 0.31721701556158943, "grad_norm": 0.7593440819434615, "learning_rate": 3.7933495539334958e-06, "loss": 0.7235, "step": 10865 }, { "epoch": 0.3172462117894368, "grad_norm": 0.7363486159148557, "learning_rate": 3.7931873479318738e-06, "loss": 0.6625, "step": 10866 }, { "epoch": 0.31727540801728416, "grad_norm": 0.6716855885566434, "learning_rate": 3.793025141930252e-06, "loss": 0.581, "step": 10867 }, { "epoch": 0.3173046042451315, "grad_norm": 0.7165339029455783, "learning_rate": 3.7928629359286294e-06, "loss": 0.604, "step": 10868 }, { "epoch": 0.3173338004729789, "grad_norm": 0.7244700194620655, "learning_rate": 3.7927007299270074e-06, "loss": 0.6098, "step": 10869 }, { "epoch": 0.31736299670082624, "grad_norm": 0.7780146616091663, "learning_rate": 3.7925385239253854e-06, "loss": 0.703, "step": 10870 }, { "epoch": 0.3173921929286736, "grad_norm": 0.7463077027104399, "learning_rate": 3.7923763179237634e-06, "loss": 0.6454, "step": 10871 }, { "epoch": 0.31742138915652096, "grad_norm": 0.7252466472715151, "learning_rate": 3.792214111922141e-06, "loss": 0.6718, "step": 10872 }, { "epoch": 0.3174505853843683, "grad_norm": 0.7936897308885632, "learning_rate": 3.79205190592052e-06, "loss": 0.7759, "step": 10873 }, { "epoch": 0.3174797816122157, "grad_norm": 0.7494599554132914, "learning_rate": 3.7918896999188974e-06, "loss": 0.7133, "step": 10874 }, { "epoch": 0.31750897784006304, "grad_norm": 0.7357361006786022, "learning_rate": 3.7917274939172754e-06, "loss": 0.6886, "step": 10875 }, { "epoch": 0.3175381740679104, "grad_norm": 0.7381194537321105, "learning_rate": 3.7915652879156534e-06, "loss": 0.6874, "step": 10876 }, { "epoch": 0.31756737029575777, "grad_norm": 0.7405572722882054, "learning_rate": 3.7914030819140314e-06, "loss": 0.6952, "step": 10877 }, { "epoch": 0.31759656652360513, "grad_norm": 0.7320302312448639, "learning_rate": 3.791240875912409e-06, "loss": 0.5887, "step": 10878 }, { "epoch": 0.3176257627514525, "grad_norm": 0.7546589332569391, "learning_rate": 3.791078669910787e-06, "loss": 0.6353, "step": 10879 }, { "epoch": 0.31765495897929985, "grad_norm": 0.717509602707278, "learning_rate": 3.790916463909165e-06, "loss": 0.5946, "step": 10880 }, { "epoch": 0.3176841552071472, "grad_norm": 0.7180528105390651, "learning_rate": 3.790754257907543e-06, "loss": 0.6107, "step": 10881 }, { "epoch": 0.3177133514349946, "grad_norm": 0.6632298151581444, "learning_rate": 3.7905920519059206e-06, "loss": 0.5455, "step": 10882 }, { "epoch": 0.31774254766284193, "grad_norm": 0.730080321142526, "learning_rate": 3.7904298459042986e-06, "loss": 0.6849, "step": 10883 }, { "epoch": 0.3177717438906893, "grad_norm": 0.7570576667012928, "learning_rate": 3.7902676399026766e-06, "loss": 0.6646, "step": 10884 }, { "epoch": 0.3178009401185367, "grad_norm": 0.7156743536529799, "learning_rate": 3.7901054339010546e-06, "loss": 0.6504, "step": 10885 }, { "epoch": 0.3178301363463841, "grad_norm": 0.777656339233435, "learning_rate": 3.7899432278994326e-06, "loss": 0.7101, "step": 10886 }, { "epoch": 0.31785933257423143, "grad_norm": 0.7034045340501335, "learning_rate": 3.7897810218978102e-06, "loss": 0.5898, "step": 10887 }, { "epoch": 0.3178885288020788, "grad_norm": 0.7548752746926803, "learning_rate": 3.7896188158961882e-06, "loss": 0.7219, "step": 10888 }, { "epoch": 0.31791772502992616, "grad_norm": 0.7490810174233774, "learning_rate": 3.7894566098945662e-06, "loss": 0.6751, "step": 10889 }, { "epoch": 0.3179469212577735, "grad_norm": 0.7331941517368198, "learning_rate": 3.7892944038929443e-06, "loss": 0.6418, "step": 10890 }, { "epoch": 0.3179761174856209, "grad_norm": 0.7182926382623163, "learning_rate": 3.789132197891322e-06, "loss": 0.6247, "step": 10891 }, { "epoch": 0.31800531371346824, "grad_norm": 0.8469332180693312, "learning_rate": 3.7889699918897007e-06, "loss": 0.7839, "step": 10892 }, { "epoch": 0.3180345099413156, "grad_norm": 0.6986004763916618, "learning_rate": 3.7888077858880783e-06, "loss": 0.6118, "step": 10893 }, { "epoch": 0.31806370616916296, "grad_norm": 0.6838597908143321, "learning_rate": 3.7886455798864563e-06, "loss": 0.6044, "step": 10894 }, { "epoch": 0.3180929023970103, "grad_norm": 0.811506613701888, "learning_rate": 3.7884833738848343e-06, "loss": 0.7479, "step": 10895 }, { "epoch": 0.3181220986248577, "grad_norm": 0.7076350162712646, "learning_rate": 3.7883211678832123e-06, "loss": 0.5988, "step": 10896 }, { "epoch": 0.31815129485270505, "grad_norm": 0.7875015872301262, "learning_rate": 3.78815896188159e-06, "loss": 0.7059, "step": 10897 }, { "epoch": 0.3181804910805524, "grad_norm": 0.7183773077268875, "learning_rate": 3.787996755879968e-06, "loss": 0.656, "step": 10898 }, { "epoch": 0.31820968730839977, "grad_norm": 0.7702195956834191, "learning_rate": 3.787834549878346e-06, "loss": 0.6636, "step": 10899 }, { "epoch": 0.31823888353624713, "grad_norm": 0.8030492371356556, "learning_rate": 3.787672343876724e-06, "loss": 0.6708, "step": 10900 }, { "epoch": 0.3182680797640945, "grad_norm": 0.759282907652931, "learning_rate": 3.7875101378751015e-06, "loss": 0.6506, "step": 10901 }, { "epoch": 0.31829727599194185, "grad_norm": 0.8419615671535056, "learning_rate": 3.7873479318734795e-06, "loss": 0.7606, "step": 10902 }, { "epoch": 0.3183264722197892, "grad_norm": 0.6901729740069817, "learning_rate": 3.7871857258718575e-06, "loss": 0.5683, "step": 10903 }, { "epoch": 0.3183556684476366, "grad_norm": 0.7721047316235677, "learning_rate": 3.7870235198702355e-06, "loss": 0.7247, "step": 10904 }, { "epoch": 0.31838486467548394, "grad_norm": 0.7745051567631798, "learning_rate": 3.7868613138686135e-06, "loss": 0.6953, "step": 10905 }, { "epoch": 0.3184140609033313, "grad_norm": 0.7071570749615836, "learning_rate": 3.786699107866991e-06, "loss": 0.6427, "step": 10906 }, { "epoch": 0.31844325713117866, "grad_norm": 0.7899598681123422, "learning_rate": 3.786536901865369e-06, "loss": 0.7345, "step": 10907 }, { "epoch": 0.318472453359026, "grad_norm": 0.725849662448816, "learning_rate": 3.786374695863747e-06, "loss": 0.6069, "step": 10908 }, { "epoch": 0.3185016495868734, "grad_norm": 0.7786154406543904, "learning_rate": 3.786212489862125e-06, "loss": 0.7713, "step": 10909 }, { "epoch": 0.31853084581472074, "grad_norm": 0.8385919007969768, "learning_rate": 3.7860502838605027e-06, "loss": 0.7825, "step": 10910 }, { "epoch": 0.3185600420425681, "grad_norm": 0.8075196094909404, "learning_rate": 3.7858880778588815e-06, "loss": 0.6536, "step": 10911 }, { "epoch": 0.31858923827041546, "grad_norm": 0.7173421053369963, "learning_rate": 3.785725871857259e-06, "loss": 0.6622, "step": 10912 }, { "epoch": 0.3186184344982628, "grad_norm": 0.7519455963930244, "learning_rate": 3.785563665855637e-06, "loss": 0.6607, "step": 10913 }, { "epoch": 0.3186476307261102, "grad_norm": 0.7150028149647878, "learning_rate": 3.785401459854015e-06, "loss": 0.6276, "step": 10914 }, { "epoch": 0.31867682695395755, "grad_norm": 0.696300424366585, "learning_rate": 3.785239253852393e-06, "loss": 0.5898, "step": 10915 }, { "epoch": 0.3187060231818049, "grad_norm": 0.7393103249718938, "learning_rate": 3.7850770478507707e-06, "loss": 0.6842, "step": 10916 }, { "epoch": 0.31873521940965227, "grad_norm": 0.8563441787476068, "learning_rate": 3.7849148418491487e-06, "loss": 0.7224, "step": 10917 }, { "epoch": 0.31876441563749963, "grad_norm": 0.7917096522745997, "learning_rate": 3.7847526358475267e-06, "loss": 0.7711, "step": 10918 }, { "epoch": 0.318793611865347, "grad_norm": 0.7980272391170494, "learning_rate": 3.7845904298459048e-06, "loss": 0.7064, "step": 10919 }, { "epoch": 0.31882280809319435, "grad_norm": 0.7419972239693858, "learning_rate": 3.7844282238442823e-06, "loss": 0.6912, "step": 10920 }, { "epoch": 0.3188520043210417, "grad_norm": 0.7069000858738653, "learning_rate": 3.7842660178426603e-06, "loss": 0.6674, "step": 10921 }, { "epoch": 0.3188812005488891, "grad_norm": 0.7163246616339222, "learning_rate": 3.7841038118410384e-06, "loss": 0.629, "step": 10922 }, { "epoch": 0.31891039677673644, "grad_norm": 0.8116292234222686, "learning_rate": 3.7839416058394164e-06, "loss": 0.7308, "step": 10923 }, { "epoch": 0.3189395930045838, "grad_norm": 0.7756381007194388, "learning_rate": 3.7837793998377944e-06, "loss": 0.6375, "step": 10924 }, { "epoch": 0.31896878923243116, "grad_norm": 0.7331186436729062, "learning_rate": 3.783617193836172e-06, "loss": 0.5824, "step": 10925 }, { "epoch": 0.3189979854602785, "grad_norm": 0.8551101885611682, "learning_rate": 3.78345498783455e-06, "loss": 0.7135, "step": 10926 }, { "epoch": 0.3190271816881259, "grad_norm": 0.6989021608891608, "learning_rate": 3.783292781832928e-06, "loss": 0.6206, "step": 10927 }, { "epoch": 0.31905637791597324, "grad_norm": 0.7441760269805906, "learning_rate": 3.783130575831306e-06, "loss": 0.6664, "step": 10928 }, { "epoch": 0.3190855741438206, "grad_norm": 0.7019051592831351, "learning_rate": 3.7829683698296836e-06, "loss": 0.623, "step": 10929 }, { "epoch": 0.31911477037166797, "grad_norm": 0.7174559984064243, "learning_rate": 3.7828061638280624e-06, "loss": 0.6139, "step": 10930 }, { "epoch": 0.3191439665995153, "grad_norm": 0.7484258347382219, "learning_rate": 3.78264395782644e-06, "loss": 0.718, "step": 10931 }, { "epoch": 0.3191731628273627, "grad_norm": 0.7158700973362181, "learning_rate": 3.782481751824818e-06, "loss": 0.648, "step": 10932 }, { "epoch": 0.31920235905521005, "grad_norm": 0.7022532630283833, "learning_rate": 3.782319545823196e-06, "loss": 0.5807, "step": 10933 }, { "epoch": 0.3192315552830574, "grad_norm": 0.7510793970649725, "learning_rate": 3.782157339821574e-06, "loss": 0.6677, "step": 10934 }, { "epoch": 0.31926075151090477, "grad_norm": 0.7307515262494972, "learning_rate": 3.7819951338199516e-06, "loss": 0.6614, "step": 10935 }, { "epoch": 0.31928994773875213, "grad_norm": 0.7830399563724436, "learning_rate": 3.7818329278183296e-06, "loss": 0.6676, "step": 10936 }, { "epoch": 0.3193191439665995, "grad_norm": 0.8981761489148099, "learning_rate": 3.7816707218167076e-06, "loss": 0.7675, "step": 10937 }, { "epoch": 0.31934834019444686, "grad_norm": 0.6903414348192027, "learning_rate": 3.7815085158150856e-06, "loss": 0.5856, "step": 10938 }, { "epoch": 0.3193775364222942, "grad_norm": 0.6822929606036966, "learning_rate": 3.781346309813463e-06, "loss": 0.5962, "step": 10939 }, { "epoch": 0.3194067326501416, "grad_norm": 0.6649149621299784, "learning_rate": 3.781184103811841e-06, "loss": 0.5579, "step": 10940 }, { "epoch": 0.31943592887798894, "grad_norm": 0.688667247293653, "learning_rate": 3.7810218978102192e-06, "loss": 0.5498, "step": 10941 }, { "epoch": 0.3194651251058363, "grad_norm": 0.7361710706753275, "learning_rate": 3.7808596918085972e-06, "loss": 0.6437, "step": 10942 }, { "epoch": 0.31949432133368366, "grad_norm": 0.7530888210038007, "learning_rate": 3.7806974858069752e-06, "loss": 0.7186, "step": 10943 }, { "epoch": 0.319523517561531, "grad_norm": 0.7457710664623277, "learning_rate": 3.780535279805353e-06, "loss": 0.6322, "step": 10944 }, { "epoch": 0.31955271378937844, "grad_norm": 0.791047730003969, "learning_rate": 3.780373073803731e-06, "loss": 0.7164, "step": 10945 }, { "epoch": 0.3195819100172258, "grad_norm": 0.7759962590236938, "learning_rate": 3.780210867802109e-06, "loss": 0.6675, "step": 10946 }, { "epoch": 0.31961110624507316, "grad_norm": 0.7626876322475487, "learning_rate": 3.780048661800487e-06, "loss": 0.6654, "step": 10947 }, { "epoch": 0.3196403024729205, "grad_norm": 0.742769422859685, "learning_rate": 3.7798864557988644e-06, "loss": 0.7339, "step": 10948 }, { "epoch": 0.3196694987007679, "grad_norm": 0.7540177890425019, "learning_rate": 3.7797242497972433e-06, "loss": 0.6611, "step": 10949 }, { "epoch": 0.31969869492861525, "grad_norm": 0.7560299120386034, "learning_rate": 3.779562043795621e-06, "loss": 0.6915, "step": 10950 }, { "epoch": 0.3197278911564626, "grad_norm": 0.6824418610699661, "learning_rate": 3.779399837793999e-06, "loss": 0.6074, "step": 10951 }, { "epoch": 0.31975708738430997, "grad_norm": 0.697228026269156, "learning_rate": 3.779237631792377e-06, "loss": 0.5958, "step": 10952 }, { "epoch": 0.31978628361215733, "grad_norm": 0.7474892499198489, "learning_rate": 3.779075425790755e-06, "loss": 0.7191, "step": 10953 }, { "epoch": 0.3198154798400047, "grad_norm": 0.7462482050366797, "learning_rate": 3.7789132197891325e-06, "loss": 0.6685, "step": 10954 }, { "epoch": 0.31984467606785205, "grad_norm": 0.6699006975718471, "learning_rate": 3.7787510137875105e-06, "loss": 0.5751, "step": 10955 }, { "epoch": 0.3198738722956994, "grad_norm": 0.751465219164698, "learning_rate": 3.7785888077858885e-06, "loss": 0.7319, "step": 10956 }, { "epoch": 0.3199030685235468, "grad_norm": 0.705990902905909, "learning_rate": 3.7784266017842665e-06, "loss": 0.627, "step": 10957 }, { "epoch": 0.31993226475139414, "grad_norm": 0.8528331245174423, "learning_rate": 3.778264395782644e-06, "loss": 0.6963, "step": 10958 }, { "epoch": 0.3199614609792415, "grad_norm": 0.7206471271579711, "learning_rate": 3.778102189781022e-06, "loss": 0.6121, "step": 10959 }, { "epoch": 0.31999065720708886, "grad_norm": 1.2227227488313666, "learning_rate": 3.7779399837794e-06, "loss": 0.714, "step": 10960 }, { "epoch": 0.3200198534349362, "grad_norm": 0.7852794550800888, "learning_rate": 3.777777777777778e-06, "loss": 0.7146, "step": 10961 }, { "epoch": 0.3200490496627836, "grad_norm": 0.7123429716078272, "learning_rate": 3.777615571776156e-06, "loss": 0.6978, "step": 10962 }, { "epoch": 0.32007824589063094, "grad_norm": 1.0215958406426502, "learning_rate": 3.7774533657745337e-06, "loss": 0.7693, "step": 10963 }, { "epoch": 0.3201074421184783, "grad_norm": 0.7981561338782129, "learning_rate": 3.7772911597729117e-06, "loss": 0.7675, "step": 10964 }, { "epoch": 0.32013663834632566, "grad_norm": 0.777434682670319, "learning_rate": 3.7771289537712897e-06, "loss": 0.6398, "step": 10965 }, { "epoch": 0.320165834574173, "grad_norm": 0.7229452485528073, "learning_rate": 3.7769667477696677e-06, "loss": 0.6209, "step": 10966 }, { "epoch": 0.3201950308020204, "grad_norm": 0.7723056147441673, "learning_rate": 3.7768045417680453e-06, "loss": 0.6687, "step": 10967 }, { "epoch": 0.32022422702986775, "grad_norm": 0.7595381489497026, "learning_rate": 3.776642335766424e-06, "loss": 0.6594, "step": 10968 }, { "epoch": 0.3202534232577151, "grad_norm": 0.8557252694383931, "learning_rate": 3.7764801297648017e-06, "loss": 0.765, "step": 10969 }, { "epoch": 0.32028261948556247, "grad_norm": 0.700742941772938, "learning_rate": 3.7763179237631797e-06, "loss": 0.6511, "step": 10970 }, { "epoch": 0.32031181571340983, "grad_norm": 0.746772887253571, "learning_rate": 3.7761557177615577e-06, "loss": 0.5814, "step": 10971 }, { "epoch": 0.3203410119412572, "grad_norm": 0.7209986695607702, "learning_rate": 3.7759935117599357e-06, "loss": 0.6236, "step": 10972 }, { "epoch": 0.32037020816910455, "grad_norm": 0.7179869679680023, "learning_rate": 3.7758313057583133e-06, "loss": 0.6255, "step": 10973 }, { "epoch": 0.3203994043969519, "grad_norm": 0.7134519361630721, "learning_rate": 3.7756690997566913e-06, "loss": 0.6153, "step": 10974 }, { "epoch": 0.3204286006247993, "grad_norm": 0.7970575732425559, "learning_rate": 3.7755068937550693e-06, "loss": 0.7037, "step": 10975 }, { "epoch": 0.32045779685264664, "grad_norm": 0.6866792381880172, "learning_rate": 3.7753446877534473e-06, "loss": 0.5989, "step": 10976 }, { "epoch": 0.320486993080494, "grad_norm": 0.729312193233997, "learning_rate": 3.775182481751825e-06, "loss": 0.6639, "step": 10977 }, { "epoch": 0.32051618930834136, "grad_norm": 0.7146512258070186, "learning_rate": 3.775020275750203e-06, "loss": 0.6615, "step": 10978 }, { "epoch": 0.3205453855361887, "grad_norm": 0.9716816705281135, "learning_rate": 3.774858069748581e-06, "loss": 0.6241, "step": 10979 }, { "epoch": 0.3205745817640361, "grad_norm": 0.839100084219604, "learning_rate": 3.774695863746959e-06, "loss": 0.7272, "step": 10980 }, { "epoch": 0.32060377799188344, "grad_norm": 0.7492982843084861, "learning_rate": 3.774533657745337e-06, "loss": 0.6511, "step": 10981 }, { "epoch": 0.3206329742197308, "grad_norm": 0.7608774537709225, "learning_rate": 3.7743714517437145e-06, "loss": 0.7019, "step": 10982 }, { "epoch": 0.32066217044757817, "grad_norm": 0.735893855320944, "learning_rate": 3.7742092457420925e-06, "loss": 0.6529, "step": 10983 }, { "epoch": 0.3206913666754255, "grad_norm": 0.8599042729289736, "learning_rate": 3.7740470397404705e-06, "loss": 0.728, "step": 10984 }, { "epoch": 0.3207205629032729, "grad_norm": 0.7323309408325637, "learning_rate": 3.7738848337388485e-06, "loss": 0.6633, "step": 10985 }, { "epoch": 0.32074975913112025, "grad_norm": 0.7411467050541267, "learning_rate": 3.773722627737227e-06, "loss": 0.7178, "step": 10986 }, { "epoch": 0.3207789553589676, "grad_norm": 0.7584612710014479, "learning_rate": 3.773560421735605e-06, "loss": 0.7057, "step": 10987 }, { "epoch": 0.32080815158681497, "grad_norm": 0.7737065385750727, "learning_rate": 3.7733982157339826e-06, "loss": 0.6204, "step": 10988 }, { "epoch": 0.32083734781466233, "grad_norm": 0.7426413384308533, "learning_rate": 3.7732360097323606e-06, "loss": 0.6433, "step": 10989 }, { "epoch": 0.3208665440425097, "grad_norm": 0.7112915609602141, "learning_rate": 3.7730738037307386e-06, "loss": 0.6455, "step": 10990 }, { "epoch": 0.32089574027035705, "grad_norm": 0.6805864340715656, "learning_rate": 3.7729115977291166e-06, "loss": 0.5773, "step": 10991 }, { "epoch": 0.3209249364982044, "grad_norm": 0.7208797373333872, "learning_rate": 3.772749391727494e-06, "loss": 0.6305, "step": 10992 }, { "epoch": 0.3209541327260518, "grad_norm": 0.7333887419644234, "learning_rate": 3.772587185725872e-06, "loss": 0.6455, "step": 10993 }, { "epoch": 0.32098332895389914, "grad_norm": 0.7219212415059368, "learning_rate": 3.77242497972425e-06, "loss": 0.6478, "step": 10994 }, { "epoch": 0.3210125251817465, "grad_norm": 0.69995709390038, "learning_rate": 3.772262773722628e-06, "loss": 0.6578, "step": 10995 }, { "epoch": 0.32104172140959386, "grad_norm": 0.7548866219929887, "learning_rate": 3.7721005677210058e-06, "loss": 0.6497, "step": 10996 }, { "epoch": 0.3210709176374412, "grad_norm": 0.8343390270918598, "learning_rate": 3.7719383617193838e-06, "loss": 0.6459, "step": 10997 }, { "epoch": 0.3211001138652886, "grad_norm": 0.762905594999226, "learning_rate": 3.7717761557177618e-06, "loss": 0.6939, "step": 10998 }, { "epoch": 0.32112931009313594, "grad_norm": 0.7674471859133025, "learning_rate": 3.77161394971614e-06, "loss": 0.5857, "step": 10999 }, { "epoch": 0.3211585063209833, "grad_norm": 0.7122550030874834, "learning_rate": 3.771451743714518e-06, "loss": 0.6445, "step": 11000 }, { "epoch": 0.32118770254883067, "grad_norm": 0.7693417161287085, "learning_rate": 3.7712895377128954e-06, "loss": 0.6641, "step": 11001 }, { "epoch": 0.32121689877667803, "grad_norm": 0.7404791880477133, "learning_rate": 3.7711273317112734e-06, "loss": 0.7389, "step": 11002 }, { "epoch": 0.3212460950045254, "grad_norm": 0.7085974322342266, "learning_rate": 3.7709651257096514e-06, "loss": 0.6264, "step": 11003 }, { "epoch": 0.32127529123237275, "grad_norm": 0.7099634674774501, "learning_rate": 3.7708029197080294e-06, "loss": 0.6646, "step": 11004 }, { "epoch": 0.32130448746022017, "grad_norm": 0.748890420004357, "learning_rate": 3.770640713706408e-06, "loss": 0.6517, "step": 11005 }, { "epoch": 0.32133368368806753, "grad_norm": 0.7301160521328486, "learning_rate": 3.7704785077047854e-06, "loss": 0.6144, "step": 11006 }, { "epoch": 0.3213628799159149, "grad_norm": 0.7123949852253952, "learning_rate": 3.7703163017031634e-06, "loss": 0.6797, "step": 11007 }, { "epoch": 0.32139207614376225, "grad_norm": 0.7119210172147443, "learning_rate": 3.7701540957015414e-06, "loss": 0.6274, "step": 11008 }, { "epoch": 0.3214212723716096, "grad_norm": 0.704362960477629, "learning_rate": 3.7699918896999194e-06, "loss": 0.6359, "step": 11009 }, { "epoch": 0.321450468599457, "grad_norm": 0.7222891801322336, "learning_rate": 3.7698296836982974e-06, "loss": 0.6469, "step": 11010 }, { "epoch": 0.32147966482730433, "grad_norm": 0.7380314136452524, "learning_rate": 3.769667477696675e-06, "loss": 0.6628, "step": 11011 }, { "epoch": 0.3215088610551517, "grad_norm": 0.7890941748422226, "learning_rate": 3.769505271695053e-06, "loss": 0.7656, "step": 11012 }, { "epoch": 0.32153805728299906, "grad_norm": 0.7179471211566276, "learning_rate": 3.769343065693431e-06, "loss": 0.6573, "step": 11013 }, { "epoch": 0.3215672535108464, "grad_norm": 0.7847702744859915, "learning_rate": 3.769180859691809e-06, "loss": 0.6255, "step": 11014 }, { "epoch": 0.3215964497386938, "grad_norm": 0.6965714966945149, "learning_rate": 3.7690186536901866e-06, "loss": 0.612, "step": 11015 }, { "epoch": 0.32162564596654114, "grad_norm": 0.813711553270801, "learning_rate": 3.7688564476885646e-06, "loss": 0.6764, "step": 11016 }, { "epoch": 0.3216548421943885, "grad_norm": 0.7741587653602858, "learning_rate": 3.7686942416869426e-06, "loss": 0.7311, "step": 11017 }, { "epoch": 0.32168403842223586, "grad_norm": 0.747754370035513, "learning_rate": 3.7685320356853207e-06, "loss": 0.7416, "step": 11018 }, { "epoch": 0.3217132346500832, "grad_norm": 0.6966158583693609, "learning_rate": 3.7683698296836987e-06, "loss": 0.6366, "step": 11019 }, { "epoch": 0.3217424308779306, "grad_norm": 0.8363571601958696, "learning_rate": 3.7682076236820762e-06, "loss": 0.722, "step": 11020 }, { "epoch": 0.32177162710577795, "grad_norm": 0.7176493607923666, "learning_rate": 3.7680454176804543e-06, "loss": 0.6501, "step": 11021 }, { "epoch": 0.3218008233336253, "grad_norm": 0.7100286869560586, "learning_rate": 3.7678832116788323e-06, "loss": 0.597, "step": 11022 }, { "epoch": 0.32183001956147267, "grad_norm": 0.6894861570940486, "learning_rate": 3.7677210056772103e-06, "loss": 0.5921, "step": 11023 }, { "epoch": 0.32185921578932003, "grad_norm": 0.7642398607201919, "learning_rate": 3.7675587996755887e-06, "loss": 0.713, "step": 11024 }, { "epoch": 0.3218884120171674, "grad_norm": 0.7318603340348033, "learning_rate": 3.7673965936739663e-06, "loss": 0.6186, "step": 11025 }, { "epoch": 0.32191760824501475, "grad_norm": 0.7233004692432096, "learning_rate": 3.7672343876723443e-06, "loss": 0.6405, "step": 11026 }, { "epoch": 0.3219468044728621, "grad_norm": 0.7487089208214848, "learning_rate": 3.7670721816707223e-06, "loss": 0.6555, "step": 11027 }, { "epoch": 0.3219760007007095, "grad_norm": 0.7354157469668757, "learning_rate": 3.7669099756691003e-06, "loss": 0.6784, "step": 11028 }, { "epoch": 0.32200519692855684, "grad_norm": 0.7880220304183475, "learning_rate": 3.7667477696674783e-06, "loss": 0.7477, "step": 11029 }, { "epoch": 0.3220343931564042, "grad_norm": 0.7461074586041903, "learning_rate": 3.766585563665856e-06, "loss": 0.7089, "step": 11030 }, { "epoch": 0.32206358938425156, "grad_norm": 0.765229827827463, "learning_rate": 3.766423357664234e-06, "loss": 0.7213, "step": 11031 }, { "epoch": 0.3220927856120989, "grad_norm": 0.7246069879443408, "learning_rate": 3.766261151662612e-06, "loss": 0.6319, "step": 11032 }, { "epoch": 0.3221219818399463, "grad_norm": 0.7521961860735832, "learning_rate": 3.76609894566099e-06, "loss": 0.6947, "step": 11033 }, { "epoch": 0.32215117806779364, "grad_norm": 0.7356333440303102, "learning_rate": 3.7659367396593675e-06, "loss": 0.6293, "step": 11034 }, { "epoch": 0.322180374295641, "grad_norm": 0.7144207768861405, "learning_rate": 3.7657745336577455e-06, "loss": 0.6012, "step": 11035 }, { "epoch": 0.32220957052348836, "grad_norm": 0.7846908525595551, "learning_rate": 3.7656123276561235e-06, "loss": 0.6652, "step": 11036 }, { "epoch": 0.3222387667513357, "grad_norm": 0.7020149240078066, "learning_rate": 3.7654501216545015e-06, "loss": 0.6157, "step": 11037 }, { "epoch": 0.3222679629791831, "grad_norm": 0.725461941902266, "learning_rate": 3.7652879156528795e-06, "loss": 0.6129, "step": 11038 }, { "epoch": 0.32229715920703045, "grad_norm": 0.823212714281545, "learning_rate": 3.765125709651257e-06, "loss": 0.603, "step": 11039 }, { "epoch": 0.3223263554348778, "grad_norm": 0.7837507087612823, "learning_rate": 3.764963503649635e-06, "loss": 0.7247, "step": 11040 }, { "epoch": 0.32235555166272517, "grad_norm": 0.7942609347191211, "learning_rate": 3.764801297648013e-06, "loss": 0.689, "step": 11041 }, { "epoch": 0.32238474789057253, "grad_norm": 0.718997046651088, "learning_rate": 3.764639091646391e-06, "loss": 0.6418, "step": 11042 }, { "epoch": 0.3224139441184199, "grad_norm": 0.8445346042201934, "learning_rate": 3.7644768856447696e-06, "loss": 0.7938, "step": 11043 }, { "epoch": 0.32244314034626725, "grad_norm": 0.7781940631728568, "learning_rate": 3.764314679643147e-06, "loss": 0.7219, "step": 11044 }, { "epoch": 0.3224723365741146, "grad_norm": 0.7192470404230551, "learning_rate": 3.764152473641525e-06, "loss": 0.6969, "step": 11045 }, { "epoch": 0.322501532801962, "grad_norm": 0.7266750101189902, "learning_rate": 3.763990267639903e-06, "loss": 0.6703, "step": 11046 }, { "epoch": 0.32253072902980934, "grad_norm": 0.7419108127408561, "learning_rate": 3.763828061638281e-06, "loss": 0.6147, "step": 11047 }, { "epoch": 0.3225599252576567, "grad_norm": 0.9574296432558506, "learning_rate": 3.763665855636659e-06, "loss": 0.7129, "step": 11048 }, { "epoch": 0.32258912148550406, "grad_norm": 0.7277500237994288, "learning_rate": 3.7635036496350367e-06, "loss": 0.6991, "step": 11049 }, { "epoch": 0.3226183177133514, "grad_norm": 0.769201538658914, "learning_rate": 3.7633414436334148e-06, "loss": 0.6781, "step": 11050 }, { "epoch": 0.3226475139411988, "grad_norm": 0.685659464413191, "learning_rate": 3.7631792376317928e-06, "loss": 0.5991, "step": 11051 }, { "epoch": 0.32267671016904614, "grad_norm": 0.877978573500968, "learning_rate": 3.7630170316301708e-06, "loss": 0.7019, "step": 11052 }, { "epoch": 0.3227059063968935, "grad_norm": 0.7438611838701181, "learning_rate": 3.7628548256285484e-06, "loss": 0.6835, "step": 11053 }, { "epoch": 0.32273510262474087, "grad_norm": 0.6944906266828619, "learning_rate": 3.7626926196269264e-06, "loss": 0.5905, "step": 11054 }, { "epoch": 0.3227642988525882, "grad_norm": 0.7441156376221829, "learning_rate": 3.7625304136253044e-06, "loss": 0.6567, "step": 11055 }, { "epoch": 0.3227934950804356, "grad_norm": 0.7680290359863747, "learning_rate": 3.7623682076236824e-06, "loss": 0.674, "step": 11056 }, { "epoch": 0.32282269130828295, "grad_norm": 0.7588173067033602, "learning_rate": 3.7622060016220604e-06, "loss": 0.7054, "step": 11057 }, { "epoch": 0.3228518875361303, "grad_norm": 0.6738076668142101, "learning_rate": 3.762043795620438e-06, "loss": 0.5613, "step": 11058 }, { "epoch": 0.32288108376397767, "grad_norm": 0.7061406893673361, "learning_rate": 3.761881589618816e-06, "loss": 0.643, "step": 11059 }, { "epoch": 0.32291027999182503, "grad_norm": 0.6616271272786227, "learning_rate": 3.761719383617194e-06, "loss": 0.5459, "step": 11060 }, { "epoch": 0.3229394762196724, "grad_norm": 0.7048790392548947, "learning_rate": 3.761557177615572e-06, "loss": 0.6612, "step": 11061 }, { "epoch": 0.32296867244751976, "grad_norm": 0.7260660555638261, "learning_rate": 3.7613949716139504e-06, "loss": 0.7275, "step": 11062 }, { "epoch": 0.3229978686753671, "grad_norm": 0.7758680533959276, "learning_rate": 3.761232765612328e-06, "loss": 0.725, "step": 11063 }, { "epoch": 0.3230270649032145, "grad_norm": 0.9002615988411946, "learning_rate": 3.761070559610706e-06, "loss": 0.78, "step": 11064 }, { "epoch": 0.32305626113106184, "grad_norm": 0.7499571443320643, "learning_rate": 3.760908353609084e-06, "loss": 0.7306, "step": 11065 }, { "epoch": 0.32308545735890926, "grad_norm": 0.7588918765487636, "learning_rate": 3.760746147607462e-06, "loss": 0.6809, "step": 11066 }, { "epoch": 0.3231146535867566, "grad_norm": 0.7562363794386915, "learning_rate": 3.76058394160584e-06, "loss": 0.6904, "step": 11067 }, { "epoch": 0.323143849814604, "grad_norm": 0.7830102968614143, "learning_rate": 3.7604217356042176e-06, "loss": 0.6539, "step": 11068 }, { "epoch": 0.32317304604245134, "grad_norm": 0.7819393521983634, "learning_rate": 3.7602595296025956e-06, "loss": 0.776, "step": 11069 }, { "epoch": 0.3232022422702987, "grad_norm": 0.8674851143299898, "learning_rate": 3.7600973236009736e-06, "loss": 0.7262, "step": 11070 }, { "epoch": 0.32323143849814606, "grad_norm": 0.751165217007835, "learning_rate": 3.7599351175993516e-06, "loss": 0.6756, "step": 11071 }, { "epoch": 0.3232606347259934, "grad_norm": 0.6709455825700908, "learning_rate": 3.759772911597729e-06, "loss": 0.5725, "step": 11072 }, { "epoch": 0.3232898309538408, "grad_norm": 0.7344785892663546, "learning_rate": 3.7596107055961072e-06, "loss": 0.6777, "step": 11073 }, { "epoch": 0.32331902718168815, "grad_norm": 0.8158139049957093, "learning_rate": 3.7594484995944852e-06, "loss": 0.7142, "step": 11074 }, { "epoch": 0.3233482234095355, "grad_norm": 0.7986065252169239, "learning_rate": 3.7592862935928632e-06, "loss": 0.8019, "step": 11075 }, { "epoch": 0.32337741963738287, "grad_norm": 0.7760997461872841, "learning_rate": 3.7591240875912412e-06, "loss": 0.6781, "step": 11076 }, { "epoch": 0.32340661586523023, "grad_norm": 0.765566856581941, "learning_rate": 3.758961881589619e-06, "loss": 0.7387, "step": 11077 }, { "epoch": 0.3234358120930776, "grad_norm": 0.7214055686240223, "learning_rate": 3.758799675587997e-06, "loss": 0.662, "step": 11078 }, { "epoch": 0.32346500832092495, "grad_norm": 0.7250800132564068, "learning_rate": 3.758637469586375e-06, "loss": 0.6561, "step": 11079 }, { "epoch": 0.3234942045487723, "grad_norm": 0.773892492564181, "learning_rate": 3.758475263584753e-06, "loss": 0.6799, "step": 11080 }, { "epoch": 0.3235234007766197, "grad_norm": 0.7123639179789877, "learning_rate": 3.7583130575831313e-06, "loss": 0.6456, "step": 11081 }, { "epoch": 0.32355259700446704, "grad_norm": 0.7525429928216335, "learning_rate": 3.758150851581509e-06, "loss": 0.6978, "step": 11082 }, { "epoch": 0.3235817932323144, "grad_norm": 0.7279014440791953, "learning_rate": 3.757988645579887e-06, "loss": 0.6315, "step": 11083 }, { "epoch": 0.32361098946016176, "grad_norm": 0.7188470177121207, "learning_rate": 3.757826439578265e-06, "loss": 0.6344, "step": 11084 }, { "epoch": 0.3236401856880091, "grad_norm": 0.8325565651085349, "learning_rate": 3.757664233576643e-06, "loss": 0.6423, "step": 11085 }, { "epoch": 0.3236693819158565, "grad_norm": 0.7069229533223188, "learning_rate": 3.757502027575021e-06, "loss": 0.5963, "step": 11086 }, { "epoch": 0.32369857814370384, "grad_norm": 0.7156751961640434, "learning_rate": 3.7573398215733985e-06, "loss": 0.6701, "step": 11087 }, { "epoch": 0.3237277743715512, "grad_norm": 0.7515894458585831, "learning_rate": 3.7571776155717765e-06, "loss": 0.6977, "step": 11088 }, { "epoch": 0.32375697059939856, "grad_norm": 0.7536083623686749, "learning_rate": 3.7570154095701545e-06, "loss": 0.67, "step": 11089 }, { "epoch": 0.3237861668272459, "grad_norm": 0.706088708769243, "learning_rate": 3.7568532035685325e-06, "loss": 0.647, "step": 11090 }, { "epoch": 0.3238153630550933, "grad_norm": 0.7431579412343491, "learning_rate": 3.75669099756691e-06, "loss": 0.655, "step": 11091 }, { "epoch": 0.32384455928294065, "grad_norm": 0.7147401480202212, "learning_rate": 3.756528791565288e-06, "loss": 0.6246, "step": 11092 }, { "epoch": 0.323873755510788, "grad_norm": 0.7610191944133887, "learning_rate": 3.756366585563666e-06, "loss": 0.6222, "step": 11093 }, { "epoch": 0.32390295173863537, "grad_norm": 0.6396234732770187, "learning_rate": 3.756204379562044e-06, "loss": 0.5074, "step": 11094 }, { "epoch": 0.32393214796648273, "grad_norm": 0.7150351657857689, "learning_rate": 3.756042173560422e-06, "loss": 0.626, "step": 11095 }, { "epoch": 0.3239613441943301, "grad_norm": 0.727724843085499, "learning_rate": 3.7558799675587997e-06, "loss": 0.6798, "step": 11096 }, { "epoch": 0.32399054042217745, "grad_norm": 0.7773384449302037, "learning_rate": 3.7557177615571777e-06, "loss": 0.7318, "step": 11097 }, { "epoch": 0.3240197366500248, "grad_norm": 0.7624969198840779, "learning_rate": 3.7555555555555557e-06, "loss": 0.7012, "step": 11098 }, { "epoch": 0.3240489328778722, "grad_norm": 0.7133439992575411, "learning_rate": 3.7553933495539337e-06, "loss": 0.6736, "step": 11099 }, { "epoch": 0.32407812910571954, "grad_norm": 0.7057987590994567, "learning_rate": 3.755231143552312e-06, "loss": 0.6243, "step": 11100 }, { "epoch": 0.3241073253335669, "grad_norm": 0.7705458568907935, "learning_rate": 3.7550689375506897e-06, "loss": 0.6818, "step": 11101 }, { "epoch": 0.32413652156141426, "grad_norm": 0.659070444881355, "learning_rate": 3.7549067315490677e-06, "loss": 0.552, "step": 11102 }, { "epoch": 0.3241657177892616, "grad_norm": 0.8450142668317844, "learning_rate": 3.7547445255474457e-06, "loss": 0.7803, "step": 11103 }, { "epoch": 0.324194914017109, "grad_norm": 0.812910886896417, "learning_rate": 3.7545823195458237e-06, "loss": 0.7937, "step": 11104 }, { "epoch": 0.32422411024495634, "grad_norm": 0.7109697211833043, "learning_rate": 3.7544201135442017e-06, "loss": 0.6133, "step": 11105 }, { "epoch": 0.3242533064728037, "grad_norm": 0.7527008742306606, "learning_rate": 3.7542579075425793e-06, "loss": 0.6862, "step": 11106 }, { "epoch": 0.32428250270065107, "grad_norm": 0.6617928208190859, "learning_rate": 3.7540957015409573e-06, "loss": 0.5352, "step": 11107 }, { "epoch": 0.3243116989284984, "grad_norm": 0.7445847310691982, "learning_rate": 3.7539334955393353e-06, "loss": 0.6382, "step": 11108 }, { "epoch": 0.3243408951563458, "grad_norm": 0.8039397190353772, "learning_rate": 3.7537712895377133e-06, "loss": 0.7064, "step": 11109 }, { "epoch": 0.32437009138419315, "grad_norm": 0.7180099889805097, "learning_rate": 3.753609083536091e-06, "loss": 0.6479, "step": 11110 }, { "epoch": 0.3243992876120405, "grad_norm": 0.6777852723909024, "learning_rate": 3.753446877534469e-06, "loss": 0.5574, "step": 11111 }, { "epoch": 0.32442848383988787, "grad_norm": 0.8492804164534051, "learning_rate": 3.753284671532847e-06, "loss": 0.7359, "step": 11112 }, { "epoch": 0.32445768006773523, "grad_norm": 0.7768215447574338, "learning_rate": 3.753122465531225e-06, "loss": 0.7077, "step": 11113 }, { "epoch": 0.3244868762955826, "grad_norm": 0.8127186798786694, "learning_rate": 3.752960259529603e-06, "loss": 0.6845, "step": 11114 }, { "epoch": 0.32451607252342995, "grad_norm": 0.7146598117513285, "learning_rate": 3.7527980535279805e-06, "loss": 0.6032, "step": 11115 }, { "epoch": 0.3245452687512773, "grad_norm": 0.8233387341207699, "learning_rate": 3.7526358475263585e-06, "loss": 0.704, "step": 11116 }, { "epoch": 0.3245744649791247, "grad_norm": 0.7096835385566117, "learning_rate": 3.7524736415247366e-06, "loss": 0.6396, "step": 11117 }, { "epoch": 0.32460366120697204, "grad_norm": 0.6971948035785126, "learning_rate": 3.7523114355231146e-06, "loss": 0.5984, "step": 11118 }, { "epoch": 0.3246328574348194, "grad_norm": 0.7437920791548359, "learning_rate": 3.752149229521493e-06, "loss": 0.6355, "step": 11119 }, { "epoch": 0.32466205366266676, "grad_norm": 0.7107065457097989, "learning_rate": 3.7519870235198706e-06, "loss": 0.6181, "step": 11120 }, { "epoch": 0.3246912498905141, "grad_norm": 0.710479577326643, "learning_rate": 3.7518248175182486e-06, "loss": 0.6274, "step": 11121 }, { "epoch": 0.3247204461183615, "grad_norm": 0.7484737115531028, "learning_rate": 3.7516626115166266e-06, "loss": 0.7245, "step": 11122 }, { "epoch": 0.32474964234620884, "grad_norm": 0.7889115697338998, "learning_rate": 3.7515004055150046e-06, "loss": 0.7439, "step": 11123 }, { "epoch": 0.3247788385740562, "grad_norm": 0.873964608183783, "learning_rate": 3.7513381995133826e-06, "loss": 0.7051, "step": 11124 }, { "epoch": 0.32480803480190357, "grad_norm": 0.7929450575288564, "learning_rate": 3.75117599351176e-06, "loss": 0.6813, "step": 11125 }, { "epoch": 0.324837231029751, "grad_norm": 0.701019292103698, "learning_rate": 3.751013787510138e-06, "loss": 0.5909, "step": 11126 }, { "epoch": 0.32486642725759834, "grad_norm": 0.7428620054219032, "learning_rate": 3.750851581508516e-06, "loss": 0.6691, "step": 11127 }, { "epoch": 0.3248956234854457, "grad_norm": 0.7693454363949122, "learning_rate": 3.750689375506894e-06, "loss": 0.6832, "step": 11128 }, { "epoch": 0.32492481971329307, "grad_norm": 0.7377226297395185, "learning_rate": 3.7505271695052718e-06, "loss": 0.7225, "step": 11129 }, { "epoch": 0.32495401594114043, "grad_norm": 0.7067512941610454, "learning_rate": 3.75036496350365e-06, "loss": 0.6444, "step": 11130 }, { "epoch": 0.3249832121689878, "grad_norm": 0.7977472987496791, "learning_rate": 3.750202757502028e-06, "loss": 0.7197, "step": 11131 }, { "epoch": 0.32501240839683515, "grad_norm": 0.7669616598253663, "learning_rate": 3.750040551500406e-06, "loss": 0.7195, "step": 11132 }, { "epoch": 0.3250416046246825, "grad_norm": 0.7150770956756314, "learning_rate": 3.749878345498784e-06, "loss": 0.5922, "step": 11133 }, { "epoch": 0.3250708008525299, "grad_norm": 0.7027939283841359, "learning_rate": 3.7497161394971614e-06, "loss": 0.6123, "step": 11134 }, { "epoch": 0.32509999708037723, "grad_norm": 1.7103947610275057, "learning_rate": 3.7495539334955394e-06, "loss": 0.7382, "step": 11135 }, { "epoch": 0.3251291933082246, "grad_norm": 0.6861262495153987, "learning_rate": 3.7493917274939174e-06, "loss": 0.5894, "step": 11136 }, { "epoch": 0.32515838953607196, "grad_norm": 0.8060129678048132, "learning_rate": 3.749229521492296e-06, "loss": 0.7005, "step": 11137 }, { "epoch": 0.3251875857639193, "grad_norm": 0.7451949950662266, "learning_rate": 3.749067315490674e-06, "loss": 0.6091, "step": 11138 }, { "epoch": 0.3252167819917667, "grad_norm": 0.724748288005142, "learning_rate": 3.7489051094890514e-06, "loss": 0.6093, "step": 11139 }, { "epoch": 0.32524597821961404, "grad_norm": 0.7112326588330435, "learning_rate": 3.7487429034874294e-06, "loss": 0.6371, "step": 11140 }, { "epoch": 0.3252751744474614, "grad_norm": 0.7486301711126586, "learning_rate": 3.7485806974858074e-06, "loss": 0.6702, "step": 11141 }, { "epoch": 0.32530437067530876, "grad_norm": 0.730958758865378, "learning_rate": 3.7484184914841854e-06, "loss": 0.6496, "step": 11142 }, { "epoch": 0.3253335669031561, "grad_norm": 0.6594338071908644, "learning_rate": 3.7482562854825635e-06, "loss": 0.5665, "step": 11143 }, { "epoch": 0.3253627631310035, "grad_norm": 0.7504110828792169, "learning_rate": 3.748094079480941e-06, "loss": 0.6239, "step": 11144 }, { "epoch": 0.32539195935885085, "grad_norm": 0.715432189256134, "learning_rate": 3.747931873479319e-06, "loss": 0.6069, "step": 11145 }, { "epoch": 0.3254211555866982, "grad_norm": 0.7250351674296771, "learning_rate": 3.747769667477697e-06, "loss": 0.6481, "step": 11146 }, { "epoch": 0.32545035181454557, "grad_norm": 0.9146208521488106, "learning_rate": 3.747607461476075e-06, "loss": 0.6826, "step": 11147 }, { "epoch": 0.32547954804239293, "grad_norm": 0.7671124391037784, "learning_rate": 3.7474452554744526e-06, "loss": 0.6707, "step": 11148 }, { "epoch": 0.3255087442702403, "grad_norm": 0.7470927926985802, "learning_rate": 3.7472830494728307e-06, "loss": 0.643, "step": 11149 }, { "epoch": 0.32553794049808765, "grad_norm": 0.7952477996070225, "learning_rate": 3.7471208434712087e-06, "loss": 0.7768, "step": 11150 }, { "epoch": 0.325567136725935, "grad_norm": 0.750024906804192, "learning_rate": 3.7469586374695867e-06, "loss": 0.6306, "step": 11151 }, { "epoch": 0.3255963329537824, "grad_norm": 0.7020272669895015, "learning_rate": 3.7467964314679642e-06, "loss": 0.6141, "step": 11152 }, { "epoch": 0.32562552918162974, "grad_norm": 0.7521466700937124, "learning_rate": 3.7466342254663423e-06, "loss": 0.7019, "step": 11153 }, { "epoch": 0.3256547254094771, "grad_norm": 0.702869260452555, "learning_rate": 3.7464720194647203e-06, "loss": 0.6063, "step": 11154 }, { "epoch": 0.32568392163732446, "grad_norm": 0.7645023372152647, "learning_rate": 3.7463098134630983e-06, "loss": 0.6766, "step": 11155 }, { "epoch": 0.3257131178651718, "grad_norm": 0.7096531643001417, "learning_rate": 3.7461476074614767e-06, "loss": 0.6308, "step": 11156 }, { "epoch": 0.3257423140930192, "grad_norm": 0.7860441437383875, "learning_rate": 3.7459854014598547e-06, "loss": 0.6983, "step": 11157 }, { "epoch": 0.32577151032086654, "grad_norm": 1.0132212077200795, "learning_rate": 3.7458231954582323e-06, "loss": 0.747, "step": 11158 }, { "epoch": 0.3258007065487139, "grad_norm": 0.7340542537514329, "learning_rate": 3.7456609894566103e-06, "loss": 0.6522, "step": 11159 }, { "epoch": 0.32582990277656126, "grad_norm": 0.7919438808198519, "learning_rate": 3.7454987834549883e-06, "loss": 0.7072, "step": 11160 }, { "epoch": 0.3258590990044086, "grad_norm": 0.7623063470876568, "learning_rate": 3.7453365774533663e-06, "loss": 0.6354, "step": 11161 }, { "epoch": 0.325888295232256, "grad_norm": 0.6769386632448885, "learning_rate": 3.7451743714517443e-06, "loss": 0.6086, "step": 11162 }, { "epoch": 0.32591749146010335, "grad_norm": 0.7880226111211796, "learning_rate": 3.745012165450122e-06, "loss": 0.704, "step": 11163 }, { "epoch": 0.3259466876879507, "grad_norm": 0.7176030999085957, "learning_rate": 3.7448499594485e-06, "loss": 0.614, "step": 11164 }, { "epoch": 0.32597588391579807, "grad_norm": 0.7209845807927071, "learning_rate": 3.744687753446878e-06, "loss": 0.619, "step": 11165 }, { "epoch": 0.32600508014364543, "grad_norm": 0.7249006290833814, "learning_rate": 3.744525547445256e-06, "loss": 0.6034, "step": 11166 }, { "epoch": 0.3260342763714928, "grad_norm": 0.7037527908994063, "learning_rate": 3.7443633414436335e-06, "loss": 0.6587, "step": 11167 }, { "epoch": 0.32606347259934015, "grad_norm": 0.7051089266264493, "learning_rate": 3.7442011354420115e-06, "loss": 0.6026, "step": 11168 }, { "epoch": 0.3260926688271875, "grad_norm": 0.7880189083004225, "learning_rate": 3.7440389294403895e-06, "loss": 0.7739, "step": 11169 }, { "epoch": 0.3261218650550349, "grad_norm": 0.7650807270945356, "learning_rate": 3.7438767234387675e-06, "loss": 0.673, "step": 11170 }, { "epoch": 0.32615106128288224, "grad_norm": 0.9079097360432867, "learning_rate": 3.743714517437145e-06, "loss": 0.7732, "step": 11171 }, { "epoch": 0.3261802575107296, "grad_norm": 0.704091352511578, "learning_rate": 3.743552311435523e-06, "loss": 0.6115, "step": 11172 }, { "epoch": 0.32620945373857696, "grad_norm": 0.8013367871354813, "learning_rate": 3.743390105433901e-06, "loss": 0.606, "step": 11173 }, { "epoch": 0.3262386499664243, "grad_norm": 0.6694571610324306, "learning_rate": 3.743227899432279e-06, "loss": 0.5785, "step": 11174 }, { "epoch": 0.3262678461942717, "grad_norm": 0.7370069424089488, "learning_rate": 3.7430656934306576e-06, "loss": 0.6167, "step": 11175 }, { "epoch": 0.32629704242211904, "grad_norm": 0.7901863694445314, "learning_rate": 3.7429034874290356e-06, "loss": 0.6834, "step": 11176 }, { "epoch": 0.3263262386499664, "grad_norm": 0.7104956144528838, "learning_rate": 3.742741281427413e-06, "loss": 0.6513, "step": 11177 }, { "epoch": 0.32635543487781377, "grad_norm": 0.7366664236861085, "learning_rate": 3.742579075425791e-06, "loss": 0.6414, "step": 11178 }, { "epoch": 0.3263846311056611, "grad_norm": 0.7349501009040028, "learning_rate": 3.742416869424169e-06, "loss": 0.6434, "step": 11179 }, { "epoch": 0.3264138273335085, "grad_norm": 0.7343272856280574, "learning_rate": 3.742254663422547e-06, "loss": 0.6627, "step": 11180 }, { "epoch": 0.32644302356135585, "grad_norm": 0.773856118089278, "learning_rate": 3.742092457420925e-06, "loss": 0.6692, "step": 11181 }, { "epoch": 0.3264722197892032, "grad_norm": 0.8141531785514181, "learning_rate": 3.7419302514193028e-06, "loss": 0.7456, "step": 11182 }, { "epoch": 0.32650141601705057, "grad_norm": 0.7315034443658095, "learning_rate": 3.7417680454176808e-06, "loss": 0.6865, "step": 11183 }, { "epoch": 0.32653061224489793, "grad_norm": 0.7097123161462091, "learning_rate": 3.7416058394160588e-06, "loss": 0.6522, "step": 11184 }, { "epoch": 0.3265598084727453, "grad_norm": 0.7331015584559639, "learning_rate": 3.7414436334144368e-06, "loss": 0.6641, "step": 11185 }, { "epoch": 0.3265890047005927, "grad_norm": 0.7462785376783869, "learning_rate": 3.7412814274128144e-06, "loss": 0.7177, "step": 11186 }, { "epoch": 0.32661820092844007, "grad_norm": 0.679128770333732, "learning_rate": 3.7411192214111924e-06, "loss": 0.5949, "step": 11187 }, { "epoch": 0.32664739715628743, "grad_norm": 0.7788348504013247, "learning_rate": 3.7409570154095704e-06, "loss": 0.6574, "step": 11188 }, { "epoch": 0.3266765933841348, "grad_norm": 0.7370463470995476, "learning_rate": 3.7407948094079484e-06, "loss": 0.6582, "step": 11189 }, { "epoch": 0.32670578961198216, "grad_norm": 0.7438680842065291, "learning_rate": 3.740632603406326e-06, "loss": 0.6156, "step": 11190 }, { "epoch": 0.3267349858398295, "grad_norm": 0.7990077124086268, "learning_rate": 3.740470397404704e-06, "loss": 0.6624, "step": 11191 }, { "epoch": 0.3267641820676769, "grad_norm": 0.6975221816893171, "learning_rate": 3.740308191403082e-06, "loss": 0.6229, "step": 11192 }, { "epoch": 0.32679337829552424, "grad_norm": 0.7316221712533927, "learning_rate": 3.74014598540146e-06, "loss": 0.6656, "step": 11193 }, { "epoch": 0.3268225745233716, "grad_norm": 0.7432935323698411, "learning_rate": 3.7399837793998384e-06, "loss": 0.6993, "step": 11194 }, { "epoch": 0.32685177075121896, "grad_norm": 0.7730982800298967, "learning_rate": 3.7398215733982164e-06, "loss": 0.6662, "step": 11195 }, { "epoch": 0.3268809669790663, "grad_norm": 0.7254616416666951, "learning_rate": 3.739659367396594e-06, "loss": 0.6749, "step": 11196 }, { "epoch": 0.3269101632069137, "grad_norm": 0.7159695715519467, "learning_rate": 3.739497161394972e-06, "loss": 0.6356, "step": 11197 }, { "epoch": 0.32693935943476105, "grad_norm": 0.7281906131407856, "learning_rate": 3.73933495539335e-06, "loss": 0.6052, "step": 11198 }, { "epoch": 0.3269685556626084, "grad_norm": 0.6771326078040477, "learning_rate": 3.739172749391728e-06, "loss": 0.5957, "step": 11199 }, { "epoch": 0.32699775189045577, "grad_norm": 0.7390112575056645, "learning_rate": 3.739010543390106e-06, "loss": 0.6131, "step": 11200 }, { "epoch": 0.32702694811830313, "grad_norm": 0.725626543200961, "learning_rate": 3.7388483373884836e-06, "loss": 0.6323, "step": 11201 }, { "epoch": 0.3270561443461505, "grad_norm": 0.7004013521641878, "learning_rate": 3.7386861313868616e-06, "loss": 0.6498, "step": 11202 }, { "epoch": 0.32708534057399785, "grad_norm": 0.7526350090838166, "learning_rate": 3.7385239253852396e-06, "loss": 0.6885, "step": 11203 }, { "epoch": 0.3271145368018452, "grad_norm": 0.767539962373863, "learning_rate": 3.7383617193836176e-06, "loss": 0.6248, "step": 11204 }, { "epoch": 0.3271437330296926, "grad_norm": 0.765490425026785, "learning_rate": 3.7381995133819952e-06, "loss": 0.6948, "step": 11205 }, { "epoch": 0.32717292925753994, "grad_norm": 0.7379709196963689, "learning_rate": 3.7380373073803732e-06, "loss": 0.7211, "step": 11206 }, { "epoch": 0.3272021254853873, "grad_norm": 0.6986436970594937, "learning_rate": 3.7378751013787512e-06, "loss": 0.6232, "step": 11207 }, { "epoch": 0.32723132171323466, "grad_norm": 0.7074679129825607, "learning_rate": 3.7377128953771292e-06, "loss": 0.5707, "step": 11208 }, { "epoch": 0.327260517941082, "grad_norm": 0.798227599368849, "learning_rate": 3.737550689375507e-06, "loss": 0.6606, "step": 11209 }, { "epoch": 0.3272897141689294, "grad_norm": 0.6981199989293394, "learning_rate": 3.737388483373885e-06, "loss": 0.6532, "step": 11210 }, { "epoch": 0.32731891039677674, "grad_norm": 0.7604132029648765, "learning_rate": 3.737226277372263e-06, "loss": 0.7134, "step": 11211 }, { "epoch": 0.3273481066246241, "grad_norm": 0.7218554082685507, "learning_rate": 3.737064071370641e-06, "loss": 0.6441, "step": 11212 }, { "epoch": 0.32737730285247146, "grad_norm": 0.7763464861487942, "learning_rate": 3.7369018653690193e-06, "loss": 0.734, "step": 11213 }, { "epoch": 0.3274064990803188, "grad_norm": 0.7544681652336397, "learning_rate": 3.7367396593673973e-06, "loss": 0.6298, "step": 11214 }, { "epoch": 0.3274356953081662, "grad_norm": 0.6879643601218635, "learning_rate": 3.736577453365775e-06, "loss": 0.6057, "step": 11215 }, { "epoch": 0.32746489153601355, "grad_norm": 0.8994742301826852, "learning_rate": 3.736415247364153e-06, "loss": 0.7047, "step": 11216 }, { "epoch": 0.3274940877638609, "grad_norm": 0.7186491106370522, "learning_rate": 3.736253041362531e-06, "loss": 0.6895, "step": 11217 }, { "epoch": 0.32752328399170827, "grad_norm": 0.6915766477591765, "learning_rate": 3.736090835360909e-06, "loss": 0.6136, "step": 11218 }, { "epoch": 0.32755248021955563, "grad_norm": 0.7559077303018212, "learning_rate": 3.735928629359287e-06, "loss": 0.7015, "step": 11219 }, { "epoch": 0.327581676447403, "grad_norm": 0.7351523652300421, "learning_rate": 3.7357664233576645e-06, "loss": 0.6659, "step": 11220 }, { "epoch": 0.32761087267525035, "grad_norm": 0.7417794631795379, "learning_rate": 3.7356042173560425e-06, "loss": 0.6829, "step": 11221 }, { "epoch": 0.3276400689030977, "grad_norm": 0.8143026014085653, "learning_rate": 3.7354420113544205e-06, "loss": 0.7631, "step": 11222 }, { "epoch": 0.3276692651309451, "grad_norm": 0.7364183445437601, "learning_rate": 3.7352798053527985e-06, "loss": 0.6648, "step": 11223 }, { "epoch": 0.32769846135879244, "grad_norm": 0.787296693704953, "learning_rate": 3.735117599351176e-06, "loss": 0.7504, "step": 11224 }, { "epoch": 0.3277276575866398, "grad_norm": 0.6957789043141468, "learning_rate": 3.734955393349554e-06, "loss": 0.5877, "step": 11225 }, { "epoch": 0.32775685381448716, "grad_norm": 0.7852902804899076, "learning_rate": 3.734793187347932e-06, "loss": 0.6849, "step": 11226 }, { "epoch": 0.3277860500423345, "grad_norm": 0.7203616029684863, "learning_rate": 3.73463098134631e-06, "loss": 0.6712, "step": 11227 }, { "epoch": 0.3278152462701819, "grad_norm": 0.7955435808584135, "learning_rate": 3.7344687753446877e-06, "loss": 0.6897, "step": 11228 }, { "epoch": 0.32784444249802924, "grad_norm": 0.7420136308008741, "learning_rate": 3.7343065693430657e-06, "loss": 0.6868, "step": 11229 }, { "epoch": 0.3278736387258766, "grad_norm": 0.6616076819141137, "learning_rate": 3.7341443633414437e-06, "loss": 0.5445, "step": 11230 }, { "epoch": 0.32790283495372397, "grad_norm": 34.65077171777883, "learning_rate": 3.7339821573398217e-06, "loss": 1.0238, "step": 11231 }, { "epoch": 0.3279320311815713, "grad_norm": 0.7314271444584872, "learning_rate": 3.7338199513382e-06, "loss": 0.6224, "step": 11232 }, { "epoch": 0.3279612274094187, "grad_norm": 0.7204084388272939, "learning_rate": 3.733657745336578e-06, "loss": 0.6718, "step": 11233 }, { "epoch": 0.32799042363726605, "grad_norm": 0.7478598492158386, "learning_rate": 3.7334955393349557e-06, "loss": 0.5838, "step": 11234 }, { "epoch": 0.3280196198651134, "grad_norm": 0.7366099843055456, "learning_rate": 3.7333333333333337e-06, "loss": 0.6358, "step": 11235 }, { "epoch": 0.32804881609296077, "grad_norm": 0.7418215593019345, "learning_rate": 3.7331711273317117e-06, "loss": 0.6772, "step": 11236 }, { "epoch": 0.32807801232080813, "grad_norm": 0.73323187709745, "learning_rate": 3.7330089213300897e-06, "loss": 0.6326, "step": 11237 }, { "epoch": 0.3281072085486555, "grad_norm": 0.7244251543262595, "learning_rate": 3.7328467153284677e-06, "loss": 0.6639, "step": 11238 }, { "epoch": 0.32813640477650285, "grad_norm": 0.7435099835748392, "learning_rate": 3.7326845093268453e-06, "loss": 0.6598, "step": 11239 }, { "epoch": 0.3281656010043502, "grad_norm": 0.8968138508494693, "learning_rate": 3.7325223033252233e-06, "loss": 0.8292, "step": 11240 }, { "epoch": 0.3281947972321976, "grad_norm": 0.7548416110623205, "learning_rate": 3.7323600973236013e-06, "loss": 0.6764, "step": 11241 }, { "epoch": 0.32822399346004494, "grad_norm": 0.7055845123419798, "learning_rate": 3.7321978913219794e-06, "loss": 0.5862, "step": 11242 }, { "epoch": 0.3282531896878923, "grad_norm": 0.7638888544562105, "learning_rate": 3.732035685320357e-06, "loss": 0.742, "step": 11243 }, { "epoch": 0.32828238591573966, "grad_norm": 0.7330730050051121, "learning_rate": 3.731873479318735e-06, "loss": 0.6616, "step": 11244 }, { "epoch": 0.328311582143587, "grad_norm": 0.8215856478159458, "learning_rate": 3.731711273317113e-06, "loss": 0.7068, "step": 11245 }, { "epoch": 0.3283407783714344, "grad_norm": 0.7445695443230174, "learning_rate": 3.731549067315491e-06, "loss": 0.6947, "step": 11246 }, { "epoch": 0.3283699745992818, "grad_norm": 0.7081154090111507, "learning_rate": 3.7313868613138685e-06, "loss": 0.6035, "step": 11247 }, { "epoch": 0.32839917082712916, "grad_norm": 0.7067640040720276, "learning_rate": 3.7312246553122465e-06, "loss": 0.5827, "step": 11248 }, { "epoch": 0.3284283670549765, "grad_norm": 0.6621758298191761, "learning_rate": 3.7310624493106246e-06, "loss": 0.5719, "step": 11249 }, { "epoch": 0.3284575632828239, "grad_norm": 0.7046819991776797, "learning_rate": 3.7309002433090026e-06, "loss": 0.5905, "step": 11250 }, { "epoch": 0.32848675951067124, "grad_norm": 0.7240412936088981, "learning_rate": 3.730738037307381e-06, "loss": 0.6537, "step": 11251 }, { "epoch": 0.3285159557385186, "grad_norm": 0.7444336871928812, "learning_rate": 3.730575831305759e-06, "loss": 0.6871, "step": 11252 }, { "epoch": 0.32854515196636597, "grad_norm": 0.6846544054674462, "learning_rate": 3.7304136253041366e-06, "loss": 0.6274, "step": 11253 }, { "epoch": 0.32857434819421333, "grad_norm": 0.7600479718648934, "learning_rate": 3.7302514193025146e-06, "loss": 0.702, "step": 11254 }, { "epoch": 0.3286035444220607, "grad_norm": 0.7484017361314359, "learning_rate": 3.7300892133008926e-06, "loss": 0.6743, "step": 11255 }, { "epoch": 0.32863274064990805, "grad_norm": 0.770261037551525, "learning_rate": 3.7299270072992706e-06, "loss": 0.6829, "step": 11256 }, { "epoch": 0.3286619368777554, "grad_norm": 0.6880730584307932, "learning_rate": 3.7297648012976486e-06, "loss": 0.6371, "step": 11257 }, { "epoch": 0.3286911331056028, "grad_norm": 0.9049434524553036, "learning_rate": 3.729602595296026e-06, "loss": 0.7451, "step": 11258 }, { "epoch": 0.32872032933345013, "grad_norm": 0.7036889610886614, "learning_rate": 3.729440389294404e-06, "loss": 0.6221, "step": 11259 }, { "epoch": 0.3287495255612975, "grad_norm": 0.661966464291962, "learning_rate": 3.729278183292782e-06, "loss": 0.5463, "step": 11260 }, { "epoch": 0.32877872178914486, "grad_norm": 0.7602323505357681, "learning_rate": 3.7291159772911602e-06, "loss": 0.6661, "step": 11261 }, { "epoch": 0.3288079180169922, "grad_norm": 0.8212070865156728, "learning_rate": 3.728953771289538e-06, "loss": 0.6328, "step": 11262 }, { "epoch": 0.3288371142448396, "grad_norm": 0.6797699100692842, "learning_rate": 3.728791565287916e-06, "loss": 0.5985, "step": 11263 }, { "epoch": 0.32886631047268694, "grad_norm": 0.7764313721235495, "learning_rate": 3.728629359286294e-06, "loss": 0.6977, "step": 11264 }, { "epoch": 0.3288955067005343, "grad_norm": 0.7684645372288207, "learning_rate": 3.728467153284672e-06, "loss": 0.7191, "step": 11265 }, { "epoch": 0.32892470292838166, "grad_norm": 0.7711211336033588, "learning_rate": 3.7283049472830494e-06, "loss": 0.6242, "step": 11266 }, { "epoch": 0.328953899156229, "grad_norm": 0.7601037837934594, "learning_rate": 3.7281427412814274e-06, "loss": 0.6121, "step": 11267 }, { "epoch": 0.3289830953840764, "grad_norm": 0.7275666441065175, "learning_rate": 3.7279805352798054e-06, "loss": 0.6593, "step": 11268 }, { "epoch": 0.32901229161192375, "grad_norm": 0.7920109583709186, "learning_rate": 3.7278183292781834e-06, "loss": 0.7842, "step": 11269 }, { "epoch": 0.3290414878397711, "grad_norm": 0.7786303577418289, "learning_rate": 3.727656123276562e-06, "loss": 0.6735, "step": 11270 }, { "epoch": 0.32907068406761847, "grad_norm": 0.7380035609991142, "learning_rate": 3.72749391727494e-06, "loss": 0.6658, "step": 11271 }, { "epoch": 0.32909988029546583, "grad_norm": 0.8268076173295389, "learning_rate": 3.7273317112733174e-06, "loss": 0.8046, "step": 11272 }, { "epoch": 0.3291290765233132, "grad_norm": 0.7683925945746352, "learning_rate": 3.7271695052716954e-06, "loss": 0.6618, "step": 11273 }, { "epoch": 0.32915827275116055, "grad_norm": 0.7182941559922386, "learning_rate": 3.7270072992700735e-06, "loss": 0.6464, "step": 11274 }, { "epoch": 0.3291874689790079, "grad_norm": 0.6975028401117004, "learning_rate": 3.7268450932684515e-06, "loss": 0.6617, "step": 11275 }, { "epoch": 0.3292166652068553, "grad_norm": 0.7780150890316689, "learning_rate": 3.726682887266829e-06, "loss": 0.7622, "step": 11276 }, { "epoch": 0.32924586143470264, "grad_norm": 0.6981439179679519, "learning_rate": 3.726520681265207e-06, "loss": 0.6314, "step": 11277 }, { "epoch": 0.32927505766255, "grad_norm": 0.7202733554241172, "learning_rate": 3.726358475263585e-06, "loss": 0.5855, "step": 11278 }, { "epoch": 0.32930425389039736, "grad_norm": 0.7555041411605685, "learning_rate": 3.726196269261963e-06, "loss": 0.7076, "step": 11279 }, { "epoch": 0.3293334501182447, "grad_norm": 0.7396362713486525, "learning_rate": 3.726034063260341e-06, "loss": 0.6642, "step": 11280 }, { "epoch": 0.3293626463460921, "grad_norm": 0.7433161651020758, "learning_rate": 3.7258718572587187e-06, "loss": 0.6121, "step": 11281 }, { "epoch": 0.32939184257393944, "grad_norm": 0.7874970973522941, "learning_rate": 3.7257096512570967e-06, "loss": 0.7491, "step": 11282 }, { "epoch": 0.3294210388017868, "grad_norm": 0.6858066623064972, "learning_rate": 3.7255474452554747e-06, "loss": 0.6094, "step": 11283 }, { "epoch": 0.32945023502963416, "grad_norm": 0.9370019281937131, "learning_rate": 3.7253852392538527e-06, "loss": 0.7952, "step": 11284 }, { "epoch": 0.3294794312574815, "grad_norm": 0.772435760098137, "learning_rate": 3.7252230332522303e-06, "loss": 0.6419, "step": 11285 }, { "epoch": 0.3295086274853289, "grad_norm": 0.7556446177494343, "learning_rate": 3.7250608272506083e-06, "loss": 0.6703, "step": 11286 }, { "epoch": 0.32953782371317625, "grad_norm": 0.7368558328128373, "learning_rate": 3.7248986212489863e-06, "loss": 0.6166, "step": 11287 }, { "epoch": 0.3295670199410236, "grad_norm": 0.7157850420043336, "learning_rate": 3.7247364152473647e-06, "loss": 0.6535, "step": 11288 }, { "epoch": 0.32959621616887097, "grad_norm": 0.8466649206439502, "learning_rate": 3.7245742092457427e-06, "loss": 0.6848, "step": 11289 }, { "epoch": 0.32962541239671833, "grad_norm": 0.7395112227228506, "learning_rate": 3.7244120032441207e-06, "loss": 0.6944, "step": 11290 }, { "epoch": 0.3296546086245657, "grad_norm": 0.7235255733819685, "learning_rate": 3.7242497972424983e-06, "loss": 0.6167, "step": 11291 }, { "epoch": 0.32968380485241305, "grad_norm": 0.7501988403166676, "learning_rate": 3.7240875912408763e-06, "loss": 0.6803, "step": 11292 }, { "epoch": 0.3297130010802604, "grad_norm": 0.7998295915024122, "learning_rate": 3.7239253852392543e-06, "loss": 0.7704, "step": 11293 }, { "epoch": 0.3297421973081078, "grad_norm": 0.7482130340104183, "learning_rate": 3.7237631792376323e-06, "loss": 0.7001, "step": 11294 }, { "epoch": 0.32977139353595514, "grad_norm": 0.7590223068929932, "learning_rate": 3.72360097323601e-06, "loss": 0.6428, "step": 11295 }, { "epoch": 0.3298005897638025, "grad_norm": 0.7424707688006298, "learning_rate": 3.723438767234388e-06, "loss": 0.7084, "step": 11296 }, { "epoch": 0.32982978599164986, "grad_norm": 0.7192785857584055, "learning_rate": 3.723276561232766e-06, "loss": 0.6449, "step": 11297 }, { "epoch": 0.3298589822194972, "grad_norm": 0.7628399425196948, "learning_rate": 3.723114355231144e-06, "loss": 0.6971, "step": 11298 }, { "epoch": 0.3298881784473446, "grad_norm": 0.77641632638113, "learning_rate": 3.722952149229522e-06, "loss": 0.7318, "step": 11299 }, { "epoch": 0.32991737467519194, "grad_norm": 0.7293069778582049, "learning_rate": 3.7227899432278995e-06, "loss": 0.6336, "step": 11300 }, { "epoch": 0.3299465709030393, "grad_norm": 0.7424932647846664, "learning_rate": 3.7226277372262775e-06, "loss": 0.6597, "step": 11301 }, { "epoch": 0.32997576713088667, "grad_norm": 0.7413765385830261, "learning_rate": 3.7224655312246555e-06, "loss": 0.7311, "step": 11302 }, { "epoch": 0.330004963358734, "grad_norm": 0.7677299052832327, "learning_rate": 3.7223033252230335e-06, "loss": 0.6096, "step": 11303 }, { "epoch": 0.3300341595865814, "grad_norm": 0.7233949010333088, "learning_rate": 3.722141119221411e-06, "loss": 0.5804, "step": 11304 }, { "epoch": 0.33006335581442875, "grad_norm": 0.7517186578557136, "learning_rate": 3.721978913219789e-06, "loss": 0.6448, "step": 11305 }, { "epoch": 0.3300925520422761, "grad_norm": 0.735726954664657, "learning_rate": 3.721816707218167e-06, "loss": 0.5922, "step": 11306 }, { "epoch": 0.3301217482701235, "grad_norm": 0.8029551248840537, "learning_rate": 3.7216545012165456e-06, "loss": 0.7745, "step": 11307 }, { "epoch": 0.3301509444979709, "grad_norm": 0.7320038624408945, "learning_rate": 3.7214922952149236e-06, "loss": 0.6909, "step": 11308 }, { "epoch": 0.33018014072581825, "grad_norm": 0.7377209669920146, "learning_rate": 3.7213300892133016e-06, "loss": 0.669, "step": 11309 }, { "epoch": 0.3302093369536656, "grad_norm": 0.7793579286174845, "learning_rate": 3.721167883211679e-06, "loss": 0.7451, "step": 11310 }, { "epoch": 0.33023853318151297, "grad_norm": 0.8231610661114811, "learning_rate": 3.721005677210057e-06, "loss": 0.7839, "step": 11311 }, { "epoch": 0.33026772940936033, "grad_norm": 0.7070789168573042, "learning_rate": 3.720843471208435e-06, "loss": 0.6169, "step": 11312 }, { "epoch": 0.3302969256372077, "grad_norm": 0.7497798012689892, "learning_rate": 3.720681265206813e-06, "loss": 0.6508, "step": 11313 }, { "epoch": 0.33032612186505506, "grad_norm": 0.7343430583566353, "learning_rate": 3.7205190592051908e-06, "loss": 0.6898, "step": 11314 }, { "epoch": 0.3303553180929024, "grad_norm": 0.7620357847000573, "learning_rate": 3.7203568532035688e-06, "loss": 0.7059, "step": 11315 }, { "epoch": 0.3303845143207498, "grad_norm": 0.7737748823690015, "learning_rate": 3.7201946472019468e-06, "loss": 0.6315, "step": 11316 }, { "epoch": 0.33041371054859714, "grad_norm": 0.7736287956112841, "learning_rate": 3.7200324412003248e-06, "loss": 0.611, "step": 11317 }, { "epoch": 0.3304429067764445, "grad_norm": 0.7106246187154591, "learning_rate": 3.7198702351987028e-06, "loss": 0.5956, "step": 11318 }, { "epoch": 0.33047210300429186, "grad_norm": 0.8092716331306116, "learning_rate": 3.7197080291970804e-06, "loss": 0.7231, "step": 11319 }, { "epoch": 0.3305012992321392, "grad_norm": 0.9273396505445158, "learning_rate": 3.7195458231954584e-06, "loss": 0.6225, "step": 11320 }, { "epoch": 0.3305304954599866, "grad_norm": 0.7490820314597215, "learning_rate": 3.7193836171938364e-06, "loss": 0.7007, "step": 11321 }, { "epoch": 0.33055969168783395, "grad_norm": 0.7220116051366239, "learning_rate": 3.7192214111922144e-06, "loss": 0.6561, "step": 11322 }, { "epoch": 0.3305888879156813, "grad_norm": 0.753679600244924, "learning_rate": 3.719059205190592e-06, "loss": 0.719, "step": 11323 }, { "epoch": 0.33061808414352867, "grad_norm": 0.7158356654945566, "learning_rate": 3.71889699918897e-06, "loss": 0.5928, "step": 11324 }, { "epoch": 0.33064728037137603, "grad_norm": 0.7085540760514668, "learning_rate": 3.718734793187348e-06, "loss": 0.6338, "step": 11325 }, { "epoch": 0.3306764765992234, "grad_norm": 0.769319920298136, "learning_rate": 3.7185725871857264e-06, "loss": 0.6975, "step": 11326 }, { "epoch": 0.33070567282707075, "grad_norm": 0.7365414227727354, "learning_rate": 3.7184103811841044e-06, "loss": 0.6232, "step": 11327 }, { "epoch": 0.3307348690549181, "grad_norm": 0.6879886306414968, "learning_rate": 3.7182481751824824e-06, "loss": 0.5929, "step": 11328 }, { "epoch": 0.3307640652827655, "grad_norm": 0.743479906632006, "learning_rate": 3.71808596918086e-06, "loss": 0.6434, "step": 11329 }, { "epoch": 0.33079326151061283, "grad_norm": 0.7501738484611962, "learning_rate": 3.717923763179238e-06, "loss": 0.68, "step": 11330 }, { "epoch": 0.3308224577384602, "grad_norm": 0.7648520017903618, "learning_rate": 3.717761557177616e-06, "loss": 0.6814, "step": 11331 }, { "epoch": 0.33085165396630756, "grad_norm": 0.7141317689486749, "learning_rate": 3.717599351175994e-06, "loss": 0.6321, "step": 11332 }, { "epoch": 0.3308808501941549, "grad_norm": 0.7964719781886928, "learning_rate": 3.7174371451743716e-06, "loss": 0.6827, "step": 11333 }, { "epoch": 0.3309100464220023, "grad_norm": 0.7747731905806348, "learning_rate": 3.7172749391727496e-06, "loss": 0.7185, "step": 11334 }, { "epoch": 0.33093924264984964, "grad_norm": 0.7163121986068189, "learning_rate": 3.7171127331711276e-06, "loss": 0.6311, "step": 11335 }, { "epoch": 0.330968438877697, "grad_norm": 0.75327006175176, "learning_rate": 3.7169505271695056e-06, "loss": 0.6864, "step": 11336 }, { "epoch": 0.33099763510554436, "grad_norm": 0.7511653141119062, "learning_rate": 3.7167883211678836e-06, "loss": 0.6901, "step": 11337 }, { "epoch": 0.3310268313333917, "grad_norm": 0.728251596019764, "learning_rate": 3.7166261151662612e-06, "loss": 0.6895, "step": 11338 }, { "epoch": 0.3310560275612391, "grad_norm": 0.743835558599546, "learning_rate": 3.7164639091646392e-06, "loss": 0.6652, "step": 11339 }, { "epoch": 0.33108522378908645, "grad_norm": 0.6781106157746764, "learning_rate": 3.7163017031630172e-06, "loss": 0.5438, "step": 11340 }, { "epoch": 0.3311144200169338, "grad_norm": 0.751796222019206, "learning_rate": 3.7161394971613952e-06, "loss": 0.6685, "step": 11341 }, { "epoch": 0.33114361624478117, "grad_norm": 0.7479995839179996, "learning_rate": 3.715977291159773e-06, "loss": 0.6678, "step": 11342 }, { "epoch": 0.33117281247262853, "grad_norm": 0.6901194718302225, "learning_rate": 3.715815085158151e-06, "loss": 0.6561, "step": 11343 }, { "epoch": 0.3312020087004759, "grad_norm": 0.6859900218711956, "learning_rate": 3.715652879156529e-06, "loss": 0.6373, "step": 11344 }, { "epoch": 0.33123120492832325, "grad_norm": 0.7133392007719587, "learning_rate": 3.7154906731549073e-06, "loss": 0.6804, "step": 11345 }, { "epoch": 0.3312604011561706, "grad_norm": 0.732823554089753, "learning_rate": 3.7153284671532853e-06, "loss": 0.685, "step": 11346 }, { "epoch": 0.331289597384018, "grad_norm": 0.7494519180627107, "learning_rate": 3.7151662611516633e-06, "loss": 0.6589, "step": 11347 }, { "epoch": 0.33131879361186534, "grad_norm": 0.700341451036855, "learning_rate": 3.715004055150041e-06, "loss": 0.6495, "step": 11348 }, { "epoch": 0.3313479898397127, "grad_norm": 0.7273202943265248, "learning_rate": 3.714841849148419e-06, "loss": 0.6558, "step": 11349 }, { "epoch": 0.33137718606756006, "grad_norm": 0.7284063728749357, "learning_rate": 3.714679643146797e-06, "loss": 0.6561, "step": 11350 }, { "epoch": 0.3314063822954074, "grad_norm": 0.8002742883346544, "learning_rate": 3.714517437145175e-06, "loss": 0.7245, "step": 11351 }, { "epoch": 0.3314355785232548, "grad_norm": 0.7243520647203995, "learning_rate": 3.7143552311435525e-06, "loss": 0.66, "step": 11352 }, { "epoch": 0.33146477475110214, "grad_norm": 0.7397118966201869, "learning_rate": 3.7141930251419305e-06, "loss": 0.7065, "step": 11353 }, { "epoch": 0.3314939709789495, "grad_norm": 0.7055118999586734, "learning_rate": 3.7140308191403085e-06, "loss": 0.5884, "step": 11354 }, { "epoch": 0.33152316720679686, "grad_norm": 0.669447338929062, "learning_rate": 3.7138686131386865e-06, "loss": 0.5922, "step": 11355 }, { "epoch": 0.3315523634346442, "grad_norm": 0.7524869562348419, "learning_rate": 3.7137064071370645e-06, "loss": 0.7282, "step": 11356 }, { "epoch": 0.3315815596624916, "grad_norm": 0.7946712693968924, "learning_rate": 3.713544201135442e-06, "loss": 0.7242, "step": 11357 }, { "epoch": 0.33161075589033895, "grad_norm": 0.8083734753170025, "learning_rate": 3.71338199513382e-06, "loss": 0.7476, "step": 11358 }, { "epoch": 0.3316399521181863, "grad_norm": 0.7775740750259946, "learning_rate": 3.713219789132198e-06, "loss": 0.763, "step": 11359 }, { "epoch": 0.33166914834603367, "grad_norm": 0.7412598305673124, "learning_rate": 3.713057583130576e-06, "loss": 0.6236, "step": 11360 }, { "epoch": 0.33169834457388103, "grad_norm": 0.7837410733738092, "learning_rate": 3.7128953771289537e-06, "loss": 0.6677, "step": 11361 }, { "epoch": 0.3317275408017284, "grad_norm": 0.7444277370959701, "learning_rate": 3.7127331711273317e-06, "loss": 0.6869, "step": 11362 }, { "epoch": 0.33175673702957575, "grad_norm": 0.7290987466918161, "learning_rate": 3.7125709651257097e-06, "loss": 0.5985, "step": 11363 }, { "epoch": 0.3317859332574231, "grad_norm": 0.7803202180485983, "learning_rate": 3.712408759124088e-06, "loss": 0.6613, "step": 11364 }, { "epoch": 0.3318151294852705, "grad_norm": 0.7440533332107081, "learning_rate": 3.712246553122466e-06, "loss": 0.6984, "step": 11365 }, { "epoch": 0.33184432571311784, "grad_norm": 0.7432887006637955, "learning_rate": 3.712084347120844e-06, "loss": 0.7181, "step": 11366 }, { "epoch": 0.33187352194096525, "grad_norm": 0.744382235957373, "learning_rate": 3.7119221411192217e-06, "loss": 0.7114, "step": 11367 }, { "epoch": 0.3319027181688126, "grad_norm": 0.7999118904375381, "learning_rate": 3.7117599351175997e-06, "loss": 0.7527, "step": 11368 }, { "epoch": 0.33193191439666, "grad_norm": 0.7537029475354764, "learning_rate": 3.7115977291159777e-06, "loss": 0.7483, "step": 11369 }, { "epoch": 0.33196111062450734, "grad_norm": 0.6871790090513167, "learning_rate": 3.7114355231143558e-06, "loss": 0.584, "step": 11370 }, { "epoch": 0.3319903068523547, "grad_norm": 1.0056577274760166, "learning_rate": 3.7112733171127333e-06, "loss": 0.6855, "step": 11371 }, { "epoch": 0.33201950308020206, "grad_norm": 0.7357495790108274, "learning_rate": 3.7111111111111113e-06, "loss": 0.6469, "step": 11372 }, { "epoch": 0.3320486993080494, "grad_norm": 0.7064177281215321, "learning_rate": 3.7109489051094893e-06, "loss": 0.6031, "step": 11373 }, { "epoch": 0.3320778955358968, "grad_norm": 0.8870727588754196, "learning_rate": 3.7107866991078674e-06, "loss": 0.6975, "step": 11374 }, { "epoch": 0.33210709176374414, "grad_norm": 0.828917183517508, "learning_rate": 3.7106244931062454e-06, "loss": 0.73, "step": 11375 }, { "epoch": 0.3321362879915915, "grad_norm": 0.8229259949008341, "learning_rate": 3.710462287104623e-06, "loss": 0.7027, "step": 11376 }, { "epoch": 0.33216548421943887, "grad_norm": 0.7092234029140851, "learning_rate": 3.710300081103001e-06, "loss": 0.6051, "step": 11377 }, { "epoch": 0.33219468044728623, "grad_norm": 0.9876527990742638, "learning_rate": 3.710137875101379e-06, "loss": 0.7798, "step": 11378 }, { "epoch": 0.3322238766751336, "grad_norm": 0.7521757751163265, "learning_rate": 3.709975669099757e-06, "loss": 0.6193, "step": 11379 }, { "epoch": 0.33225307290298095, "grad_norm": 0.7554969053464138, "learning_rate": 3.7098134630981346e-06, "loss": 0.6158, "step": 11380 }, { "epoch": 0.3322822691308283, "grad_norm": 0.7136158679465439, "learning_rate": 3.7096512570965126e-06, "loss": 0.6456, "step": 11381 }, { "epoch": 0.3323114653586757, "grad_norm": 0.7999282714467114, "learning_rate": 3.7094890510948906e-06, "loss": 0.6782, "step": 11382 }, { "epoch": 0.33234066158652303, "grad_norm": 0.825403852856773, "learning_rate": 3.709326845093269e-06, "loss": 0.7371, "step": 11383 }, { "epoch": 0.3323698578143704, "grad_norm": 0.8316206266438697, "learning_rate": 3.709164639091647e-06, "loss": 0.7576, "step": 11384 }, { "epoch": 0.33239905404221776, "grad_norm": 0.729867292311461, "learning_rate": 3.709002433090025e-06, "loss": 0.6053, "step": 11385 }, { "epoch": 0.3324282502700651, "grad_norm": 0.7196080146523877, "learning_rate": 3.7088402270884026e-06, "loss": 0.6841, "step": 11386 }, { "epoch": 0.3324574464979125, "grad_norm": 0.6863601634783985, "learning_rate": 3.7086780210867806e-06, "loss": 0.6009, "step": 11387 }, { "epoch": 0.33248664272575984, "grad_norm": 0.7476765718909509, "learning_rate": 3.7085158150851586e-06, "loss": 0.6859, "step": 11388 }, { "epoch": 0.3325158389536072, "grad_norm": 0.8036526882181846, "learning_rate": 3.7083536090835366e-06, "loss": 0.69, "step": 11389 }, { "epoch": 0.33254503518145456, "grad_norm": 0.8637246297870688, "learning_rate": 3.708191403081914e-06, "loss": 0.6447, "step": 11390 }, { "epoch": 0.3325742314093019, "grad_norm": 0.7541396554842076, "learning_rate": 3.708029197080292e-06, "loss": 0.6553, "step": 11391 }, { "epoch": 0.3326034276371493, "grad_norm": 0.6915713262740574, "learning_rate": 3.70786699107867e-06, "loss": 0.5755, "step": 11392 }, { "epoch": 0.33263262386499665, "grad_norm": 0.7499005734793971, "learning_rate": 3.7077047850770482e-06, "loss": 0.6416, "step": 11393 }, { "epoch": 0.332661820092844, "grad_norm": 0.7177030214812692, "learning_rate": 3.7075425790754262e-06, "loss": 0.6237, "step": 11394 }, { "epoch": 0.33269101632069137, "grad_norm": 0.6659060379871979, "learning_rate": 3.707380373073804e-06, "loss": 0.5932, "step": 11395 }, { "epoch": 0.33272021254853873, "grad_norm": 0.7487620428673347, "learning_rate": 3.707218167072182e-06, "loss": 0.6564, "step": 11396 }, { "epoch": 0.3327494087763861, "grad_norm": 0.7584944675235845, "learning_rate": 3.70705596107056e-06, "loss": 0.6873, "step": 11397 }, { "epoch": 0.33277860500423345, "grad_norm": 0.743364739357019, "learning_rate": 3.706893755068938e-06, "loss": 0.7203, "step": 11398 }, { "epoch": 0.3328078012320808, "grad_norm": 0.7551090137149442, "learning_rate": 3.7067315490673154e-06, "loss": 0.6929, "step": 11399 }, { "epoch": 0.3328369974599282, "grad_norm": 0.7480214213123951, "learning_rate": 3.7065693430656934e-06, "loss": 0.6438, "step": 11400 }, { "epoch": 0.33286619368777554, "grad_norm": 0.7778511077994449, "learning_rate": 3.7064071370640714e-06, "loss": 0.7651, "step": 11401 }, { "epoch": 0.3328953899156229, "grad_norm": 0.7149723689735177, "learning_rate": 3.70624493106245e-06, "loss": 0.6147, "step": 11402 }, { "epoch": 0.33292458614347026, "grad_norm": 0.7780114886770193, "learning_rate": 3.706082725060828e-06, "loss": 0.6762, "step": 11403 }, { "epoch": 0.3329537823713176, "grad_norm": 0.6928577255488834, "learning_rate": 3.705920519059206e-06, "loss": 0.5896, "step": 11404 }, { "epoch": 0.332982978599165, "grad_norm": 0.7068976730379791, "learning_rate": 3.7057583130575834e-06, "loss": 0.6305, "step": 11405 }, { "epoch": 0.33301217482701234, "grad_norm": 0.7347561012058507, "learning_rate": 3.7055961070559615e-06, "loss": 0.6784, "step": 11406 }, { "epoch": 0.3330413710548597, "grad_norm": 0.7511793374606215, "learning_rate": 3.7054339010543395e-06, "loss": 0.6645, "step": 11407 }, { "epoch": 0.33307056728270706, "grad_norm": 0.7914674992099078, "learning_rate": 3.7052716950527175e-06, "loss": 0.7648, "step": 11408 }, { "epoch": 0.3330997635105544, "grad_norm": 0.7817377362023157, "learning_rate": 3.705109489051095e-06, "loss": 0.7124, "step": 11409 }, { "epoch": 0.3331289597384018, "grad_norm": 0.7829532924404665, "learning_rate": 3.704947283049473e-06, "loss": 0.6991, "step": 11410 }, { "epoch": 0.33315815596624915, "grad_norm": 0.7304939246388945, "learning_rate": 3.704785077047851e-06, "loss": 0.6116, "step": 11411 }, { "epoch": 0.3331873521940965, "grad_norm": 0.7470551089419505, "learning_rate": 3.704622871046229e-06, "loss": 0.7064, "step": 11412 }, { "epoch": 0.33321654842194387, "grad_norm": 0.7509245676274554, "learning_rate": 3.704460665044607e-06, "loss": 0.7027, "step": 11413 }, { "epoch": 0.33324574464979123, "grad_norm": 0.742190462873267, "learning_rate": 3.7042984590429847e-06, "loss": 0.6618, "step": 11414 }, { "epoch": 0.3332749408776386, "grad_norm": 0.872539575023672, "learning_rate": 3.7041362530413627e-06, "loss": 0.6447, "step": 11415 }, { "epoch": 0.33330413710548595, "grad_norm": 0.7750870963565837, "learning_rate": 3.7039740470397407e-06, "loss": 0.6647, "step": 11416 }, { "epoch": 0.3333333333333333, "grad_norm": 0.7322102417720935, "learning_rate": 3.7038118410381187e-06, "loss": 0.6625, "step": 11417 }, { "epoch": 0.3333625295611807, "grad_norm": 0.7996427592020112, "learning_rate": 3.7036496350364963e-06, "loss": 0.7175, "step": 11418 }, { "epoch": 0.33339172578902804, "grad_norm": 0.7068953298629861, "learning_rate": 3.7034874290348743e-06, "loss": 0.6024, "step": 11419 }, { "epoch": 0.3334209220168754, "grad_norm": 0.7647660243506211, "learning_rate": 3.7033252230332523e-06, "loss": 0.6835, "step": 11420 }, { "epoch": 0.33345011824472276, "grad_norm": 0.9345666881187783, "learning_rate": 3.7031630170316307e-06, "loss": 0.6954, "step": 11421 }, { "epoch": 0.3334793144725701, "grad_norm": 0.6466898940797147, "learning_rate": 3.7030008110300087e-06, "loss": 0.5634, "step": 11422 }, { "epoch": 0.3335085107004175, "grad_norm": 0.7498301727888634, "learning_rate": 3.7028386050283867e-06, "loss": 0.6602, "step": 11423 }, { "epoch": 0.33353770692826484, "grad_norm": 0.7190830789628555, "learning_rate": 3.7026763990267643e-06, "loss": 0.6103, "step": 11424 }, { "epoch": 0.3335669031561122, "grad_norm": 0.7620229503175631, "learning_rate": 3.7025141930251423e-06, "loss": 0.6749, "step": 11425 }, { "epoch": 0.33359609938395957, "grad_norm": 0.765054658119272, "learning_rate": 3.7023519870235203e-06, "loss": 0.7108, "step": 11426 }, { "epoch": 0.3336252956118069, "grad_norm": 0.830010464558896, "learning_rate": 3.7021897810218983e-06, "loss": 0.6805, "step": 11427 }, { "epoch": 0.33365449183965434, "grad_norm": 0.7561976958575302, "learning_rate": 3.702027575020276e-06, "loss": 0.6974, "step": 11428 }, { "epoch": 0.3336836880675017, "grad_norm": 0.7970047312255757, "learning_rate": 3.701865369018654e-06, "loss": 0.7563, "step": 11429 }, { "epoch": 0.33371288429534907, "grad_norm": 0.6877649832320742, "learning_rate": 3.701703163017032e-06, "loss": 0.6081, "step": 11430 }, { "epoch": 0.3337420805231964, "grad_norm": 0.8074487751171074, "learning_rate": 3.70154095701541e-06, "loss": 0.7838, "step": 11431 }, { "epoch": 0.3337712767510438, "grad_norm": 0.7148211906834706, "learning_rate": 3.701378751013788e-06, "loss": 0.6214, "step": 11432 }, { "epoch": 0.33380047297889115, "grad_norm": 0.7662997655641659, "learning_rate": 3.7012165450121655e-06, "loss": 0.7442, "step": 11433 }, { "epoch": 0.3338296692067385, "grad_norm": 0.7043048176816539, "learning_rate": 3.7010543390105435e-06, "loss": 0.6233, "step": 11434 }, { "epoch": 0.33385886543458587, "grad_norm": 0.7010311653068925, "learning_rate": 3.7008921330089215e-06, "loss": 0.5791, "step": 11435 }, { "epoch": 0.33388806166243323, "grad_norm": 0.6893214979317307, "learning_rate": 3.7007299270072995e-06, "loss": 0.6295, "step": 11436 }, { "epoch": 0.3339172578902806, "grad_norm": 0.7518676568055948, "learning_rate": 3.700567721005677e-06, "loss": 0.6933, "step": 11437 }, { "epoch": 0.33394645411812796, "grad_norm": 0.6980946557187659, "learning_rate": 3.700405515004055e-06, "loss": 0.6506, "step": 11438 }, { "epoch": 0.3339756503459753, "grad_norm": 0.7912371871663312, "learning_rate": 3.7002433090024336e-06, "loss": 0.7084, "step": 11439 }, { "epoch": 0.3340048465738227, "grad_norm": 0.7794327348008069, "learning_rate": 3.7000811030008116e-06, "loss": 0.7314, "step": 11440 }, { "epoch": 0.33403404280167004, "grad_norm": 0.7399676670178235, "learning_rate": 3.6999188969991896e-06, "loss": 0.6896, "step": 11441 }, { "epoch": 0.3340632390295174, "grad_norm": 0.639101369415776, "learning_rate": 3.6997566909975676e-06, "loss": 0.5021, "step": 11442 }, { "epoch": 0.33409243525736476, "grad_norm": 0.7504909276204151, "learning_rate": 3.699594484995945e-06, "loss": 0.696, "step": 11443 }, { "epoch": 0.3341216314852121, "grad_norm": 0.7450199980880092, "learning_rate": 3.699432278994323e-06, "loss": 0.6618, "step": 11444 }, { "epoch": 0.3341508277130595, "grad_norm": 0.7730404726767381, "learning_rate": 3.699270072992701e-06, "loss": 0.6796, "step": 11445 }, { "epoch": 0.33418002394090685, "grad_norm": 0.7196472224313808, "learning_rate": 3.699107866991079e-06, "loss": 0.6641, "step": 11446 }, { "epoch": 0.3342092201687542, "grad_norm": 0.6912441249185819, "learning_rate": 3.6989456609894568e-06, "loss": 0.5828, "step": 11447 }, { "epoch": 0.33423841639660157, "grad_norm": 0.7290021847715871, "learning_rate": 3.6987834549878348e-06, "loss": 0.6507, "step": 11448 }, { "epoch": 0.33426761262444893, "grad_norm": 0.7313345098769706, "learning_rate": 3.6986212489862128e-06, "loss": 0.6784, "step": 11449 }, { "epoch": 0.3342968088522963, "grad_norm": 0.8334166370363802, "learning_rate": 3.698459042984591e-06, "loss": 0.7346, "step": 11450 }, { "epoch": 0.33432600508014365, "grad_norm": 0.7888464117267177, "learning_rate": 3.698296836982969e-06, "loss": 0.7706, "step": 11451 }, { "epoch": 0.334355201307991, "grad_norm": 0.7196303735682821, "learning_rate": 3.6981346309813464e-06, "loss": 0.6389, "step": 11452 }, { "epoch": 0.3343843975358384, "grad_norm": 0.7004770743302245, "learning_rate": 3.6979724249797244e-06, "loss": 0.6536, "step": 11453 }, { "epoch": 0.33441359376368573, "grad_norm": 0.7210139517585037, "learning_rate": 3.6978102189781024e-06, "loss": 0.6394, "step": 11454 }, { "epoch": 0.3344427899915331, "grad_norm": 0.7491252899554014, "learning_rate": 3.6976480129764804e-06, "loss": 0.6604, "step": 11455 }, { "epoch": 0.33447198621938046, "grad_norm": 0.7039253344327375, "learning_rate": 3.697485806974858e-06, "loss": 0.6085, "step": 11456 }, { "epoch": 0.3345011824472278, "grad_norm": 0.8309317237760614, "learning_rate": 3.697323600973236e-06, "loss": 0.6847, "step": 11457 }, { "epoch": 0.3345303786750752, "grad_norm": 0.8467776215820204, "learning_rate": 3.6971613949716144e-06, "loss": 0.6709, "step": 11458 }, { "epoch": 0.33455957490292254, "grad_norm": 0.7280271703785861, "learning_rate": 3.6969991889699924e-06, "loss": 0.5795, "step": 11459 }, { "epoch": 0.3345887711307699, "grad_norm": 0.6989215298914421, "learning_rate": 3.6968369829683704e-06, "loss": 0.5723, "step": 11460 }, { "epoch": 0.33461796735861726, "grad_norm": 0.7320345858816049, "learning_rate": 3.6966747769667484e-06, "loss": 0.6494, "step": 11461 }, { "epoch": 0.3346471635864646, "grad_norm": 0.7874056176532584, "learning_rate": 3.696512570965126e-06, "loss": 0.7611, "step": 11462 }, { "epoch": 0.334676359814312, "grad_norm": 0.7228622838489842, "learning_rate": 3.696350364963504e-06, "loss": 0.651, "step": 11463 }, { "epoch": 0.33470555604215935, "grad_norm": 0.7381665507659162, "learning_rate": 3.696188158961882e-06, "loss": 0.6243, "step": 11464 }, { "epoch": 0.3347347522700067, "grad_norm": 0.7522624560412247, "learning_rate": 3.69602595296026e-06, "loss": 0.6665, "step": 11465 }, { "epoch": 0.33476394849785407, "grad_norm": 0.7698489794691304, "learning_rate": 3.6958637469586376e-06, "loss": 0.6025, "step": 11466 }, { "epoch": 0.33479314472570143, "grad_norm": 0.7178453065105161, "learning_rate": 3.6957015409570156e-06, "loss": 0.664, "step": 11467 }, { "epoch": 0.3348223409535488, "grad_norm": 0.7100992380232739, "learning_rate": 3.6955393349553936e-06, "loss": 0.6041, "step": 11468 }, { "epoch": 0.33485153718139615, "grad_norm": 0.7393171859965141, "learning_rate": 3.6953771289537716e-06, "loss": 0.6507, "step": 11469 }, { "epoch": 0.3348807334092435, "grad_norm": 0.6962014088752065, "learning_rate": 3.6952149229521497e-06, "loss": 0.5251, "step": 11470 }, { "epoch": 0.3349099296370909, "grad_norm": 0.8019613477666101, "learning_rate": 3.6950527169505272e-06, "loss": 0.6543, "step": 11471 }, { "epoch": 0.33493912586493824, "grad_norm": 0.7191570814132258, "learning_rate": 3.6948905109489052e-06, "loss": 0.65, "step": 11472 }, { "epoch": 0.3349683220927856, "grad_norm": 0.6955459762303624, "learning_rate": 3.6947283049472833e-06, "loss": 0.6184, "step": 11473 }, { "epoch": 0.33499751832063296, "grad_norm": 0.7424615624741442, "learning_rate": 3.6945660989456613e-06, "loss": 0.6594, "step": 11474 }, { "epoch": 0.3350267145484803, "grad_norm": 0.8259764971779313, "learning_rate": 3.694403892944039e-06, "loss": 0.7926, "step": 11475 }, { "epoch": 0.3350559107763277, "grad_norm": 0.7930206552736401, "learning_rate": 3.694241686942417e-06, "loss": 0.7417, "step": 11476 }, { "epoch": 0.33508510700417504, "grad_norm": 0.7089545875749519, "learning_rate": 3.6940794809407953e-06, "loss": 0.6266, "step": 11477 }, { "epoch": 0.3351143032320224, "grad_norm": 0.737535670279026, "learning_rate": 3.6939172749391733e-06, "loss": 0.6593, "step": 11478 }, { "epoch": 0.33514349945986976, "grad_norm": 0.7561254076198379, "learning_rate": 3.6937550689375513e-06, "loss": 0.7121, "step": 11479 }, { "epoch": 0.3351726956877171, "grad_norm": 0.7569718083937006, "learning_rate": 3.6935928629359293e-06, "loss": 0.6843, "step": 11480 }, { "epoch": 0.3352018919155645, "grad_norm": 0.7403495895645852, "learning_rate": 3.693430656934307e-06, "loss": 0.6615, "step": 11481 }, { "epoch": 0.33523108814341185, "grad_norm": 0.717278592831589, "learning_rate": 3.693268450932685e-06, "loss": 0.6128, "step": 11482 }, { "epoch": 0.3352602843712592, "grad_norm": 0.8001737795256468, "learning_rate": 3.693106244931063e-06, "loss": 0.7257, "step": 11483 }, { "epoch": 0.33528948059910657, "grad_norm": 0.7581050199851037, "learning_rate": 3.692944038929441e-06, "loss": 0.7066, "step": 11484 }, { "epoch": 0.33531867682695393, "grad_norm": 0.9687805536025219, "learning_rate": 3.6927818329278185e-06, "loss": 0.6519, "step": 11485 }, { "epoch": 0.3353478730548013, "grad_norm": 0.7056519090561187, "learning_rate": 3.6926196269261965e-06, "loss": 0.6414, "step": 11486 }, { "epoch": 0.33537706928264865, "grad_norm": 0.6886129309654074, "learning_rate": 3.6924574209245745e-06, "loss": 0.641, "step": 11487 }, { "epoch": 0.33540626551049607, "grad_norm": 0.7374773591665947, "learning_rate": 3.6922952149229525e-06, "loss": 0.6482, "step": 11488 }, { "epoch": 0.33543546173834343, "grad_norm": 0.7783252183440441, "learning_rate": 3.6921330089213305e-06, "loss": 0.6519, "step": 11489 }, { "epoch": 0.3354646579661908, "grad_norm": 0.7129878901222436, "learning_rate": 3.691970802919708e-06, "loss": 0.6345, "step": 11490 }, { "epoch": 0.33549385419403815, "grad_norm": 0.6994268680388062, "learning_rate": 3.691808596918086e-06, "loss": 0.5893, "step": 11491 }, { "epoch": 0.3355230504218855, "grad_norm": 0.7518119607720118, "learning_rate": 3.691646390916464e-06, "loss": 0.6535, "step": 11492 }, { "epoch": 0.3355522466497329, "grad_norm": 0.8130705091018688, "learning_rate": 3.691484184914842e-06, "loss": 0.6406, "step": 11493 }, { "epoch": 0.33558144287758024, "grad_norm": 0.7082397228361351, "learning_rate": 3.6913219789132197e-06, "loss": 0.6523, "step": 11494 }, { "epoch": 0.3356106391054276, "grad_norm": 0.7199729452219193, "learning_rate": 3.6911597729115977e-06, "loss": 0.5771, "step": 11495 }, { "epoch": 0.33563983533327496, "grad_norm": 0.7242634968080617, "learning_rate": 3.690997566909976e-06, "loss": 0.6742, "step": 11496 }, { "epoch": 0.3356690315611223, "grad_norm": 0.7502066854990282, "learning_rate": 3.690835360908354e-06, "loss": 0.6966, "step": 11497 }, { "epoch": 0.3356982277889697, "grad_norm": 0.7581021933556406, "learning_rate": 3.690673154906732e-06, "loss": 0.7645, "step": 11498 }, { "epoch": 0.33572742401681704, "grad_norm": 0.7939673682372473, "learning_rate": 3.69051094890511e-06, "loss": 0.7264, "step": 11499 }, { "epoch": 0.3357566202446644, "grad_norm": 0.7486354210406226, "learning_rate": 3.6903487429034877e-06, "loss": 0.6722, "step": 11500 }, { "epoch": 0.33578581647251177, "grad_norm": 0.726441257496497, "learning_rate": 3.6901865369018657e-06, "loss": 0.6937, "step": 11501 }, { "epoch": 0.33581501270035913, "grad_norm": 0.717932881008349, "learning_rate": 3.6900243309002438e-06, "loss": 0.6524, "step": 11502 }, { "epoch": 0.3358442089282065, "grad_norm": 0.7038244364086212, "learning_rate": 3.6898621248986218e-06, "loss": 0.6352, "step": 11503 }, { "epoch": 0.33587340515605385, "grad_norm": 0.8718552732584027, "learning_rate": 3.6896999188969993e-06, "loss": 0.6851, "step": 11504 }, { "epoch": 0.3359026013839012, "grad_norm": 0.699679452865322, "learning_rate": 3.6895377128953774e-06, "loss": 0.607, "step": 11505 }, { "epoch": 0.3359317976117486, "grad_norm": 0.6690022619433968, "learning_rate": 3.6893755068937554e-06, "loss": 0.5602, "step": 11506 }, { "epoch": 0.33596099383959593, "grad_norm": 0.6897329873101978, "learning_rate": 3.6892133008921334e-06, "loss": 0.5864, "step": 11507 }, { "epoch": 0.3359901900674433, "grad_norm": 0.7863551994498698, "learning_rate": 3.6890510948905114e-06, "loss": 0.7282, "step": 11508 }, { "epoch": 0.33601938629529066, "grad_norm": 0.7158210300490017, "learning_rate": 3.688888888888889e-06, "loss": 0.6185, "step": 11509 }, { "epoch": 0.336048582523138, "grad_norm": 0.7369020321569497, "learning_rate": 3.688726682887267e-06, "loss": 0.7226, "step": 11510 }, { "epoch": 0.3360777787509854, "grad_norm": 0.7012116984427177, "learning_rate": 3.688564476885645e-06, "loss": 0.617, "step": 11511 }, { "epoch": 0.33610697497883274, "grad_norm": 0.771658634234467, "learning_rate": 3.688402270884023e-06, "loss": 0.7497, "step": 11512 }, { "epoch": 0.3361361712066801, "grad_norm": 0.6955729537332427, "learning_rate": 3.6882400648824006e-06, "loss": 0.544, "step": 11513 }, { "epoch": 0.33616536743452746, "grad_norm": 0.7264998773758898, "learning_rate": 3.6880778588807786e-06, "loss": 0.6811, "step": 11514 }, { "epoch": 0.3361945636623748, "grad_norm": 0.7347822799960114, "learning_rate": 3.687915652879157e-06, "loss": 0.6335, "step": 11515 }, { "epoch": 0.3362237598902222, "grad_norm": 0.8160840047022633, "learning_rate": 3.687753446877535e-06, "loss": 0.6897, "step": 11516 }, { "epoch": 0.33625295611806955, "grad_norm": 0.7265570371386727, "learning_rate": 3.687591240875913e-06, "loss": 0.6815, "step": 11517 }, { "epoch": 0.3362821523459169, "grad_norm": 0.7408169322825789, "learning_rate": 3.687429034874291e-06, "loss": 0.6095, "step": 11518 }, { "epoch": 0.33631134857376427, "grad_norm": 0.879197715913706, "learning_rate": 3.6872668288726686e-06, "loss": 0.7574, "step": 11519 }, { "epoch": 0.33634054480161163, "grad_norm": 0.7178242265683763, "learning_rate": 3.6871046228710466e-06, "loss": 0.6211, "step": 11520 }, { "epoch": 0.336369741029459, "grad_norm": 1.2360800778802377, "learning_rate": 3.6869424168694246e-06, "loss": 0.6107, "step": 11521 }, { "epoch": 0.33639893725730635, "grad_norm": 0.7582928203426416, "learning_rate": 3.6867802108678026e-06, "loss": 0.6791, "step": 11522 }, { "epoch": 0.3364281334851537, "grad_norm": 0.7682444758451485, "learning_rate": 3.68661800486618e-06, "loss": 0.7728, "step": 11523 }, { "epoch": 0.3364573297130011, "grad_norm": 0.714752874235827, "learning_rate": 3.6864557988645582e-06, "loss": 0.6071, "step": 11524 }, { "epoch": 0.33648652594084844, "grad_norm": 0.7601769305195073, "learning_rate": 3.6862935928629362e-06, "loss": 0.6923, "step": 11525 }, { "epoch": 0.3365157221686958, "grad_norm": 0.7568876950296393, "learning_rate": 3.6861313868613142e-06, "loss": 0.6347, "step": 11526 }, { "epoch": 0.33654491839654316, "grad_norm": 0.7761903661912779, "learning_rate": 3.685969180859692e-06, "loss": 0.6963, "step": 11527 }, { "epoch": 0.3365741146243905, "grad_norm": 0.7172487575313171, "learning_rate": 3.68580697485807e-06, "loss": 0.6501, "step": 11528 }, { "epoch": 0.3366033108522379, "grad_norm": 0.6983968614482817, "learning_rate": 3.685644768856448e-06, "loss": 0.6151, "step": 11529 }, { "epoch": 0.33663250708008524, "grad_norm": 0.7178904222925565, "learning_rate": 3.685482562854826e-06, "loss": 0.6449, "step": 11530 }, { "epoch": 0.3366617033079326, "grad_norm": 0.6875091032781371, "learning_rate": 3.685320356853204e-06, "loss": 0.6321, "step": 11531 }, { "epoch": 0.33669089953577996, "grad_norm": 0.7240086707444535, "learning_rate": 3.6851581508515814e-06, "loss": 0.637, "step": 11532 }, { "epoch": 0.3367200957636273, "grad_norm": 0.6880357557280969, "learning_rate": 3.6849959448499594e-06, "loss": 0.5768, "step": 11533 }, { "epoch": 0.3367492919914747, "grad_norm": 0.754830112315009, "learning_rate": 3.684833738848338e-06, "loss": 0.6757, "step": 11534 }, { "epoch": 0.33677848821932205, "grad_norm": 1.0446864534311289, "learning_rate": 3.684671532846716e-06, "loss": 0.6826, "step": 11535 }, { "epoch": 0.3368076844471694, "grad_norm": 0.7525360875511179, "learning_rate": 3.684509326845094e-06, "loss": 0.6811, "step": 11536 }, { "epoch": 0.33683688067501677, "grad_norm": 0.7684762876277955, "learning_rate": 3.684347120843472e-06, "loss": 0.6985, "step": 11537 }, { "epoch": 0.33686607690286413, "grad_norm": 0.7558736653019702, "learning_rate": 3.6841849148418495e-06, "loss": 0.7121, "step": 11538 }, { "epoch": 0.3368952731307115, "grad_norm": 0.8023343889005384, "learning_rate": 3.6840227088402275e-06, "loss": 0.6267, "step": 11539 }, { "epoch": 0.33692446935855885, "grad_norm": 0.7604152088026498, "learning_rate": 3.6838605028386055e-06, "loss": 0.6913, "step": 11540 }, { "epoch": 0.3369536655864062, "grad_norm": 0.7273307310831018, "learning_rate": 3.6836982968369835e-06, "loss": 0.631, "step": 11541 }, { "epoch": 0.3369828618142536, "grad_norm": 0.7617295775066086, "learning_rate": 3.683536090835361e-06, "loss": 0.6329, "step": 11542 }, { "epoch": 0.33701205804210094, "grad_norm": 0.7324328564016914, "learning_rate": 3.683373884833739e-06, "loss": 0.6505, "step": 11543 }, { "epoch": 0.3370412542699483, "grad_norm": 0.6940601351574363, "learning_rate": 3.683211678832117e-06, "loss": 0.6104, "step": 11544 }, { "epoch": 0.33707045049779566, "grad_norm": 0.7329220535565708, "learning_rate": 3.683049472830495e-06, "loss": 0.6293, "step": 11545 }, { "epoch": 0.337099646725643, "grad_norm": 0.6883485522901535, "learning_rate": 3.6828872668288727e-06, "loss": 0.5944, "step": 11546 }, { "epoch": 0.3371288429534904, "grad_norm": 0.7257513604099577, "learning_rate": 3.6827250608272507e-06, "loss": 0.6345, "step": 11547 }, { "epoch": 0.3371580391813378, "grad_norm": 0.7250491933329556, "learning_rate": 3.6825628548256287e-06, "loss": 0.6329, "step": 11548 }, { "epoch": 0.33718723540918516, "grad_norm": 0.721029319933447, "learning_rate": 3.6824006488240067e-06, "loss": 0.6029, "step": 11549 }, { "epoch": 0.3372164316370325, "grad_norm": 0.7065504492328614, "learning_rate": 3.6822384428223847e-06, "loss": 0.6575, "step": 11550 }, { "epoch": 0.3372456278648799, "grad_norm": 0.6950275021989792, "learning_rate": 3.6820762368207623e-06, "loss": 0.6039, "step": 11551 }, { "epoch": 0.33727482409272724, "grad_norm": 0.7073638478376094, "learning_rate": 3.6819140308191403e-06, "loss": 0.6614, "step": 11552 }, { "epoch": 0.3373040203205746, "grad_norm": 0.7359091293428204, "learning_rate": 3.6817518248175187e-06, "loss": 0.7036, "step": 11553 }, { "epoch": 0.33733321654842197, "grad_norm": 0.7647807852518409, "learning_rate": 3.6815896188158967e-06, "loss": 0.6671, "step": 11554 }, { "epoch": 0.3373624127762693, "grad_norm": 0.7142789323499977, "learning_rate": 3.6814274128142747e-06, "loss": 0.6158, "step": 11555 }, { "epoch": 0.3373916090041167, "grad_norm": 0.7679592862628564, "learning_rate": 3.6812652068126527e-06, "loss": 0.658, "step": 11556 }, { "epoch": 0.33742080523196405, "grad_norm": 0.7716568441097866, "learning_rate": 3.6811030008110303e-06, "loss": 0.7338, "step": 11557 }, { "epoch": 0.3374500014598114, "grad_norm": 0.7966431118605247, "learning_rate": 3.6809407948094083e-06, "loss": 0.6544, "step": 11558 }, { "epoch": 0.33747919768765877, "grad_norm": 0.7973856455746693, "learning_rate": 3.6807785888077863e-06, "loss": 0.7186, "step": 11559 }, { "epoch": 0.33750839391550613, "grad_norm": 0.7486755089206589, "learning_rate": 3.6806163828061643e-06, "loss": 0.6833, "step": 11560 }, { "epoch": 0.3375375901433535, "grad_norm": 0.6970205779648622, "learning_rate": 3.680454176804542e-06, "loss": 0.5781, "step": 11561 }, { "epoch": 0.33756678637120086, "grad_norm": 0.7637121203940785, "learning_rate": 3.68029197080292e-06, "loss": 0.6621, "step": 11562 }, { "epoch": 0.3375959825990482, "grad_norm": 0.6940222202276841, "learning_rate": 3.680129764801298e-06, "loss": 0.5954, "step": 11563 }, { "epoch": 0.3376251788268956, "grad_norm": 0.7411638900315237, "learning_rate": 3.679967558799676e-06, "loss": 0.6344, "step": 11564 }, { "epoch": 0.33765437505474294, "grad_norm": 0.7825732881090082, "learning_rate": 3.6798053527980535e-06, "loss": 0.7696, "step": 11565 }, { "epoch": 0.3376835712825903, "grad_norm": 0.7363366761090798, "learning_rate": 3.6796431467964315e-06, "loss": 0.6489, "step": 11566 }, { "epoch": 0.33771276751043766, "grad_norm": 0.7209893969905575, "learning_rate": 3.6794809407948095e-06, "loss": 0.6638, "step": 11567 }, { "epoch": 0.337741963738285, "grad_norm": 0.7309868133301384, "learning_rate": 3.6793187347931875e-06, "loss": 0.6401, "step": 11568 }, { "epoch": 0.3377711599661324, "grad_norm": 0.7584991749698774, "learning_rate": 3.6791565287915656e-06, "loss": 0.6691, "step": 11569 }, { "epoch": 0.33780035619397975, "grad_norm": 0.7207997852089579, "learning_rate": 3.678994322789943e-06, "loss": 0.6065, "step": 11570 }, { "epoch": 0.3378295524218271, "grad_norm": 0.7241295960968629, "learning_rate": 3.678832116788321e-06, "loss": 0.6457, "step": 11571 }, { "epoch": 0.33785874864967447, "grad_norm": 0.7645689723151167, "learning_rate": 3.6786699107866996e-06, "loss": 0.7383, "step": 11572 }, { "epoch": 0.33788794487752183, "grad_norm": 0.6842799351729479, "learning_rate": 3.6785077047850776e-06, "loss": 0.6022, "step": 11573 }, { "epoch": 0.3379171411053692, "grad_norm": 0.7037602098216418, "learning_rate": 3.6783454987834556e-06, "loss": 0.6377, "step": 11574 }, { "epoch": 0.33794633733321655, "grad_norm": 0.7333986429258113, "learning_rate": 3.6781832927818336e-06, "loss": 0.7042, "step": 11575 }, { "epoch": 0.3379755335610639, "grad_norm": 0.7197728333019061, "learning_rate": 3.678021086780211e-06, "loss": 0.6393, "step": 11576 }, { "epoch": 0.3380047297889113, "grad_norm": 0.7371657749103045, "learning_rate": 3.677858880778589e-06, "loss": 0.6828, "step": 11577 }, { "epoch": 0.33803392601675863, "grad_norm": 0.7181847515098012, "learning_rate": 3.677696674776967e-06, "loss": 0.6494, "step": 11578 }, { "epoch": 0.338063122244606, "grad_norm": 0.7224283273339812, "learning_rate": 3.677534468775345e-06, "loss": 0.6596, "step": 11579 }, { "epoch": 0.33809231847245336, "grad_norm": 0.7334750169978579, "learning_rate": 3.6773722627737228e-06, "loss": 0.5862, "step": 11580 }, { "epoch": 0.3381215147003007, "grad_norm": 0.7614582349948216, "learning_rate": 3.6772100567721008e-06, "loss": 0.7039, "step": 11581 }, { "epoch": 0.3381507109281481, "grad_norm": 0.7563674160940218, "learning_rate": 3.677047850770479e-06, "loss": 0.6848, "step": 11582 }, { "epoch": 0.33817990715599544, "grad_norm": 0.7083370085207449, "learning_rate": 3.676885644768857e-06, "loss": 0.6502, "step": 11583 }, { "epoch": 0.3382091033838428, "grad_norm": 0.8228923208543378, "learning_rate": 3.6767234387672344e-06, "loss": 0.6611, "step": 11584 }, { "epoch": 0.33823829961169016, "grad_norm": 0.753512273503482, "learning_rate": 3.6765612327656124e-06, "loss": 0.6529, "step": 11585 }, { "epoch": 0.3382674958395375, "grad_norm": 0.7272308949466123, "learning_rate": 3.6763990267639904e-06, "loss": 0.6754, "step": 11586 }, { "epoch": 0.3382966920673849, "grad_norm": 0.7752332741693269, "learning_rate": 3.6762368207623684e-06, "loss": 0.7248, "step": 11587 }, { "epoch": 0.33832588829523225, "grad_norm": 0.7601759157044746, "learning_rate": 3.6760746147607464e-06, "loss": 0.6613, "step": 11588 }, { "epoch": 0.3383550845230796, "grad_norm": 0.735675573911423, "learning_rate": 3.675912408759124e-06, "loss": 0.7091, "step": 11589 }, { "epoch": 0.33838428075092697, "grad_norm": 0.7202372854128203, "learning_rate": 3.675750202757502e-06, "loss": 0.6126, "step": 11590 }, { "epoch": 0.33841347697877433, "grad_norm": 0.7558865134079982, "learning_rate": 3.6755879967558804e-06, "loss": 0.6982, "step": 11591 }, { "epoch": 0.3384426732066217, "grad_norm": 0.7772786127673859, "learning_rate": 3.6754257907542584e-06, "loss": 0.7124, "step": 11592 }, { "epoch": 0.33847186943446905, "grad_norm": 0.7613163355471185, "learning_rate": 3.6752635847526364e-06, "loss": 0.6834, "step": 11593 }, { "epoch": 0.3385010656623164, "grad_norm": 0.7300379281832156, "learning_rate": 3.6751013787510145e-06, "loss": 0.6682, "step": 11594 }, { "epoch": 0.3385302618901638, "grad_norm": 0.6873866970759632, "learning_rate": 3.674939172749392e-06, "loss": 0.5634, "step": 11595 }, { "epoch": 0.33855945811801114, "grad_norm": 0.7944360934875848, "learning_rate": 3.67477696674777e-06, "loss": 0.7441, "step": 11596 }, { "epoch": 0.3385886543458585, "grad_norm": 0.716927418801266, "learning_rate": 3.674614760746148e-06, "loss": 0.6895, "step": 11597 }, { "epoch": 0.33861785057370586, "grad_norm": 0.7471408441216832, "learning_rate": 3.674452554744526e-06, "loss": 0.6302, "step": 11598 }, { "epoch": 0.3386470468015532, "grad_norm": 0.824481334633608, "learning_rate": 3.6742903487429036e-06, "loss": 0.6891, "step": 11599 }, { "epoch": 0.3386762430294006, "grad_norm": 0.7446828960452625, "learning_rate": 3.6741281427412816e-06, "loss": 0.6634, "step": 11600 }, { "epoch": 0.33870543925724794, "grad_norm": 0.7630799213369436, "learning_rate": 3.6739659367396597e-06, "loss": 0.714, "step": 11601 }, { "epoch": 0.3387346354850953, "grad_norm": 0.7622532322905226, "learning_rate": 3.6738037307380377e-06, "loss": 0.7118, "step": 11602 }, { "epoch": 0.33876383171294266, "grad_norm": 0.8154856861276406, "learning_rate": 3.6736415247364152e-06, "loss": 0.7567, "step": 11603 }, { "epoch": 0.33879302794079, "grad_norm": 0.7073820755760412, "learning_rate": 3.6734793187347933e-06, "loss": 0.616, "step": 11604 }, { "epoch": 0.3388222241686374, "grad_norm": 0.7752091527998023, "learning_rate": 3.6733171127331713e-06, "loss": 0.7262, "step": 11605 }, { "epoch": 0.33885142039648475, "grad_norm": 0.6842565307705718, "learning_rate": 3.6731549067315493e-06, "loss": 0.5571, "step": 11606 }, { "epoch": 0.3388806166243321, "grad_norm": 0.7758631762790864, "learning_rate": 3.6729927007299273e-06, "loss": 0.7046, "step": 11607 }, { "epoch": 0.3389098128521795, "grad_norm": 0.7626468801610504, "learning_rate": 3.672830494728305e-06, "loss": 0.7024, "step": 11608 }, { "epoch": 0.3389390090800269, "grad_norm": 0.811231869633059, "learning_rate": 3.6726682887266833e-06, "loss": 0.6837, "step": 11609 }, { "epoch": 0.33896820530787425, "grad_norm": 0.7853081256803746, "learning_rate": 3.6725060827250613e-06, "loss": 0.7118, "step": 11610 }, { "epoch": 0.3389974015357216, "grad_norm": 0.6983215954911397, "learning_rate": 3.6723438767234393e-06, "loss": 0.6443, "step": 11611 }, { "epoch": 0.33902659776356897, "grad_norm": 0.8094526580571653, "learning_rate": 3.6721816707218173e-06, "loss": 0.6725, "step": 11612 }, { "epoch": 0.33905579399141633, "grad_norm": 0.7630456036832534, "learning_rate": 3.6720194647201953e-06, "loss": 0.6582, "step": 11613 }, { "epoch": 0.3390849902192637, "grad_norm": 0.7228575205832874, "learning_rate": 3.671857258718573e-06, "loss": 0.621, "step": 11614 }, { "epoch": 0.33911418644711105, "grad_norm": 0.6765670773191079, "learning_rate": 3.671695052716951e-06, "loss": 0.5747, "step": 11615 }, { "epoch": 0.3391433826749584, "grad_norm": 0.7891413366741602, "learning_rate": 3.671532846715329e-06, "loss": 0.7261, "step": 11616 }, { "epoch": 0.3391725789028058, "grad_norm": 0.7452967167005167, "learning_rate": 3.671370640713707e-06, "loss": 0.7535, "step": 11617 }, { "epoch": 0.33920177513065314, "grad_norm": 0.702707605248049, "learning_rate": 3.6712084347120845e-06, "loss": 0.6017, "step": 11618 }, { "epoch": 0.3392309713585005, "grad_norm": 0.7399021282396485, "learning_rate": 3.6710462287104625e-06, "loss": 0.672, "step": 11619 }, { "epoch": 0.33926016758634786, "grad_norm": 0.7706746467133102, "learning_rate": 3.6708840227088405e-06, "loss": 0.7408, "step": 11620 }, { "epoch": 0.3392893638141952, "grad_norm": 0.6957094634821503, "learning_rate": 3.6707218167072185e-06, "loss": 0.5807, "step": 11621 }, { "epoch": 0.3393185600420426, "grad_norm": 0.6956159878968045, "learning_rate": 3.670559610705596e-06, "loss": 0.6034, "step": 11622 }, { "epoch": 0.33934775626988994, "grad_norm": 0.744310723482963, "learning_rate": 3.670397404703974e-06, "loss": 0.6537, "step": 11623 }, { "epoch": 0.3393769524977373, "grad_norm": 0.7120787462711377, "learning_rate": 3.670235198702352e-06, "loss": 0.6603, "step": 11624 }, { "epoch": 0.33940614872558467, "grad_norm": 0.9500790546267127, "learning_rate": 3.67007299270073e-06, "loss": 0.7561, "step": 11625 }, { "epoch": 0.339435344953432, "grad_norm": 0.7113895218939518, "learning_rate": 3.669910786699108e-06, "loss": 0.6521, "step": 11626 }, { "epoch": 0.3394645411812794, "grad_norm": 0.8810915995799883, "learning_rate": 3.6697485806974857e-06, "loss": 0.7043, "step": 11627 }, { "epoch": 0.33949373740912675, "grad_norm": 0.6718952262527129, "learning_rate": 3.669586374695864e-06, "loss": 0.6257, "step": 11628 }, { "epoch": 0.3395229336369741, "grad_norm": 0.7458457627150371, "learning_rate": 3.669424168694242e-06, "loss": 0.6377, "step": 11629 }, { "epoch": 0.3395521298648215, "grad_norm": 0.6607503215424272, "learning_rate": 3.66926196269262e-06, "loss": 0.5627, "step": 11630 }, { "epoch": 0.33958132609266883, "grad_norm": 0.7208021690049762, "learning_rate": 3.669099756690998e-06, "loss": 0.6452, "step": 11631 }, { "epoch": 0.3396105223205162, "grad_norm": 0.7455553677180121, "learning_rate": 3.668937550689376e-06, "loss": 0.6127, "step": 11632 }, { "epoch": 0.33963971854836356, "grad_norm": 0.6997434152107704, "learning_rate": 3.6687753446877538e-06, "loss": 0.617, "step": 11633 }, { "epoch": 0.3396689147762109, "grad_norm": 0.7714288410987371, "learning_rate": 3.6686131386861318e-06, "loss": 0.7358, "step": 11634 }, { "epoch": 0.3396981110040583, "grad_norm": 0.872244397797519, "learning_rate": 3.6684509326845098e-06, "loss": 0.7339, "step": 11635 }, { "epoch": 0.33972730723190564, "grad_norm": 0.7427099788036172, "learning_rate": 3.6682887266828878e-06, "loss": 0.6613, "step": 11636 }, { "epoch": 0.339756503459753, "grad_norm": 0.6852625010823007, "learning_rate": 3.6681265206812654e-06, "loss": 0.5781, "step": 11637 }, { "epoch": 0.33978569968760036, "grad_norm": 0.6791553417978472, "learning_rate": 3.6679643146796434e-06, "loss": 0.5541, "step": 11638 }, { "epoch": 0.3398148959154477, "grad_norm": 0.7477095231873978, "learning_rate": 3.6678021086780214e-06, "loss": 0.6709, "step": 11639 }, { "epoch": 0.3398440921432951, "grad_norm": 0.7420362971814144, "learning_rate": 3.6676399026763994e-06, "loss": 0.6151, "step": 11640 }, { "epoch": 0.33987328837114245, "grad_norm": 0.7595267756824485, "learning_rate": 3.667477696674777e-06, "loss": 0.6272, "step": 11641 }, { "epoch": 0.3399024845989898, "grad_norm": 0.7344991962917803, "learning_rate": 3.667315490673155e-06, "loss": 0.6602, "step": 11642 }, { "epoch": 0.33993168082683717, "grad_norm": 0.7383855935054219, "learning_rate": 3.667153284671533e-06, "loss": 0.6486, "step": 11643 }, { "epoch": 0.33996087705468453, "grad_norm": 0.8485040127759986, "learning_rate": 3.666991078669911e-06, "loss": 0.6083, "step": 11644 }, { "epoch": 0.3399900732825319, "grad_norm": 0.6971262701548044, "learning_rate": 3.666828872668289e-06, "loss": 0.5898, "step": 11645 }, { "epoch": 0.34001926951037925, "grad_norm": 0.7334974334105366, "learning_rate": 3.6666666666666666e-06, "loss": 0.6542, "step": 11646 }, { "epoch": 0.3400484657382266, "grad_norm": 0.6728055171631754, "learning_rate": 3.666504460665045e-06, "loss": 0.5852, "step": 11647 }, { "epoch": 0.340077661966074, "grad_norm": 0.7428585912165387, "learning_rate": 3.666342254663423e-06, "loss": 0.6687, "step": 11648 }, { "epoch": 0.34010685819392134, "grad_norm": 0.7712525698238152, "learning_rate": 3.666180048661801e-06, "loss": 0.6603, "step": 11649 }, { "epoch": 0.3401360544217687, "grad_norm": 0.7228958434109898, "learning_rate": 3.666017842660179e-06, "loss": 0.6139, "step": 11650 }, { "epoch": 0.34016525064961606, "grad_norm": 0.702276246654231, "learning_rate": 3.6658556366585566e-06, "loss": 0.6391, "step": 11651 }, { "epoch": 0.3401944468774634, "grad_norm": 0.7454519029615161, "learning_rate": 3.6656934306569346e-06, "loss": 0.6136, "step": 11652 }, { "epoch": 0.3402236431053108, "grad_norm": 0.7844779158403338, "learning_rate": 3.6655312246553126e-06, "loss": 0.6911, "step": 11653 }, { "epoch": 0.34025283933315814, "grad_norm": 0.74949254786956, "learning_rate": 3.6653690186536906e-06, "loss": 0.6987, "step": 11654 }, { "epoch": 0.3402820355610055, "grad_norm": 0.7327961887818721, "learning_rate": 3.6652068126520686e-06, "loss": 0.6603, "step": 11655 }, { "epoch": 0.34031123178885286, "grad_norm": 0.7009656712638043, "learning_rate": 3.6650446066504462e-06, "loss": 0.6214, "step": 11656 }, { "epoch": 0.3403404280167002, "grad_norm": 0.7692745404890502, "learning_rate": 3.6648824006488242e-06, "loss": 0.701, "step": 11657 }, { "epoch": 0.3403696242445476, "grad_norm": 0.6704274464958108, "learning_rate": 3.6647201946472022e-06, "loss": 0.5754, "step": 11658 }, { "epoch": 0.34039882047239495, "grad_norm": 0.6779294495175011, "learning_rate": 3.6645579886455802e-06, "loss": 0.5909, "step": 11659 }, { "epoch": 0.3404280167002423, "grad_norm": 0.7668632800452194, "learning_rate": 3.664395782643958e-06, "loss": 0.7101, "step": 11660 }, { "epoch": 0.34045721292808967, "grad_norm": 0.7355829258755895, "learning_rate": 3.664233576642336e-06, "loss": 0.6556, "step": 11661 }, { "epoch": 0.34048640915593703, "grad_norm": 0.7384552637439492, "learning_rate": 3.664071370640714e-06, "loss": 0.6511, "step": 11662 }, { "epoch": 0.3405156053837844, "grad_norm": 0.6999016229341658, "learning_rate": 3.663909164639092e-06, "loss": 0.5778, "step": 11663 }, { "epoch": 0.34054480161163175, "grad_norm": 0.7102574323869763, "learning_rate": 3.66374695863747e-06, "loss": 0.6191, "step": 11664 }, { "epoch": 0.3405739978394791, "grad_norm": 0.7159790104957239, "learning_rate": 3.6635847526358474e-06, "loss": 0.6707, "step": 11665 }, { "epoch": 0.3406031940673265, "grad_norm": 1.026195864251708, "learning_rate": 3.663422546634226e-06, "loss": 0.7608, "step": 11666 }, { "epoch": 0.34063239029517384, "grad_norm": 0.7725954617294726, "learning_rate": 3.663260340632604e-06, "loss": 0.7134, "step": 11667 }, { "epoch": 0.3406615865230212, "grad_norm": 0.6995232703977063, "learning_rate": 3.663098134630982e-06, "loss": 0.6065, "step": 11668 }, { "epoch": 0.3406907827508686, "grad_norm": 0.7795551549171673, "learning_rate": 3.66293592862936e-06, "loss": 0.7609, "step": 11669 }, { "epoch": 0.340719978978716, "grad_norm": 0.706249367236681, "learning_rate": 3.6627737226277375e-06, "loss": 0.6576, "step": 11670 }, { "epoch": 0.34074917520656334, "grad_norm": 1.0647684348211952, "learning_rate": 3.6626115166261155e-06, "loss": 0.6051, "step": 11671 }, { "epoch": 0.3407783714344107, "grad_norm": 0.7231312749522729, "learning_rate": 3.6624493106244935e-06, "loss": 0.6968, "step": 11672 }, { "epoch": 0.34080756766225806, "grad_norm": 0.7925367312927211, "learning_rate": 3.6622871046228715e-06, "loss": 0.7187, "step": 11673 }, { "epoch": 0.3408367638901054, "grad_norm": 0.7103065213591219, "learning_rate": 3.6621248986212495e-06, "loss": 0.5564, "step": 11674 }, { "epoch": 0.3408659601179528, "grad_norm": 0.7157915671083173, "learning_rate": 3.661962692619627e-06, "loss": 0.6937, "step": 11675 }, { "epoch": 0.34089515634580014, "grad_norm": 0.6838412209998721, "learning_rate": 3.661800486618005e-06, "loss": 0.6053, "step": 11676 }, { "epoch": 0.3409243525736475, "grad_norm": 0.6776732917390287, "learning_rate": 3.661638280616383e-06, "loss": 0.5372, "step": 11677 }, { "epoch": 0.34095354880149487, "grad_norm": 0.8079865322483625, "learning_rate": 3.661476074614761e-06, "loss": 0.7001, "step": 11678 }, { "epoch": 0.3409827450293422, "grad_norm": 0.7818993558582001, "learning_rate": 3.6613138686131387e-06, "loss": 0.7524, "step": 11679 }, { "epoch": 0.3410119412571896, "grad_norm": 0.7336503457134341, "learning_rate": 3.6611516626115167e-06, "loss": 0.5767, "step": 11680 }, { "epoch": 0.34104113748503695, "grad_norm": 0.8210847371235878, "learning_rate": 3.6609894566098947e-06, "loss": 0.7315, "step": 11681 }, { "epoch": 0.3410703337128843, "grad_norm": 0.6636201407179395, "learning_rate": 3.6608272506082727e-06, "loss": 0.5111, "step": 11682 }, { "epoch": 0.34109952994073167, "grad_norm": 0.6771009023633704, "learning_rate": 3.6606650446066507e-06, "loss": 0.6133, "step": 11683 }, { "epoch": 0.34112872616857903, "grad_norm": 0.7412786960363427, "learning_rate": 3.6605028386050283e-06, "loss": 0.7306, "step": 11684 }, { "epoch": 0.3411579223964264, "grad_norm": 0.8605195396767793, "learning_rate": 3.6603406326034067e-06, "loss": 0.6564, "step": 11685 }, { "epoch": 0.34118711862427376, "grad_norm": 0.6732944083190889, "learning_rate": 3.6601784266017847e-06, "loss": 0.5574, "step": 11686 }, { "epoch": 0.3412163148521211, "grad_norm": 0.7397230159363881, "learning_rate": 3.6600162206001627e-06, "loss": 0.7399, "step": 11687 }, { "epoch": 0.3412455110799685, "grad_norm": 0.7810439861845759, "learning_rate": 3.6598540145985407e-06, "loss": 0.6991, "step": 11688 }, { "epoch": 0.34127470730781584, "grad_norm": 0.926348330618402, "learning_rate": 3.6596918085969183e-06, "loss": 0.6835, "step": 11689 }, { "epoch": 0.3413039035356632, "grad_norm": 0.7589687353304189, "learning_rate": 3.6595296025952963e-06, "loss": 0.7657, "step": 11690 }, { "epoch": 0.34133309976351056, "grad_norm": 1.0503916976371477, "learning_rate": 3.6593673965936743e-06, "loss": 0.7157, "step": 11691 }, { "epoch": 0.3413622959913579, "grad_norm": 0.8204231637868099, "learning_rate": 3.6592051905920523e-06, "loss": 0.7748, "step": 11692 }, { "epoch": 0.3413914922192053, "grad_norm": 0.751574049578693, "learning_rate": 3.6590429845904303e-06, "loss": 0.6769, "step": 11693 }, { "epoch": 0.34142068844705264, "grad_norm": 0.7728699186324438, "learning_rate": 3.658880778588808e-06, "loss": 0.7337, "step": 11694 }, { "epoch": 0.3414498846749, "grad_norm": 0.8359281666906365, "learning_rate": 3.658718572587186e-06, "loss": 0.6743, "step": 11695 }, { "epoch": 0.34147908090274737, "grad_norm": 0.7576996384021023, "learning_rate": 3.658556366585564e-06, "loss": 0.668, "step": 11696 }, { "epoch": 0.34150827713059473, "grad_norm": 0.6846009483969818, "learning_rate": 3.658394160583942e-06, "loss": 0.6279, "step": 11697 }, { "epoch": 0.3415374733584421, "grad_norm": 0.7558550579249349, "learning_rate": 3.6582319545823195e-06, "loss": 0.7131, "step": 11698 }, { "epoch": 0.34156666958628945, "grad_norm": 0.7364894028732325, "learning_rate": 3.6580697485806975e-06, "loss": 0.662, "step": 11699 }, { "epoch": 0.3415958658141368, "grad_norm": 0.6975400666038045, "learning_rate": 3.6579075425790756e-06, "loss": 0.5819, "step": 11700 }, { "epoch": 0.3416250620419842, "grad_norm": 0.7379100780450916, "learning_rate": 3.6577453365774536e-06, "loss": 0.7208, "step": 11701 }, { "epoch": 0.34165425826983153, "grad_norm": 0.758673763126793, "learning_rate": 3.6575831305758316e-06, "loss": 0.6928, "step": 11702 }, { "epoch": 0.3416834544976789, "grad_norm": 0.7372970948754456, "learning_rate": 3.657420924574209e-06, "loss": 0.7119, "step": 11703 }, { "epoch": 0.34171265072552626, "grad_norm": 0.747985035853557, "learning_rate": 3.6572587185725876e-06, "loss": 0.7235, "step": 11704 }, { "epoch": 0.3417418469533736, "grad_norm": 0.7228897111316398, "learning_rate": 3.6570965125709656e-06, "loss": 0.6891, "step": 11705 }, { "epoch": 0.341771043181221, "grad_norm": 0.7212816594519035, "learning_rate": 3.6569343065693436e-06, "loss": 0.6195, "step": 11706 }, { "epoch": 0.34180023940906834, "grad_norm": 0.7331996810506863, "learning_rate": 3.6567721005677216e-06, "loss": 0.6377, "step": 11707 }, { "epoch": 0.3418294356369157, "grad_norm": 0.7494954510154584, "learning_rate": 3.656609894566099e-06, "loss": 0.7465, "step": 11708 }, { "epoch": 0.34185863186476306, "grad_norm": 0.7935804574929235, "learning_rate": 3.656447688564477e-06, "loss": 0.7062, "step": 11709 }, { "epoch": 0.3418878280926104, "grad_norm": 0.7542977084488177, "learning_rate": 3.656285482562855e-06, "loss": 0.7402, "step": 11710 }, { "epoch": 0.3419170243204578, "grad_norm": 0.7428647211343956, "learning_rate": 3.656123276561233e-06, "loss": 0.6719, "step": 11711 }, { "epoch": 0.34194622054830515, "grad_norm": 0.754035912984259, "learning_rate": 3.655961070559611e-06, "loss": 0.7324, "step": 11712 }, { "epoch": 0.3419754167761525, "grad_norm": 0.7331870577703993, "learning_rate": 3.655798864557989e-06, "loss": 0.6521, "step": 11713 }, { "epoch": 0.34200461300399987, "grad_norm": 0.7389193708153811, "learning_rate": 3.655636658556367e-06, "loss": 0.6284, "step": 11714 }, { "epoch": 0.34203380923184723, "grad_norm": 0.7443655383424165, "learning_rate": 3.655474452554745e-06, "loss": 0.7032, "step": 11715 }, { "epoch": 0.3420630054596946, "grad_norm": 0.8317998806137491, "learning_rate": 3.655312246553123e-06, "loss": 0.7783, "step": 11716 }, { "epoch": 0.34209220168754195, "grad_norm": 0.7420656172417682, "learning_rate": 3.6551500405515004e-06, "loss": 0.7545, "step": 11717 }, { "epoch": 0.3421213979153893, "grad_norm": 0.7742831147168309, "learning_rate": 3.6549878345498784e-06, "loss": 0.706, "step": 11718 }, { "epoch": 0.3421505941432367, "grad_norm": 0.7565478429023151, "learning_rate": 3.6548256285482564e-06, "loss": 0.6904, "step": 11719 }, { "epoch": 0.34217979037108404, "grad_norm": 0.7179060078356652, "learning_rate": 3.6546634225466344e-06, "loss": 0.6755, "step": 11720 }, { "epoch": 0.3422089865989314, "grad_norm": 0.7794955498645352, "learning_rate": 3.6545012165450124e-06, "loss": 0.7253, "step": 11721 }, { "epoch": 0.34223818282677876, "grad_norm": 0.7458730228616773, "learning_rate": 3.65433901054339e-06, "loss": 0.636, "step": 11722 }, { "epoch": 0.3422673790546261, "grad_norm": 0.7466046208663224, "learning_rate": 3.6541768045417684e-06, "loss": 0.6993, "step": 11723 }, { "epoch": 0.3422965752824735, "grad_norm": 0.7292653215283069, "learning_rate": 3.6540145985401464e-06, "loss": 0.6653, "step": 11724 }, { "epoch": 0.34232577151032084, "grad_norm": 0.7509453045787762, "learning_rate": 3.6538523925385244e-06, "loss": 0.6697, "step": 11725 }, { "epoch": 0.3423549677381682, "grad_norm": 0.704997854798365, "learning_rate": 3.6536901865369025e-06, "loss": 0.6349, "step": 11726 }, { "epoch": 0.34238416396601556, "grad_norm": 0.7372861536029779, "learning_rate": 3.65352798053528e-06, "loss": 0.7, "step": 11727 }, { "epoch": 0.3424133601938629, "grad_norm": 0.7632432439232063, "learning_rate": 3.653365774533658e-06, "loss": 0.6735, "step": 11728 }, { "epoch": 0.34244255642171034, "grad_norm": 0.9470171666833528, "learning_rate": 3.653203568532036e-06, "loss": 0.6577, "step": 11729 }, { "epoch": 0.3424717526495577, "grad_norm": 0.7649475410220908, "learning_rate": 3.653041362530414e-06, "loss": 0.7016, "step": 11730 }, { "epoch": 0.34250094887740506, "grad_norm": 0.7070542110757587, "learning_rate": 3.652879156528792e-06, "loss": 0.6509, "step": 11731 }, { "epoch": 0.3425301451052524, "grad_norm": 0.6728091733137818, "learning_rate": 3.6527169505271697e-06, "loss": 0.5551, "step": 11732 }, { "epoch": 0.3425593413330998, "grad_norm": 0.7106089250978411, "learning_rate": 3.6525547445255477e-06, "loss": 0.6241, "step": 11733 }, { "epoch": 0.34258853756094715, "grad_norm": 0.684208789813526, "learning_rate": 3.6523925385239257e-06, "loss": 0.6088, "step": 11734 }, { "epoch": 0.3426177337887945, "grad_norm": 1.056310669945255, "learning_rate": 3.6522303325223037e-06, "loss": 0.6886, "step": 11735 }, { "epoch": 0.34264693001664187, "grad_norm": 0.7579201945049611, "learning_rate": 3.6520681265206813e-06, "loss": 0.686, "step": 11736 }, { "epoch": 0.34267612624448923, "grad_norm": 0.8384668659069581, "learning_rate": 3.6519059205190593e-06, "loss": 0.7289, "step": 11737 }, { "epoch": 0.3427053224723366, "grad_norm": 0.7041082355783788, "learning_rate": 3.6517437145174373e-06, "loss": 0.6154, "step": 11738 }, { "epoch": 0.34273451870018395, "grad_norm": 0.6829435289220642, "learning_rate": 3.6515815085158153e-06, "loss": 0.5661, "step": 11739 }, { "epoch": 0.3427637149280313, "grad_norm": 0.7060838022591093, "learning_rate": 3.6514193025141933e-06, "loss": 0.6616, "step": 11740 }, { "epoch": 0.3427929111558787, "grad_norm": 0.7242492502115816, "learning_rate": 3.651257096512571e-06, "loss": 0.6665, "step": 11741 }, { "epoch": 0.34282210738372604, "grad_norm": 0.7821895024083141, "learning_rate": 3.6510948905109493e-06, "loss": 0.7508, "step": 11742 }, { "epoch": 0.3428513036115734, "grad_norm": 0.7020037807420235, "learning_rate": 3.6509326845093273e-06, "loss": 0.6207, "step": 11743 }, { "epoch": 0.34288049983942076, "grad_norm": 0.740377325705414, "learning_rate": 3.6507704785077053e-06, "loss": 0.6806, "step": 11744 }, { "epoch": 0.3429096960672681, "grad_norm": 0.7236063523192015, "learning_rate": 3.6506082725060833e-06, "loss": 0.6406, "step": 11745 }, { "epoch": 0.3429388922951155, "grad_norm": 0.7912223319929764, "learning_rate": 3.650446066504461e-06, "loss": 0.6681, "step": 11746 }, { "epoch": 0.34296808852296284, "grad_norm": 0.742243864176898, "learning_rate": 3.650283860502839e-06, "loss": 0.6565, "step": 11747 }, { "epoch": 0.3429972847508102, "grad_norm": 0.7665028276142747, "learning_rate": 3.650121654501217e-06, "loss": 0.6614, "step": 11748 }, { "epoch": 0.34302648097865757, "grad_norm": 0.7650555769163536, "learning_rate": 3.649959448499595e-06, "loss": 0.6694, "step": 11749 }, { "epoch": 0.3430556772065049, "grad_norm": 0.763529437427517, "learning_rate": 3.649797242497973e-06, "loss": 0.6966, "step": 11750 }, { "epoch": 0.3430848734343523, "grad_norm": 0.7271151241326355, "learning_rate": 3.6496350364963505e-06, "loss": 0.6546, "step": 11751 }, { "epoch": 0.34311406966219965, "grad_norm": 0.7269333676038587, "learning_rate": 3.6494728304947285e-06, "loss": 0.6135, "step": 11752 }, { "epoch": 0.343143265890047, "grad_norm": 0.7997384018843494, "learning_rate": 3.6493106244931065e-06, "loss": 0.7919, "step": 11753 }, { "epoch": 0.3431724621178944, "grad_norm": 0.7365342429802704, "learning_rate": 3.6491484184914845e-06, "loss": 0.626, "step": 11754 }, { "epoch": 0.34320165834574173, "grad_norm": 1.9897782557339074, "learning_rate": 3.648986212489862e-06, "loss": 0.7471, "step": 11755 }, { "epoch": 0.3432308545735891, "grad_norm": 0.8297281808736309, "learning_rate": 3.64882400648824e-06, "loss": 0.8091, "step": 11756 }, { "epoch": 0.34326005080143646, "grad_norm": 0.7157167349399381, "learning_rate": 3.648661800486618e-06, "loss": 0.6695, "step": 11757 }, { "epoch": 0.3432892470292838, "grad_norm": 0.7298354083717259, "learning_rate": 3.648499594484996e-06, "loss": 0.6784, "step": 11758 }, { "epoch": 0.3433184432571312, "grad_norm": 0.7994070606462984, "learning_rate": 3.648337388483374e-06, "loss": 0.7183, "step": 11759 }, { "epoch": 0.34334763948497854, "grad_norm": 0.7447151232578053, "learning_rate": 3.6481751824817526e-06, "loss": 0.6905, "step": 11760 }, { "epoch": 0.3433768357128259, "grad_norm": 0.7551769028716935, "learning_rate": 3.64801297648013e-06, "loss": 0.6823, "step": 11761 }, { "epoch": 0.34340603194067326, "grad_norm": 0.774758834232227, "learning_rate": 3.647850770478508e-06, "loss": 0.7132, "step": 11762 }, { "epoch": 0.3434352281685206, "grad_norm": 0.7620004806417523, "learning_rate": 3.647688564476886e-06, "loss": 0.7101, "step": 11763 }, { "epoch": 0.343464424396368, "grad_norm": 0.7969265009653216, "learning_rate": 3.647526358475264e-06, "loss": 0.5544, "step": 11764 }, { "epoch": 0.34349362062421535, "grad_norm": 0.7894288431624464, "learning_rate": 3.6473641524736418e-06, "loss": 0.7273, "step": 11765 }, { "epoch": 0.3435228168520627, "grad_norm": 0.7132321575325109, "learning_rate": 3.6472019464720198e-06, "loss": 0.6691, "step": 11766 }, { "epoch": 0.34355201307991007, "grad_norm": 0.7583364089885245, "learning_rate": 3.6470397404703978e-06, "loss": 0.6917, "step": 11767 }, { "epoch": 0.34358120930775743, "grad_norm": 0.7415382720377843, "learning_rate": 3.6468775344687758e-06, "loss": 0.6835, "step": 11768 }, { "epoch": 0.3436104055356048, "grad_norm": 0.7160385779800891, "learning_rate": 3.6467153284671538e-06, "loss": 0.6327, "step": 11769 }, { "epoch": 0.34363960176345215, "grad_norm": 0.7181689035485616, "learning_rate": 3.6465531224655314e-06, "loss": 0.6344, "step": 11770 }, { "epoch": 0.3436687979912995, "grad_norm": 0.742263924144143, "learning_rate": 3.6463909164639094e-06, "loss": 0.6752, "step": 11771 }, { "epoch": 0.3436979942191469, "grad_norm": 0.6853260726076237, "learning_rate": 3.6462287104622874e-06, "loss": 0.6095, "step": 11772 }, { "epoch": 0.34372719044699424, "grad_norm": 0.7273955424708674, "learning_rate": 3.6460665044606654e-06, "loss": 0.6353, "step": 11773 }, { "epoch": 0.3437563866748416, "grad_norm": 0.7245003341018947, "learning_rate": 3.645904298459043e-06, "loss": 0.6632, "step": 11774 }, { "epoch": 0.34378558290268896, "grad_norm": 0.7350803806801158, "learning_rate": 3.645742092457421e-06, "loss": 0.7381, "step": 11775 }, { "epoch": 0.3438147791305363, "grad_norm": 0.7060879670270265, "learning_rate": 3.645579886455799e-06, "loss": 0.5971, "step": 11776 }, { "epoch": 0.3438439753583837, "grad_norm": 0.7491055702858472, "learning_rate": 3.645417680454177e-06, "loss": 0.7001, "step": 11777 }, { "epoch": 0.34387317158623104, "grad_norm": 0.7302061445985482, "learning_rate": 3.645255474452555e-06, "loss": 0.5875, "step": 11778 }, { "epoch": 0.3439023678140784, "grad_norm": 0.7329504556954836, "learning_rate": 3.6450932684509334e-06, "loss": 0.6015, "step": 11779 }, { "epoch": 0.34393156404192576, "grad_norm": 0.7415206537965049, "learning_rate": 3.644931062449311e-06, "loss": 0.6672, "step": 11780 }, { "epoch": 0.3439607602697731, "grad_norm": 0.7548084353125388, "learning_rate": 3.644768856447689e-06, "loss": 0.6714, "step": 11781 }, { "epoch": 0.3439899564976205, "grad_norm": 0.756656405884115, "learning_rate": 3.644606650446067e-06, "loss": 0.6375, "step": 11782 }, { "epoch": 0.34401915272546785, "grad_norm": 0.7273167270186929, "learning_rate": 3.644444444444445e-06, "loss": 0.661, "step": 11783 }, { "epoch": 0.3440483489533152, "grad_norm": 0.7214653482993949, "learning_rate": 3.6442822384428226e-06, "loss": 0.6437, "step": 11784 }, { "epoch": 0.34407754518116257, "grad_norm": 0.7961319885000057, "learning_rate": 3.6441200324412006e-06, "loss": 0.7162, "step": 11785 }, { "epoch": 0.34410674140900993, "grad_norm": 0.7905434525877822, "learning_rate": 3.6439578264395786e-06, "loss": 0.7092, "step": 11786 }, { "epoch": 0.3441359376368573, "grad_norm": 0.7934000126933899, "learning_rate": 3.6437956204379566e-06, "loss": 0.6999, "step": 11787 }, { "epoch": 0.34416513386470465, "grad_norm": 0.7472061793875545, "learning_rate": 3.6436334144363346e-06, "loss": 0.6908, "step": 11788 }, { "epoch": 0.34419433009255207, "grad_norm": 0.6465518218068219, "learning_rate": 3.6434712084347122e-06, "loss": 0.5901, "step": 11789 }, { "epoch": 0.34422352632039943, "grad_norm": 0.7403266990591569, "learning_rate": 3.6433090024330902e-06, "loss": 0.6274, "step": 11790 }, { "epoch": 0.3442527225482468, "grad_norm": 0.7801761684002275, "learning_rate": 3.6431467964314682e-06, "loss": 0.6994, "step": 11791 }, { "epoch": 0.34428191877609415, "grad_norm": 0.7227593712752087, "learning_rate": 3.6429845904298462e-06, "loss": 0.6376, "step": 11792 }, { "epoch": 0.3443111150039415, "grad_norm": 0.7166046129330853, "learning_rate": 3.642822384428224e-06, "loss": 0.6014, "step": 11793 }, { "epoch": 0.3443403112317889, "grad_norm": 0.7044945816119281, "learning_rate": 3.642660178426602e-06, "loss": 0.6413, "step": 11794 }, { "epoch": 0.34436950745963624, "grad_norm": 0.7872554754035603, "learning_rate": 3.64249797242498e-06, "loss": 0.6309, "step": 11795 }, { "epoch": 0.3443987036874836, "grad_norm": 0.7090117931502243, "learning_rate": 3.642335766423358e-06, "loss": 0.6458, "step": 11796 }, { "epoch": 0.34442789991533096, "grad_norm": 0.7730077497944443, "learning_rate": 3.6421735604217354e-06, "loss": 0.7024, "step": 11797 }, { "epoch": 0.3444570961431783, "grad_norm": 0.6990036173455465, "learning_rate": 3.6420113544201143e-06, "loss": 0.6002, "step": 11798 }, { "epoch": 0.3444862923710257, "grad_norm": 0.7087368361022205, "learning_rate": 3.641849148418492e-06, "loss": 0.6121, "step": 11799 }, { "epoch": 0.34451548859887304, "grad_norm": 0.7609815927438377, "learning_rate": 3.64168694241687e-06, "loss": 0.7094, "step": 11800 }, { "epoch": 0.3445446848267204, "grad_norm": 0.8769886909679088, "learning_rate": 3.641524736415248e-06, "loss": 0.6953, "step": 11801 }, { "epoch": 0.34457388105456777, "grad_norm": 0.8360672174432352, "learning_rate": 3.641362530413626e-06, "loss": 0.7787, "step": 11802 }, { "epoch": 0.3446030772824151, "grad_norm": 0.8981036388302955, "learning_rate": 3.6412003244120035e-06, "loss": 0.8781, "step": 11803 }, { "epoch": 0.3446322735102625, "grad_norm": 0.7398108002540716, "learning_rate": 3.6410381184103815e-06, "loss": 0.6621, "step": 11804 }, { "epoch": 0.34466146973810985, "grad_norm": 0.7793047440254415, "learning_rate": 3.6408759124087595e-06, "loss": 0.6967, "step": 11805 }, { "epoch": 0.3446906659659572, "grad_norm": 0.7614777697668149, "learning_rate": 3.6407137064071375e-06, "loss": 0.6938, "step": 11806 }, { "epoch": 0.34471986219380457, "grad_norm": 0.7757866868044382, "learning_rate": 3.6405515004055155e-06, "loss": 0.7121, "step": 11807 }, { "epoch": 0.34474905842165193, "grad_norm": 0.7808505190284647, "learning_rate": 3.640389294403893e-06, "loss": 0.7175, "step": 11808 }, { "epoch": 0.3447782546494993, "grad_norm": 0.6578936068877108, "learning_rate": 3.640227088402271e-06, "loss": 0.56, "step": 11809 }, { "epoch": 0.34480745087734666, "grad_norm": 0.7387916080651887, "learning_rate": 3.640064882400649e-06, "loss": 0.6195, "step": 11810 }, { "epoch": 0.344836647105194, "grad_norm": 0.6738232496768783, "learning_rate": 3.639902676399027e-06, "loss": 0.5575, "step": 11811 }, { "epoch": 0.3448658433330414, "grad_norm": 0.8184986767967274, "learning_rate": 3.6397404703974047e-06, "loss": 0.8482, "step": 11812 }, { "epoch": 0.34489503956088874, "grad_norm": 0.7537114291059365, "learning_rate": 3.6395782643957827e-06, "loss": 0.6866, "step": 11813 }, { "epoch": 0.3449242357887361, "grad_norm": 0.7052542435298729, "learning_rate": 3.6394160583941607e-06, "loss": 0.6189, "step": 11814 }, { "epoch": 0.34495343201658346, "grad_norm": 0.7706921971799094, "learning_rate": 3.6392538523925387e-06, "loss": 0.6913, "step": 11815 }, { "epoch": 0.3449826282444308, "grad_norm": 0.7256676941287001, "learning_rate": 3.6390916463909163e-06, "loss": 0.6579, "step": 11816 }, { "epoch": 0.3450118244722782, "grad_norm": 0.7065333837914082, "learning_rate": 3.638929440389295e-06, "loss": 0.6, "step": 11817 }, { "epoch": 0.34504102070012554, "grad_norm": 0.7334318629142039, "learning_rate": 3.6387672343876727e-06, "loss": 0.6588, "step": 11818 }, { "epoch": 0.3450702169279729, "grad_norm": 0.7952465953267075, "learning_rate": 3.6386050283860507e-06, "loss": 0.6508, "step": 11819 }, { "epoch": 0.34509941315582027, "grad_norm": 0.6668794607616042, "learning_rate": 3.6384428223844287e-06, "loss": 0.5921, "step": 11820 }, { "epoch": 0.34512860938366763, "grad_norm": 0.7410625196197205, "learning_rate": 3.6382806163828067e-06, "loss": 0.6565, "step": 11821 }, { "epoch": 0.345157805611515, "grad_norm": 0.7002224294798638, "learning_rate": 3.6381184103811843e-06, "loss": 0.6215, "step": 11822 }, { "epoch": 0.34518700183936235, "grad_norm": 0.7461439343037837, "learning_rate": 3.6379562043795623e-06, "loss": 0.6351, "step": 11823 }, { "epoch": 0.3452161980672097, "grad_norm": 0.7996206832402921, "learning_rate": 3.6377939983779403e-06, "loss": 0.7522, "step": 11824 }, { "epoch": 0.3452453942950571, "grad_norm": 0.7749209124536753, "learning_rate": 3.6376317923763184e-06, "loss": 0.6464, "step": 11825 }, { "epoch": 0.34527459052290443, "grad_norm": 0.7197120883237752, "learning_rate": 3.6374695863746964e-06, "loss": 0.6658, "step": 11826 }, { "epoch": 0.3453037867507518, "grad_norm": 0.7525519406309262, "learning_rate": 3.637307380373074e-06, "loss": 0.6954, "step": 11827 }, { "epoch": 0.34533298297859916, "grad_norm": 0.8563412494894054, "learning_rate": 3.637145174371452e-06, "loss": 0.7541, "step": 11828 }, { "epoch": 0.3453621792064465, "grad_norm": 0.7589870371872516, "learning_rate": 3.63698296836983e-06, "loss": 0.7255, "step": 11829 }, { "epoch": 0.3453913754342939, "grad_norm": 0.7362816021383788, "learning_rate": 3.636820762368208e-06, "loss": 0.6809, "step": 11830 }, { "epoch": 0.34542057166214124, "grad_norm": 0.7433441133036404, "learning_rate": 3.6366585563665855e-06, "loss": 0.6599, "step": 11831 }, { "epoch": 0.3454497678899886, "grad_norm": 0.7959462539985797, "learning_rate": 3.6364963503649636e-06, "loss": 0.7924, "step": 11832 }, { "epoch": 0.34547896411783596, "grad_norm": 0.8016632012856052, "learning_rate": 3.6363341443633416e-06, "loss": 0.6393, "step": 11833 }, { "epoch": 0.3455081603456833, "grad_norm": 0.7062834247630431, "learning_rate": 3.6361719383617196e-06, "loss": 0.6304, "step": 11834 }, { "epoch": 0.3455373565735307, "grad_norm": 0.7446939113585386, "learning_rate": 3.636009732360097e-06, "loss": 0.6328, "step": 11835 }, { "epoch": 0.34556655280137805, "grad_norm": 0.7399208194387585, "learning_rate": 3.635847526358476e-06, "loss": 0.6561, "step": 11836 }, { "epoch": 0.3455957490292254, "grad_norm": 0.7825881272193411, "learning_rate": 3.6356853203568536e-06, "loss": 0.6595, "step": 11837 }, { "epoch": 0.34562494525707277, "grad_norm": 0.7171335539587228, "learning_rate": 3.6355231143552316e-06, "loss": 0.6585, "step": 11838 }, { "epoch": 0.34565414148492013, "grad_norm": 0.7310186910576625, "learning_rate": 3.6353609083536096e-06, "loss": 0.6687, "step": 11839 }, { "epoch": 0.3456833377127675, "grad_norm": 0.904786140022238, "learning_rate": 3.6351987023519876e-06, "loss": 0.7219, "step": 11840 }, { "epoch": 0.34571253394061485, "grad_norm": 0.6613132141239739, "learning_rate": 3.635036496350365e-06, "loss": 0.5805, "step": 11841 }, { "epoch": 0.3457417301684622, "grad_norm": 0.7211502999542727, "learning_rate": 3.634874290348743e-06, "loss": 0.6496, "step": 11842 }, { "epoch": 0.3457709263963096, "grad_norm": 0.7855871086456475, "learning_rate": 3.634712084347121e-06, "loss": 0.6756, "step": 11843 }, { "epoch": 0.34580012262415694, "grad_norm": 0.8227660528208437, "learning_rate": 3.6345498783454992e-06, "loss": 0.7592, "step": 11844 }, { "epoch": 0.3458293188520043, "grad_norm": 0.7716951613866132, "learning_rate": 3.6343876723438772e-06, "loss": 0.7205, "step": 11845 }, { "epoch": 0.34585851507985166, "grad_norm": 0.7665658483946195, "learning_rate": 3.634225466342255e-06, "loss": 0.731, "step": 11846 }, { "epoch": 0.345887711307699, "grad_norm": 0.783889939827885, "learning_rate": 3.634063260340633e-06, "loss": 0.5887, "step": 11847 }, { "epoch": 0.3459169075355464, "grad_norm": 0.8138911802687635, "learning_rate": 3.633901054339011e-06, "loss": 0.7383, "step": 11848 }, { "epoch": 0.34594610376339374, "grad_norm": 0.7967074067855743, "learning_rate": 3.633738848337389e-06, "loss": 0.7785, "step": 11849 }, { "epoch": 0.34597529999124116, "grad_norm": 0.7356856975602399, "learning_rate": 3.6335766423357664e-06, "loss": 0.6525, "step": 11850 }, { "epoch": 0.3460044962190885, "grad_norm": 0.7168150875511826, "learning_rate": 3.6334144363341444e-06, "loss": 0.5754, "step": 11851 }, { "epoch": 0.3460336924469359, "grad_norm": 0.6934359750917726, "learning_rate": 3.6332522303325224e-06, "loss": 0.5926, "step": 11852 }, { "epoch": 0.34606288867478324, "grad_norm": 0.8194793078520127, "learning_rate": 3.6330900243309004e-06, "loss": 0.6573, "step": 11853 }, { "epoch": 0.3460920849026306, "grad_norm": 0.7103200677607947, "learning_rate": 3.632927818329278e-06, "loss": 0.6502, "step": 11854 }, { "epoch": 0.34612128113047796, "grad_norm": 0.8570651722146989, "learning_rate": 3.632765612327657e-06, "loss": 0.6918, "step": 11855 }, { "epoch": 0.3461504773583253, "grad_norm": 0.6969336994825177, "learning_rate": 3.6326034063260344e-06, "loss": 0.6161, "step": 11856 }, { "epoch": 0.3461796735861727, "grad_norm": 0.7525752895057485, "learning_rate": 3.6324412003244125e-06, "loss": 0.676, "step": 11857 }, { "epoch": 0.34620886981402005, "grad_norm": 0.7059903404878853, "learning_rate": 3.6322789943227905e-06, "loss": 0.6192, "step": 11858 }, { "epoch": 0.3462380660418674, "grad_norm": 0.6838180779788282, "learning_rate": 3.6321167883211685e-06, "loss": 0.6066, "step": 11859 }, { "epoch": 0.34626726226971477, "grad_norm": 0.7006487518054927, "learning_rate": 3.631954582319546e-06, "loss": 0.6255, "step": 11860 }, { "epoch": 0.34629645849756213, "grad_norm": 0.6803054699502262, "learning_rate": 3.631792376317924e-06, "loss": 0.5962, "step": 11861 }, { "epoch": 0.3463256547254095, "grad_norm": 0.7203917649561279, "learning_rate": 3.631630170316302e-06, "loss": 0.61, "step": 11862 }, { "epoch": 0.34635485095325685, "grad_norm": 0.7412088065611936, "learning_rate": 3.63146796431468e-06, "loss": 0.6039, "step": 11863 }, { "epoch": 0.3463840471811042, "grad_norm": 0.7820029175063, "learning_rate": 3.631305758313058e-06, "loss": 0.6947, "step": 11864 }, { "epoch": 0.3464132434089516, "grad_norm": 0.7313899774220729, "learning_rate": 3.6311435523114357e-06, "loss": 0.673, "step": 11865 }, { "epoch": 0.34644243963679894, "grad_norm": 0.7781367578755559, "learning_rate": 3.6309813463098137e-06, "loss": 0.6355, "step": 11866 }, { "epoch": 0.3464716358646463, "grad_norm": 0.8077483380099445, "learning_rate": 3.6308191403081917e-06, "loss": 0.6581, "step": 11867 }, { "epoch": 0.34650083209249366, "grad_norm": 0.7782003945024487, "learning_rate": 3.6306569343065697e-06, "loss": 0.6803, "step": 11868 }, { "epoch": 0.346530028320341, "grad_norm": 0.7141622378040059, "learning_rate": 3.6304947283049473e-06, "loss": 0.6356, "step": 11869 }, { "epoch": 0.3465592245481884, "grad_norm": 0.7434318608410484, "learning_rate": 3.6303325223033253e-06, "loss": 0.6905, "step": 11870 }, { "epoch": 0.34658842077603574, "grad_norm": 0.7165346283900188, "learning_rate": 3.6301703163017033e-06, "loss": 0.622, "step": 11871 }, { "epoch": 0.3466176170038831, "grad_norm": 0.7165742217553721, "learning_rate": 3.6300081103000813e-06, "loss": 0.6047, "step": 11872 }, { "epoch": 0.34664681323173047, "grad_norm": 0.7053571264316166, "learning_rate": 3.629845904298459e-06, "loss": 0.6402, "step": 11873 }, { "epoch": 0.3466760094595778, "grad_norm": 0.6977850571195269, "learning_rate": 3.6296836982968377e-06, "loss": 0.6011, "step": 11874 }, { "epoch": 0.3467052056874252, "grad_norm": 0.7659772981658919, "learning_rate": 3.6295214922952153e-06, "loss": 0.6416, "step": 11875 }, { "epoch": 0.34673440191527255, "grad_norm": 0.6874612512560556, "learning_rate": 3.6293592862935933e-06, "loss": 0.563, "step": 11876 }, { "epoch": 0.3467635981431199, "grad_norm": 0.6872947877830033, "learning_rate": 3.6291970802919713e-06, "loss": 0.615, "step": 11877 }, { "epoch": 0.34679279437096727, "grad_norm": 0.784099392508901, "learning_rate": 3.6290348742903493e-06, "loss": 0.7795, "step": 11878 }, { "epoch": 0.34682199059881463, "grad_norm": 0.7659430190067823, "learning_rate": 3.628872668288727e-06, "loss": 0.6999, "step": 11879 }, { "epoch": 0.346851186826662, "grad_norm": 0.7427392236149721, "learning_rate": 3.628710462287105e-06, "loss": 0.6795, "step": 11880 }, { "epoch": 0.34688038305450936, "grad_norm": 0.7809676016022324, "learning_rate": 3.628548256285483e-06, "loss": 0.7688, "step": 11881 }, { "epoch": 0.3469095792823567, "grad_norm": 0.8429352062429109, "learning_rate": 3.628386050283861e-06, "loss": 0.6715, "step": 11882 }, { "epoch": 0.3469387755102041, "grad_norm": 0.6869492319505469, "learning_rate": 3.628223844282239e-06, "loss": 0.6056, "step": 11883 }, { "epoch": 0.34696797173805144, "grad_norm": 0.7644485196153485, "learning_rate": 3.6280616382806165e-06, "loss": 0.7569, "step": 11884 }, { "epoch": 0.3469971679658988, "grad_norm": 0.7291912730662338, "learning_rate": 3.6278994322789945e-06, "loss": 0.6086, "step": 11885 }, { "epoch": 0.34702636419374616, "grad_norm": 0.6938724914378362, "learning_rate": 3.6277372262773725e-06, "loss": 0.5999, "step": 11886 }, { "epoch": 0.3470555604215935, "grad_norm": 0.7904320303098822, "learning_rate": 3.6275750202757505e-06, "loss": 0.6885, "step": 11887 }, { "epoch": 0.3470847566494409, "grad_norm": 0.7134685083895597, "learning_rate": 3.627412814274128e-06, "loss": 0.6149, "step": 11888 }, { "epoch": 0.34711395287728825, "grad_norm": 0.7053414749562876, "learning_rate": 3.627250608272506e-06, "loss": 0.6548, "step": 11889 }, { "epoch": 0.3471431491051356, "grad_norm": 0.7465036253135987, "learning_rate": 3.627088402270884e-06, "loss": 0.6429, "step": 11890 }, { "epoch": 0.34717234533298297, "grad_norm": 0.7794347784815449, "learning_rate": 3.626926196269262e-06, "loss": 0.6942, "step": 11891 }, { "epoch": 0.34720154156083033, "grad_norm": 0.7237154114901049, "learning_rate": 3.6267639902676397e-06, "loss": 0.6588, "step": 11892 }, { "epoch": 0.3472307377886777, "grad_norm": 0.891498959563869, "learning_rate": 3.6266017842660186e-06, "loss": 0.763, "step": 11893 }, { "epoch": 0.34725993401652505, "grad_norm": 0.6828318179289974, "learning_rate": 3.626439578264396e-06, "loss": 0.5682, "step": 11894 }, { "epoch": 0.3472891302443724, "grad_norm": 0.7202535851464429, "learning_rate": 3.626277372262774e-06, "loss": 0.686, "step": 11895 }, { "epoch": 0.3473183264722198, "grad_norm": 0.7469625373333054, "learning_rate": 3.626115166261152e-06, "loss": 0.6421, "step": 11896 }, { "epoch": 0.34734752270006714, "grad_norm": 0.7488397964920583, "learning_rate": 3.62595296025953e-06, "loss": 0.7103, "step": 11897 }, { "epoch": 0.3473767189279145, "grad_norm": 0.6571303072831212, "learning_rate": 3.6257907542579078e-06, "loss": 0.5258, "step": 11898 }, { "epoch": 0.34740591515576186, "grad_norm": 0.7153668321123513, "learning_rate": 3.6256285482562858e-06, "loss": 0.6443, "step": 11899 }, { "epoch": 0.3474351113836092, "grad_norm": 0.7788055943281263, "learning_rate": 3.6254663422546638e-06, "loss": 0.7147, "step": 11900 }, { "epoch": 0.3474643076114566, "grad_norm": 0.7637102228157383, "learning_rate": 3.6253041362530418e-06, "loss": 0.6577, "step": 11901 }, { "epoch": 0.34749350383930394, "grad_norm": 0.7304679533593477, "learning_rate": 3.62514193025142e-06, "loss": 0.7124, "step": 11902 }, { "epoch": 0.3475227000671513, "grad_norm": 0.62928642766941, "learning_rate": 3.6249797242497974e-06, "loss": 0.4956, "step": 11903 }, { "epoch": 0.34755189629499866, "grad_norm": 0.784033305063106, "learning_rate": 3.6248175182481754e-06, "loss": 0.6836, "step": 11904 }, { "epoch": 0.347581092522846, "grad_norm": 0.7339493271108241, "learning_rate": 3.6246553122465534e-06, "loss": 0.6897, "step": 11905 }, { "epoch": 0.3476102887506934, "grad_norm": 0.7958390108599969, "learning_rate": 3.6244931062449314e-06, "loss": 0.7445, "step": 11906 }, { "epoch": 0.34763948497854075, "grad_norm": 0.7859110541298197, "learning_rate": 3.624330900243309e-06, "loss": 0.6899, "step": 11907 }, { "epoch": 0.3476686812063881, "grad_norm": 0.7210314070282167, "learning_rate": 3.624168694241687e-06, "loss": 0.6009, "step": 11908 }, { "epoch": 0.34769787743423547, "grad_norm": 0.6773655559578079, "learning_rate": 3.624006488240065e-06, "loss": 0.5961, "step": 11909 }, { "epoch": 0.3477270736620829, "grad_norm": 0.7070252421279328, "learning_rate": 3.623844282238443e-06, "loss": 0.6291, "step": 11910 }, { "epoch": 0.34775626988993025, "grad_norm": 0.7205662502632076, "learning_rate": 3.6236820762368214e-06, "loss": 0.6407, "step": 11911 }, { "epoch": 0.3477854661177776, "grad_norm": 0.819769848962194, "learning_rate": 3.6235198702351994e-06, "loss": 0.6985, "step": 11912 }, { "epoch": 0.34781466234562497, "grad_norm": 0.7525990751675496, "learning_rate": 3.623357664233577e-06, "loss": 0.6123, "step": 11913 }, { "epoch": 0.34784385857347233, "grad_norm": 0.7486282907173476, "learning_rate": 3.623195458231955e-06, "loss": 0.6635, "step": 11914 }, { "epoch": 0.3478730548013197, "grad_norm": 0.7442244905627448, "learning_rate": 3.623033252230333e-06, "loss": 0.6562, "step": 11915 }, { "epoch": 0.34790225102916705, "grad_norm": 0.7654808049681053, "learning_rate": 3.622871046228711e-06, "loss": 0.6633, "step": 11916 }, { "epoch": 0.3479314472570144, "grad_norm": 0.7591318015786707, "learning_rate": 3.6227088402270886e-06, "loss": 0.6443, "step": 11917 }, { "epoch": 0.3479606434848618, "grad_norm": 0.7693132355946963, "learning_rate": 3.6225466342254666e-06, "loss": 0.6648, "step": 11918 }, { "epoch": 0.34798983971270914, "grad_norm": 0.6840886643760204, "learning_rate": 3.6223844282238446e-06, "loss": 0.6066, "step": 11919 }, { "epoch": 0.3480190359405565, "grad_norm": 0.7058154031643235, "learning_rate": 3.6222222222222226e-06, "loss": 0.5956, "step": 11920 }, { "epoch": 0.34804823216840386, "grad_norm": 0.7329243509264043, "learning_rate": 3.6220600162206002e-06, "loss": 0.6754, "step": 11921 }, { "epoch": 0.3480774283962512, "grad_norm": 0.7177044625011021, "learning_rate": 3.6218978102189782e-06, "loss": 0.6827, "step": 11922 }, { "epoch": 0.3481066246240986, "grad_norm": 0.7072714938662937, "learning_rate": 3.6217356042173562e-06, "loss": 0.6036, "step": 11923 }, { "epoch": 0.34813582085194594, "grad_norm": 0.7568050990311614, "learning_rate": 3.6215733982157343e-06, "loss": 0.6875, "step": 11924 }, { "epoch": 0.3481650170797933, "grad_norm": 0.727170979136605, "learning_rate": 3.6214111922141123e-06, "loss": 0.6447, "step": 11925 }, { "epoch": 0.34819421330764067, "grad_norm": 0.703293504059694, "learning_rate": 3.62124898621249e-06, "loss": 0.5972, "step": 11926 }, { "epoch": 0.348223409535488, "grad_norm": 0.7412482284606005, "learning_rate": 3.621086780210868e-06, "loss": 0.6729, "step": 11927 }, { "epoch": 0.3482526057633354, "grad_norm": 0.70007093175668, "learning_rate": 3.620924574209246e-06, "loss": 0.63, "step": 11928 }, { "epoch": 0.34828180199118275, "grad_norm": 0.6949909372096433, "learning_rate": 3.620762368207624e-06, "loss": 0.5995, "step": 11929 }, { "epoch": 0.3483109982190301, "grad_norm": 0.8081385077617612, "learning_rate": 3.6206001622060023e-06, "loss": 0.6529, "step": 11930 }, { "epoch": 0.34834019444687747, "grad_norm": 0.7475891798547146, "learning_rate": 3.6204379562043803e-06, "loss": 0.7123, "step": 11931 }, { "epoch": 0.34836939067472483, "grad_norm": 0.7211730451926478, "learning_rate": 3.620275750202758e-06, "loss": 0.6437, "step": 11932 }, { "epoch": 0.3483985869025722, "grad_norm": 0.8220003551871399, "learning_rate": 3.620113544201136e-06, "loss": 0.7049, "step": 11933 }, { "epoch": 0.34842778313041955, "grad_norm": 0.9779842549305566, "learning_rate": 3.619951338199514e-06, "loss": 0.6572, "step": 11934 }, { "epoch": 0.3484569793582669, "grad_norm": 0.7372153553088583, "learning_rate": 3.619789132197892e-06, "loss": 0.716, "step": 11935 }, { "epoch": 0.3484861755861143, "grad_norm": 0.7662161914284528, "learning_rate": 3.6196269261962695e-06, "loss": 0.7314, "step": 11936 }, { "epoch": 0.34851537181396164, "grad_norm": 0.7391581001127352, "learning_rate": 3.6194647201946475e-06, "loss": 0.6066, "step": 11937 }, { "epoch": 0.348544568041809, "grad_norm": 0.6832447889163923, "learning_rate": 3.6193025141930255e-06, "loss": 0.5729, "step": 11938 }, { "epoch": 0.34857376426965636, "grad_norm": 0.7173535839074836, "learning_rate": 3.6191403081914035e-06, "loss": 0.6255, "step": 11939 }, { "epoch": 0.3486029604975037, "grad_norm": 0.6548275691651123, "learning_rate": 3.618978102189781e-06, "loss": 0.5537, "step": 11940 }, { "epoch": 0.3486321567253511, "grad_norm": 0.7391462351052808, "learning_rate": 3.618815896188159e-06, "loss": 0.5868, "step": 11941 }, { "epoch": 0.34866135295319844, "grad_norm": 0.7619499201683765, "learning_rate": 3.618653690186537e-06, "loss": 0.741, "step": 11942 }, { "epoch": 0.3486905491810458, "grad_norm": 0.7548371569502499, "learning_rate": 3.618491484184915e-06, "loss": 0.6575, "step": 11943 }, { "epoch": 0.34871974540889317, "grad_norm": 0.7438577894140287, "learning_rate": 3.618329278183293e-06, "loss": 0.7195, "step": 11944 }, { "epoch": 0.34874894163674053, "grad_norm": 0.7027412809435772, "learning_rate": 3.6181670721816707e-06, "loss": 0.6575, "step": 11945 }, { "epoch": 0.3487781378645879, "grad_norm": 0.7443685837629644, "learning_rate": 3.6180048661800487e-06, "loss": 0.6978, "step": 11946 }, { "epoch": 0.34880733409243525, "grad_norm": 0.70295705197438, "learning_rate": 3.6178426601784267e-06, "loss": 0.6022, "step": 11947 }, { "epoch": 0.3488365303202826, "grad_norm": 0.7419111946076843, "learning_rate": 3.6176804541768047e-06, "loss": 0.6809, "step": 11948 }, { "epoch": 0.34886572654813, "grad_norm": 0.7349921436752409, "learning_rate": 3.617518248175183e-06, "loss": 0.6428, "step": 11949 }, { "epoch": 0.34889492277597733, "grad_norm": 0.6821921316794379, "learning_rate": 3.617356042173561e-06, "loss": 0.5557, "step": 11950 }, { "epoch": 0.3489241190038247, "grad_norm": 0.7444298316706888, "learning_rate": 3.6171938361719387e-06, "loss": 0.6807, "step": 11951 }, { "epoch": 0.34895331523167206, "grad_norm": 0.7471774944094207, "learning_rate": 3.6170316301703167e-06, "loss": 0.6686, "step": 11952 }, { "epoch": 0.3489825114595194, "grad_norm": 0.7264063702611377, "learning_rate": 3.6168694241686948e-06, "loss": 0.661, "step": 11953 }, { "epoch": 0.3490117076873668, "grad_norm": 0.7527265070490468, "learning_rate": 3.6167072181670728e-06, "loss": 0.6776, "step": 11954 }, { "epoch": 0.34904090391521414, "grad_norm": 0.7402490711247258, "learning_rate": 3.6165450121654503e-06, "loss": 0.6648, "step": 11955 }, { "epoch": 0.3490701001430615, "grad_norm": 0.7163953238744611, "learning_rate": 3.6163828061638284e-06, "loss": 0.5996, "step": 11956 }, { "epoch": 0.34909929637090886, "grad_norm": 0.776723804701241, "learning_rate": 3.6162206001622064e-06, "loss": 0.7046, "step": 11957 }, { "epoch": 0.3491284925987562, "grad_norm": 0.7458251475500567, "learning_rate": 3.6160583941605844e-06, "loss": 0.6308, "step": 11958 }, { "epoch": 0.3491576888266036, "grad_norm": 0.8043249689498445, "learning_rate": 3.615896188158962e-06, "loss": 0.7866, "step": 11959 }, { "epoch": 0.34918688505445095, "grad_norm": 0.8003867223856338, "learning_rate": 3.61573398215734e-06, "loss": 0.6672, "step": 11960 }, { "epoch": 0.3492160812822983, "grad_norm": 0.7091422585987021, "learning_rate": 3.615571776155718e-06, "loss": 0.5723, "step": 11961 }, { "epoch": 0.34924527751014567, "grad_norm": 0.70633462777508, "learning_rate": 3.615409570154096e-06, "loss": 0.6465, "step": 11962 }, { "epoch": 0.34927447373799303, "grad_norm": 0.6834548669217764, "learning_rate": 3.615247364152474e-06, "loss": 0.5795, "step": 11963 }, { "epoch": 0.3493036699658404, "grad_norm": 0.7317963965261914, "learning_rate": 3.6150851581508516e-06, "loss": 0.6581, "step": 11964 }, { "epoch": 0.34933286619368775, "grad_norm": 0.7792497618296345, "learning_rate": 3.6149229521492296e-06, "loss": 0.6436, "step": 11965 }, { "epoch": 0.3493620624215351, "grad_norm": 0.7447015393815088, "learning_rate": 3.6147607461476076e-06, "loss": 0.6755, "step": 11966 }, { "epoch": 0.3493912586493825, "grad_norm": 0.7807507614513656, "learning_rate": 3.6145985401459856e-06, "loss": 0.6923, "step": 11967 }, { "epoch": 0.34942045487722984, "grad_norm": 0.705915124501085, "learning_rate": 3.614436334144364e-06, "loss": 0.6181, "step": 11968 }, { "epoch": 0.3494496511050772, "grad_norm": 0.6905163822699077, "learning_rate": 3.614274128142742e-06, "loss": 0.6028, "step": 11969 }, { "epoch": 0.3494788473329246, "grad_norm": 0.7325538938041095, "learning_rate": 3.6141119221411196e-06, "loss": 0.6432, "step": 11970 }, { "epoch": 0.349508043560772, "grad_norm": 0.7694033013711653, "learning_rate": 3.6139497161394976e-06, "loss": 0.6947, "step": 11971 }, { "epoch": 0.34953723978861934, "grad_norm": 0.7701337366942209, "learning_rate": 3.6137875101378756e-06, "loss": 0.6998, "step": 11972 }, { "epoch": 0.3495664360164667, "grad_norm": 0.9391550729629535, "learning_rate": 3.6136253041362536e-06, "loss": 0.6836, "step": 11973 }, { "epoch": 0.34959563224431406, "grad_norm": 0.7623938227181495, "learning_rate": 3.613463098134631e-06, "loss": 0.6681, "step": 11974 }, { "epoch": 0.3496248284721614, "grad_norm": 0.7487982864015221, "learning_rate": 3.613300892133009e-06, "loss": 0.6771, "step": 11975 }, { "epoch": 0.3496540247000088, "grad_norm": 0.6778073639386859, "learning_rate": 3.6131386861313872e-06, "loss": 0.5336, "step": 11976 }, { "epoch": 0.34968322092785614, "grad_norm": 0.7248801230756603, "learning_rate": 3.6129764801297652e-06, "loss": 0.635, "step": 11977 }, { "epoch": 0.3497124171557035, "grad_norm": 0.7476331814885546, "learning_rate": 3.612814274128143e-06, "loss": 0.6774, "step": 11978 }, { "epoch": 0.34974161338355086, "grad_norm": 0.7431053179333355, "learning_rate": 3.612652068126521e-06, "loss": 0.6788, "step": 11979 }, { "epoch": 0.3497708096113982, "grad_norm": 0.7396458949818936, "learning_rate": 3.612489862124899e-06, "loss": 0.6964, "step": 11980 }, { "epoch": 0.3498000058392456, "grad_norm": 0.7469338442214202, "learning_rate": 3.612327656123277e-06, "loss": 0.6555, "step": 11981 }, { "epoch": 0.34982920206709295, "grad_norm": 0.8332675335844685, "learning_rate": 3.612165450121655e-06, "loss": 0.714, "step": 11982 }, { "epoch": 0.3498583982949403, "grad_norm": 0.6973848598908976, "learning_rate": 3.6120032441200324e-06, "loss": 0.6026, "step": 11983 }, { "epoch": 0.34988759452278767, "grad_norm": 0.7286659456717424, "learning_rate": 3.6118410381184104e-06, "loss": 0.6599, "step": 11984 }, { "epoch": 0.34991679075063503, "grad_norm": 0.7488891928962164, "learning_rate": 3.6116788321167884e-06, "loss": 0.6651, "step": 11985 }, { "epoch": 0.3499459869784824, "grad_norm": 0.7331302727293338, "learning_rate": 3.6115166261151664e-06, "loss": 0.672, "step": 11986 }, { "epoch": 0.34997518320632975, "grad_norm": 0.6802036361296447, "learning_rate": 3.611354420113545e-06, "loss": 0.5891, "step": 11987 }, { "epoch": 0.3500043794341771, "grad_norm": 0.7056813483992898, "learning_rate": 3.611192214111923e-06, "loss": 0.6742, "step": 11988 }, { "epoch": 0.3500335756620245, "grad_norm": 0.7310487304650993, "learning_rate": 3.6110300081103005e-06, "loss": 0.6431, "step": 11989 }, { "epoch": 0.35006277188987184, "grad_norm": 0.6864726654813281, "learning_rate": 3.6108678021086785e-06, "loss": 0.6064, "step": 11990 }, { "epoch": 0.3500919681177192, "grad_norm": 0.8257517001590412, "learning_rate": 3.6107055961070565e-06, "loss": 0.6977, "step": 11991 }, { "epoch": 0.35012116434556656, "grad_norm": 0.7636252479290044, "learning_rate": 3.6105433901054345e-06, "loss": 0.7481, "step": 11992 }, { "epoch": 0.3501503605734139, "grad_norm": 0.7285351607434468, "learning_rate": 3.610381184103812e-06, "loss": 0.6559, "step": 11993 }, { "epoch": 0.3501795568012613, "grad_norm": 0.713916075064162, "learning_rate": 3.61021897810219e-06, "loss": 0.6855, "step": 11994 }, { "epoch": 0.35020875302910864, "grad_norm": 0.7068108680065702, "learning_rate": 3.610056772100568e-06, "loss": 0.6722, "step": 11995 }, { "epoch": 0.350237949256956, "grad_norm": 0.7062061314110757, "learning_rate": 3.609894566098946e-06, "loss": 0.63, "step": 11996 }, { "epoch": 0.35026714548480337, "grad_norm": 0.7312743094968959, "learning_rate": 3.6097323600973237e-06, "loss": 0.6742, "step": 11997 }, { "epoch": 0.3502963417126507, "grad_norm": 0.6866884900422756, "learning_rate": 3.6095701540957017e-06, "loss": 0.628, "step": 11998 }, { "epoch": 0.3503255379404981, "grad_norm": 0.6660439289614347, "learning_rate": 3.6094079480940797e-06, "loss": 0.584, "step": 11999 }, { "epoch": 0.35035473416834545, "grad_norm": 0.7519553967687199, "learning_rate": 3.6092457420924577e-06, "loss": 0.6299, "step": 12000 }, { "epoch": 0.3503839303961928, "grad_norm": 0.7509647526206447, "learning_rate": 3.6090835360908357e-06, "loss": 0.6818, "step": 12001 }, { "epoch": 0.35041312662404017, "grad_norm": 0.7801603033927969, "learning_rate": 3.6089213300892133e-06, "loss": 0.7505, "step": 12002 }, { "epoch": 0.35044232285188753, "grad_norm": 0.738673678805913, "learning_rate": 3.6087591240875913e-06, "loss": 0.6803, "step": 12003 }, { "epoch": 0.3504715190797349, "grad_norm": 0.7035962860225354, "learning_rate": 3.6085969180859693e-06, "loss": 0.6641, "step": 12004 }, { "epoch": 0.35050071530758226, "grad_norm": 0.7214932310435138, "learning_rate": 3.6084347120843473e-06, "loss": 0.6266, "step": 12005 }, { "epoch": 0.3505299115354296, "grad_norm": 0.6842056289020866, "learning_rate": 3.6082725060827257e-06, "loss": 0.5659, "step": 12006 }, { "epoch": 0.350559107763277, "grad_norm": 0.7571678331388141, "learning_rate": 3.6081103000811037e-06, "loss": 0.6392, "step": 12007 }, { "epoch": 0.35058830399112434, "grad_norm": 0.7602618481874, "learning_rate": 3.6079480940794813e-06, "loss": 0.694, "step": 12008 }, { "epoch": 0.3506175002189717, "grad_norm": 0.7136642368946348, "learning_rate": 3.6077858880778593e-06, "loss": 0.6589, "step": 12009 }, { "epoch": 0.35064669644681906, "grad_norm": 0.7631916726801138, "learning_rate": 3.6076236820762373e-06, "loss": 0.6448, "step": 12010 }, { "epoch": 0.3506758926746664, "grad_norm": 0.7525966778609188, "learning_rate": 3.6074614760746153e-06, "loss": 0.7054, "step": 12011 }, { "epoch": 0.3507050889025138, "grad_norm": 0.7550537900059024, "learning_rate": 3.607299270072993e-06, "loss": 0.6822, "step": 12012 }, { "epoch": 0.35073428513036115, "grad_norm": 0.6822932284868068, "learning_rate": 3.607137064071371e-06, "loss": 0.6096, "step": 12013 }, { "epoch": 0.3507634813582085, "grad_norm": 0.721311397662531, "learning_rate": 3.606974858069749e-06, "loss": 0.6766, "step": 12014 }, { "epoch": 0.35079267758605587, "grad_norm": 0.7375868550919922, "learning_rate": 3.606812652068127e-06, "loss": 0.6661, "step": 12015 }, { "epoch": 0.35082187381390323, "grad_norm": 0.7010627919409312, "learning_rate": 3.6066504460665045e-06, "loss": 0.6522, "step": 12016 }, { "epoch": 0.3508510700417506, "grad_norm": 0.6875893355183146, "learning_rate": 3.6064882400648825e-06, "loss": 0.619, "step": 12017 }, { "epoch": 0.35088026626959795, "grad_norm": 0.7030797136934217, "learning_rate": 3.6063260340632605e-06, "loss": 0.6147, "step": 12018 }, { "epoch": 0.3509094624974453, "grad_norm": 0.7764679836012273, "learning_rate": 3.6061638280616385e-06, "loss": 0.6505, "step": 12019 }, { "epoch": 0.3509386587252927, "grad_norm": 0.7235562570667469, "learning_rate": 3.6060016220600166e-06, "loss": 0.6182, "step": 12020 }, { "epoch": 0.35096785495314003, "grad_norm": 0.8040405893626635, "learning_rate": 3.605839416058394e-06, "loss": 0.7363, "step": 12021 }, { "epoch": 0.3509970511809874, "grad_norm": 0.6981753845110865, "learning_rate": 3.605677210056772e-06, "loss": 0.6642, "step": 12022 }, { "epoch": 0.35102624740883476, "grad_norm": 0.7596434578396174, "learning_rate": 3.60551500405515e-06, "loss": 0.5844, "step": 12023 }, { "epoch": 0.3510554436366821, "grad_norm": 0.758407296868691, "learning_rate": 3.605352798053528e-06, "loss": 0.7585, "step": 12024 }, { "epoch": 0.3510846398645295, "grad_norm": 0.7788689458229402, "learning_rate": 3.6051905920519066e-06, "loss": 0.7024, "step": 12025 }, { "epoch": 0.35111383609237684, "grad_norm": 0.7288176377969435, "learning_rate": 3.605028386050284e-06, "loss": 0.6967, "step": 12026 }, { "epoch": 0.3511430323202242, "grad_norm": 0.7867481889566402, "learning_rate": 3.604866180048662e-06, "loss": 0.6549, "step": 12027 }, { "epoch": 0.35117222854807156, "grad_norm": 0.7398463012512694, "learning_rate": 3.60470397404704e-06, "loss": 0.6517, "step": 12028 }, { "epoch": 0.3512014247759189, "grad_norm": 0.7626589678598557, "learning_rate": 3.604541768045418e-06, "loss": 0.756, "step": 12029 }, { "epoch": 0.3512306210037663, "grad_norm": 0.7533833083738504, "learning_rate": 3.604379562043796e-06, "loss": 0.6731, "step": 12030 }, { "epoch": 0.3512598172316137, "grad_norm": 0.7428315340052973, "learning_rate": 3.6042173560421738e-06, "loss": 0.7162, "step": 12031 }, { "epoch": 0.35128901345946106, "grad_norm": 0.7219927889660652, "learning_rate": 3.6040551500405518e-06, "loss": 0.663, "step": 12032 }, { "epoch": 0.3513182096873084, "grad_norm": 0.784606959270182, "learning_rate": 3.60389294403893e-06, "loss": 0.7602, "step": 12033 }, { "epoch": 0.3513474059151558, "grad_norm": 0.8412335634429015, "learning_rate": 3.603730738037308e-06, "loss": 0.7259, "step": 12034 }, { "epoch": 0.35137660214300315, "grad_norm": 0.6881855835429053, "learning_rate": 3.6035685320356854e-06, "loss": 0.5706, "step": 12035 }, { "epoch": 0.3514057983708505, "grad_norm": 0.8449994835447413, "learning_rate": 3.6034063260340634e-06, "loss": 0.6582, "step": 12036 }, { "epoch": 0.35143499459869787, "grad_norm": 0.7963538537588764, "learning_rate": 3.6032441200324414e-06, "loss": 0.7, "step": 12037 }, { "epoch": 0.35146419082654523, "grad_norm": 0.7333152110754317, "learning_rate": 3.6030819140308194e-06, "loss": 0.6986, "step": 12038 }, { "epoch": 0.3514933870543926, "grad_norm": 0.6898249527848197, "learning_rate": 3.6029197080291974e-06, "loss": 0.5945, "step": 12039 }, { "epoch": 0.35152258328223995, "grad_norm": 0.7067823303149645, "learning_rate": 3.602757502027575e-06, "loss": 0.6071, "step": 12040 }, { "epoch": 0.3515517795100873, "grad_norm": 0.8261214426102027, "learning_rate": 3.602595296025953e-06, "loss": 0.7501, "step": 12041 }, { "epoch": 0.3515809757379347, "grad_norm": 0.7338498427126403, "learning_rate": 3.602433090024331e-06, "loss": 0.6906, "step": 12042 }, { "epoch": 0.35161017196578204, "grad_norm": 0.7213028864345038, "learning_rate": 3.602270884022709e-06, "loss": 0.6602, "step": 12043 }, { "epoch": 0.3516393681936294, "grad_norm": 0.7629112348213754, "learning_rate": 3.6021086780210874e-06, "loss": 0.6919, "step": 12044 }, { "epoch": 0.35166856442147676, "grad_norm": 0.7662721268298743, "learning_rate": 3.601946472019465e-06, "loss": 0.6827, "step": 12045 }, { "epoch": 0.3516977606493241, "grad_norm": 0.7711468795219028, "learning_rate": 3.601784266017843e-06, "loss": 0.7169, "step": 12046 }, { "epoch": 0.3517269568771715, "grad_norm": 0.7666795146625641, "learning_rate": 3.601622060016221e-06, "loss": 0.7158, "step": 12047 }, { "epoch": 0.35175615310501884, "grad_norm": 0.7474519446943809, "learning_rate": 3.601459854014599e-06, "loss": 0.6489, "step": 12048 }, { "epoch": 0.3517853493328662, "grad_norm": 0.6793776439869414, "learning_rate": 3.601297648012977e-06, "loss": 0.6067, "step": 12049 }, { "epoch": 0.35181454556071357, "grad_norm": 0.7579351030005245, "learning_rate": 3.6011354420113546e-06, "loss": 0.7065, "step": 12050 }, { "epoch": 0.3518437417885609, "grad_norm": 0.7731895393203829, "learning_rate": 3.6009732360097326e-06, "loss": 0.7076, "step": 12051 }, { "epoch": 0.3518729380164083, "grad_norm": 0.8848342320596317, "learning_rate": 3.6008110300081107e-06, "loss": 0.5766, "step": 12052 }, { "epoch": 0.35190213424425565, "grad_norm": 0.6962921572714185, "learning_rate": 3.6006488240064887e-06, "loss": 0.6388, "step": 12053 }, { "epoch": 0.351931330472103, "grad_norm": 0.7305062356114744, "learning_rate": 3.6004866180048662e-06, "loss": 0.5973, "step": 12054 }, { "epoch": 0.35196052669995037, "grad_norm": 0.693688782339592, "learning_rate": 3.6003244120032442e-06, "loss": 0.6648, "step": 12055 }, { "epoch": 0.35198972292779773, "grad_norm": 0.8328637937955391, "learning_rate": 3.6001622060016223e-06, "loss": 0.6062, "step": 12056 }, { "epoch": 0.3520189191556451, "grad_norm": 0.7029204438830937, "learning_rate": 3.6000000000000003e-06, "loss": 0.5887, "step": 12057 }, { "epoch": 0.35204811538349245, "grad_norm": 0.7483221961108216, "learning_rate": 3.5998377939983783e-06, "loss": 0.6659, "step": 12058 }, { "epoch": 0.3520773116113398, "grad_norm": 0.7255167186864546, "learning_rate": 3.599675587996756e-06, "loss": 0.6274, "step": 12059 }, { "epoch": 0.3521065078391872, "grad_norm": 0.6698847478037593, "learning_rate": 3.599513381995134e-06, "loss": 0.5545, "step": 12060 }, { "epoch": 0.35213570406703454, "grad_norm": 0.7947598754031637, "learning_rate": 3.599351175993512e-06, "loss": 0.6501, "step": 12061 }, { "epoch": 0.3521649002948819, "grad_norm": 0.7456730050141063, "learning_rate": 3.5991889699918903e-06, "loss": 0.6297, "step": 12062 }, { "epoch": 0.35219409652272926, "grad_norm": 0.7435252197606965, "learning_rate": 3.5990267639902683e-06, "loss": 0.6068, "step": 12063 }, { "epoch": 0.3522232927505766, "grad_norm": 0.7265872951790787, "learning_rate": 3.598864557988646e-06, "loss": 0.7092, "step": 12064 }, { "epoch": 0.352252488978424, "grad_norm": 0.7068754761499156, "learning_rate": 3.598702351987024e-06, "loss": 0.6029, "step": 12065 }, { "epoch": 0.35228168520627134, "grad_norm": 0.7855064669003128, "learning_rate": 3.598540145985402e-06, "loss": 0.7674, "step": 12066 }, { "epoch": 0.3523108814341187, "grad_norm": 0.8021196892875143, "learning_rate": 3.59837793998378e-06, "loss": 0.6689, "step": 12067 }, { "epoch": 0.35234007766196607, "grad_norm": 0.7279816390234272, "learning_rate": 3.598215733982158e-06, "loss": 0.6237, "step": 12068 }, { "epoch": 0.35236927388981343, "grad_norm": 0.7704607377931891, "learning_rate": 3.5980535279805355e-06, "loss": 0.6904, "step": 12069 }, { "epoch": 0.3523984701176608, "grad_norm": 0.6799887044098264, "learning_rate": 3.5978913219789135e-06, "loss": 0.5876, "step": 12070 }, { "epoch": 0.35242766634550815, "grad_norm": 0.722065109909784, "learning_rate": 3.5977291159772915e-06, "loss": 0.6534, "step": 12071 }, { "epoch": 0.3524568625733555, "grad_norm": 0.6674526913995321, "learning_rate": 3.5975669099756695e-06, "loss": 0.5948, "step": 12072 }, { "epoch": 0.3524860588012029, "grad_norm": 0.7072966742297669, "learning_rate": 3.597404703974047e-06, "loss": 0.6163, "step": 12073 }, { "epoch": 0.35251525502905023, "grad_norm": 0.6903784413765804, "learning_rate": 3.597242497972425e-06, "loss": 0.5904, "step": 12074 }, { "epoch": 0.3525444512568976, "grad_norm": 0.6872854432889275, "learning_rate": 3.597080291970803e-06, "loss": 0.6299, "step": 12075 }, { "epoch": 0.35257364748474496, "grad_norm": 0.7268669902883721, "learning_rate": 3.596918085969181e-06, "loss": 0.6551, "step": 12076 }, { "epoch": 0.3526028437125923, "grad_norm": 0.8696822724556726, "learning_rate": 3.596755879967559e-06, "loss": 0.6124, "step": 12077 }, { "epoch": 0.3526320399404397, "grad_norm": 0.6941507314262929, "learning_rate": 3.5965936739659367e-06, "loss": 0.6193, "step": 12078 }, { "epoch": 0.35266123616828704, "grad_norm": 0.7635143049371033, "learning_rate": 3.5964314679643147e-06, "loss": 0.6794, "step": 12079 }, { "epoch": 0.3526904323961344, "grad_norm": 0.7434609909777578, "learning_rate": 3.5962692619626927e-06, "loss": 0.7187, "step": 12080 }, { "epoch": 0.35271962862398176, "grad_norm": 0.6557785440819532, "learning_rate": 3.596107055961071e-06, "loss": 0.5539, "step": 12081 }, { "epoch": 0.3527488248518291, "grad_norm": 0.7399295784966222, "learning_rate": 3.595944849959449e-06, "loss": 0.6205, "step": 12082 }, { "epoch": 0.3527780210796765, "grad_norm": 0.6935017628186684, "learning_rate": 3.5957826439578267e-06, "loss": 0.658, "step": 12083 }, { "epoch": 0.35280721730752385, "grad_norm": 0.7794405441823294, "learning_rate": 3.5956204379562048e-06, "loss": 0.7291, "step": 12084 }, { "epoch": 0.3528364135353712, "grad_norm": 0.7217556813947913, "learning_rate": 3.5954582319545828e-06, "loss": 0.6821, "step": 12085 }, { "epoch": 0.35286560976321857, "grad_norm": 0.6696966490701799, "learning_rate": 3.5952960259529608e-06, "loss": 0.549, "step": 12086 }, { "epoch": 0.35289480599106593, "grad_norm": 0.7227887072196619, "learning_rate": 3.5951338199513388e-06, "loss": 0.6752, "step": 12087 }, { "epoch": 0.3529240022189133, "grad_norm": 0.7185182143371414, "learning_rate": 3.5949716139497164e-06, "loss": 0.6976, "step": 12088 }, { "epoch": 0.35295319844676065, "grad_norm": 0.7973179404563244, "learning_rate": 3.5948094079480944e-06, "loss": 0.7117, "step": 12089 }, { "epoch": 0.352982394674608, "grad_norm": 0.8857219120309661, "learning_rate": 3.5946472019464724e-06, "loss": 0.676, "step": 12090 }, { "epoch": 0.35301159090245543, "grad_norm": 0.7336989028194218, "learning_rate": 3.5944849959448504e-06, "loss": 0.593, "step": 12091 }, { "epoch": 0.3530407871303028, "grad_norm": 1.1432150034883422, "learning_rate": 3.594322789943228e-06, "loss": 0.7359, "step": 12092 }, { "epoch": 0.35306998335815015, "grad_norm": 0.9529129089286325, "learning_rate": 3.594160583941606e-06, "loss": 0.725, "step": 12093 }, { "epoch": 0.3530991795859975, "grad_norm": 0.7292907030182809, "learning_rate": 3.593998377939984e-06, "loss": 0.677, "step": 12094 }, { "epoch": 0.3531283758138449, "grad_norm": 0.7215703116093152, "learning_rate": 3.593836171938362e-06, "loss": 0.6804, "step": 12095 }, { "epoch": 0.35315757204169224, "grad_norm": 0.8289907227298926, "learning_rate": 3.59367396593674e-06, "loss": 0.6914, "step": 12096 }, { "epoch": 0.3531867682695396, "grad_norm": 0.7179780179833072, "learning_rate": 3.5935117599351176e-06, "loss": 0.6602, "step": 12097 }, { "epoch": 0.35321596449738696, "grad_norm": 0.7290409006995633, "learning_rate": 3.5933495539334956e-06, "loss": 0.6295, "step": 12098 }, { "epoch": 0.3532451607252343, "grad_norm": 0.6640871732805489, "learning_rate": 3.5931873479318736e-06, "loss": 0.5804, "step": 12099 }, { "epoch": 0.3532743569530817, "grad_norm": 0.7484072089930871, "learning_rate": 3.593025141930252e-06, "loss": 0.7005, "step": 12100 }, { "epoch": 0.35330355318092904, "grad_norm": 0.7385606675259213, "learning_rate": 3.59286293592863e-06, "loss": 0.6693, "step": 12101 }, { "epoch": 0.3533327494087764, "grad_norm": 0.768870231281054, "learning_rate": 3.5927007299270076e-06, "loss": 0.7331, "step": 12102 }, { "epoch": 0.35336194563662376, "grad_norm": 0.7434091270903193, "learning_rate": 3.5925385239253856e-06, "loss": 0.5912, "step": 12103 }, { "epoch": 0.3533911418644711, "grad_norm": 0.668963466079863, "learning_rate": 3.5923763179237636e-06, "loss": 0.5486, "step": 12104 }, { "epoch": 0.3534203380923185, "grad_norm": 0.7662805285669998, "learning_rate": 3.5922141119221416e-06, "loss": 0.6584, "step": 12105 }, { "epoch": 0.35344953432016585, "grad_norm": 0.7366572223106642, "learning_rate": 3.5920519059205196e-06, "loss": 0.6526, "step": 12106 }, { "epoch": 0.3534787305480132, "grad_norm": 0.8224394846432574, "learning_rate": 3.5918896999188972e-06, "loss": 0.7523, "step": 12107 }, { "epoch": 0.35350792677586057, "grad_norm": 0.8377830712441493, "learning_rate": 3.5917274939172752e-06, "loss": 0.7608, "step": 12108 }, { "epoch": 0.35353712300370793, "grad_norm": 0.70239863671147, "learning_rate": 3.5915652879156532e-06, "loss": 0.6323, "step": 12109 }, { "epoch": 0.3535663192315553, "grad_norm": 0.7451352983084758, "learning_rate": 3.5914030819140312e-06, "loss": 0.6287, "step": 12110 }, { "epoch": 0.35359551545940265, "grad_norm": 0.7773280179503246, "learning_rate": 3.591240875912409e-06, "loss": 0.6967, "step": 12111 }, { "epoch": 0.35362471168725, "grad_norm": 0.7176335497969754, "learning_rate": 3.591078669910787e-06, "loss": 0.7232, "step": 12112 }, { "epoch": 0.3536539079150974, "grad_norm": 1.0108829027393307, "learning_rate": 3.590916463909165e-06, "loss": 0.5874, "step": 12113 }, { "epoch": 0.35368310414294474, "grad_norm": 0.712451872128731, "learning_rate": 3.590754257907543e-06, "loss": 0.6353, "step": 12114 }, { "epoch": 0.3537123003707921, "grad_norm": 0.8092351432826934, "learning_rate": 3.590592051905921e-06, "loss": 0.7345, "step": 12115 }, { "epoch": 0.35374149659863946, "grad_norm": 0.7635908293904364, "learning_rate": 3.5904298459042984e-06, "loss": 0.6657, "step": 12116 }, { "epoch": 0.3537706928264868, "grad_norm": 0.7852719815416386, "learning_rate": 3.5902676399026764e-06, "loss": 0.7652, "step": 12117 }, { "epoch": 0.3537998890543342, "grad_norm": 0.6719605897217181, "learning_rate": 3.5901054339010544e-06, "loss": 0.5339, "step": 12118 }, { "epoch": 0.35382908528218154, "grad_norm": 0.69448203980758, "learning_rate": 3.589943227899433e-06, "loss": 0.5904, "step": 12119 }, { "epoch": 0.3538582815100289, "grad_norm": 0.7034301318732177, "learning_rate": 3.589781021897811e-06, "loss": 0.5803, "step": 12120 }, { "epoch": 0.35388747773787627, "grad_norm": 0.68738877698107, "learning_rate": 3.5896188158961885e-06, "loss": 0.571, "step": 12121 }, { "epoch": 0.3539166739657236, "grad_norm": 0.7457755298217487, "learning_rate": 3.5894566098945665e-06, "loss": 0.7305, "step": 12122 }, { "epoch": 0.353945870193571, "grad_norm": 0.6752746981744299, "learning_rate": 3.5892944038929445e-06, "loss": 0.5445, "step": 12123 }, { "epoch": 0.35397506642141835, "grad_norm": 0.7275940448769738, "learning_rate": 3.5891321978913225e-06, "loss": 0.6363, "step": 12124 }, { "epoch": 0.3540042626492657, "grad_norm": 0.8174847594628227, "learning_rate": 3.5889699918897005e-06, "loss": 0.8142, "step": 12125 }, { "epoch": 0.35403345887711307, "grad_norm": 0.7648367072830595, "learning_rate": 3.588807785888078e-06, "loss": 0.6731, "step": 12126 }, { "epoch": 0.35406265510496043, "grad_norm": 0.7549206967458365, "learning_rate": 3.588645579886456e-06, "loss": 0.675, "step": 12127 }, { "epoch": 0.3540918513328078, "grad_norm": 0.7302768786729269, "learning_rate": 3.588483373884834e-06, "loss": 0.6428, "step": 12128 }, { "epoch": 0.35412104756065516, "grad_norm": 0.7480384155744457, "learning_rate": 3.588321167883212e-06, "loss": 0.6667, "step": 12129 }, { "epoch": 0.3541502437885025, "grad_norm": 0.7363109058631623, "learning_rate": 3.5881589618815897e-06, "loss": 0.5628, "step": 12130 }, { "epoch": 0.3541794400163499, "grad_norm": 0.7173676809057451, "learning_rate": 3.5879967558799677e-06, "loss": 0.6336, "step": 12131 }, { "epoch": 0.35420863624419724, "grad_norm": 0.7331836728965258, "learning_rate": 3.5878345498783457e-06, "loss": 0.6579, "step": 12132 }, { "epoch": 0.3542378324720446, "grad_norm": 0.7355052340901241, "learning_rate": 3.5876723438767237e-06, "loss": 0.6653, "step": 12133 }, { "epoch": 0.35426702869989196, "grad_norm": 0.7399092473079593, "learning_rate": 3.5875101378751017e-06, "loss": 0.6669, "step": 12134 }, { "epoch": 0.3542962249277393, "grad_norm": 0.7933503540611216, "learning_rate": 3.5873479318734793e-06, "loss": 0.6873, "step": 12135 }, { "epoch": 0.3543254211555867, "grad_norm": 0.7234591880718272, "learning_rate": 3.5871857258718573e-06, "loss": 0.7009, "step": 12136 }, { "epoch": 0.35435461738343405, "grad_norm": 0.7960853038216984, "learning_rate": 3.5870235198702353e-06, "loss": 0.6988, "step": 12137 }, { "epoch": 0.3543838136112814, "grad_norm": 0.704792311915271, "learning_rate": 3.5868613138686137e-06, "loss": 0.6154, "step": 12138 }, { "epoch": 0.35441300983912877, "grad_norm": 0.7303287201001077, "learning_rate": 3.5866991078669917e-06, "loss": 0.654, "step": 12139 }, { "epoch": 0.35444220606697613, "grad_norm": 0.8079246031244891, "learning_rate": 3.5865369018653693e-06, "loss": 0.7611, "step": 12140 }, { "epoch": 0.3544714022948235, "grad_norm": 0.7342064870951504, "learning_rate": 3.5863746958637473e-06, "loss": 0.6732, "step": 12141 }, { "epoch": 0.35450059852267085, "grad_norm": 0.8576145853283829, "learning_rate": 3.5862124898621253e-06, "loss": 0.7461, "step": 12142 }, { "epoch": 0.3545297947505182, "grad_norm": 0.7698329510748061, "learning_rate": 3.5860502838605033e-06, "loss": 0.6988, "step": 12143 }, { "epoch": 0.3545589909783656, "grad_norm": 0.6843641375417621, "learning_rate": 3.5858880778588813e-06, "loss": 0.5867, "step": 12144 }, { "epoch": 0.35458818720621293, "grad_norm": 0.710987652598911, "learning_rate": 3.585725871857259e-06, "loss": 0.5899, "step": 12145 }, { "epoch": 0.3546173834340603, "grad_norm": 0.7063566457335387, "learning_rate": 3.585563665855637e-06, "loss": 0.5869, "step": 12146 }, { "epoch": 0.35464657966190766, "grad_norm": 0.6952877593008998, "learning_rate": 3.585401459854015e-06, "loss": 0.6312, "step": 12147 }, { "epoch": 0.354675775889755, "grad_norm": 0.7284816133728134, "learning_rate": 3.585239253852393e-06, "loss": 0.6404, "step": 12148 }, { "epoch": 0.3547049721176024, "grad_norm": 0.7744444908172217, "learning_rate": 3.5850770478507705e-06, "loss": 0.6987, "step": 12149 }, { "epoch": 0.35473416834544974, "grad_norm": 0.7997204612062319, "learning_rate": 3.5849148418491485e-06, "loss": 0.7339, "step": 12150 }, { "epoch": 0.35476336457329716, "grad_norm": 0.8687231493299367, "learning_rate": 3.5847526358475265e-06, "loss": 0.7905, "step": 12151 }, { "epoch": 0.3547925608011445, "grad_norm": 0.7978132051350627, "learning_rate": 3.5845904298459046e-06, "loss": 0.6473, "step": 12152 }, { "epoch": 0.3548217570289919, "grad_norm": 0.734420831728226, "learning_rate": 3.5844282238442826e-06, "loss": 0.6311, "step": 12153 }, { "epoch": 0.35485095325683924, "grad_norm": 0.8891828088672104, "learning_rate": 3.58426601784266e-06, "loss": 0.73, "step": 12154 }, { "epoch": 0.3548801494846866, "grad_norm": 0.690401895038892, "learning_rate": 3.584103811841038e-06, "loss": 0.5797, "step": 12155 }, { "epoch": 0.35490934571253396, "grad_norm": 0.7569986153732305, "learning_rate": 3.583941605839416e-06, "loss": 0.6946, "step": 12156 }, { "epoch": 0.3549385419403813, "grad_norm": 0.7400934167271274, "learning_rate": 3.5837793998377946e-06, "loss": 0.6842, "step": 12157 }, { "epoch": 0.3549677381682287, "grad_norm": 0.7342448778537666, "learning_rate": 3.5836171938361726e-06, "loss": 0.6823, "step": 12158 }, { "epoch": 0.35499693439607605, "grad_norm": 0.7094487336482528, "learning_rate": 3.58345498783455e-06, "loss": 0.5712, "step": 12159 }, { "epoch": 0.3550261306239234, "grad_norm": 0.7633407921066666, "learning_rate": 3.583292781832928e-06, "loss": 0.6996, "step": 12160 }, { "epoch": 0.35505532685177077, "grad_norm": 0.7599267218748693, "learning_rate": 3.583130575831306e-06, "loss": 0.6627, "step": 12161 }, { "epoch": 0.35508452307961813, "grad_norm": 0.7737821501762092, "learning_rate": 3.582968369829684e-06, "loss": 0.7496, "step": 12162 }, { "epoch": 0.3551137193074655, "grad_norm": 0.7584968101177392, "learning_rate": 3.582806163828062e-06, "loss": 0.7102, "step": 12163 }, { "epoch": 0.35514291553531285, "grad_norm": 0.8180989582687809, "learning_rate": 3.58264395782644e-06, "loss": 0.7147, "step": 12164 }, { "epoch": 0.3551721117631602, "grad_norm": 0.7225323255491434, "learning_rate": 3.582481751824818e-06, "loss": 0.6522, "step": 12165 }, { "epoch": 0.3552013079910076, "grad_norm": 0.7443583432110142, "learning_rate": 3.582319545823196e-06, "loss": 0.668, "step": 12166 }, { "epoch": 0.35523050421885494, "grad_norm": 0.6995118792245946, "learning_rate": 3.582157339821574e-06, "loss": 0.6189, "step": 12167 }, { "epoch": 0.3552597004467023, "grad_norm": 0.8113521955666846, "learning_rate": 3.5819951338199514e-06, "loss": 0.7003, "step": 12168 }, { "epoch": 0.35528889667454966, "grad_norm": 0.6979101326685256, "learning_rate": 3.5818329278183294e-06, "loss": 0.5997, "step": 12169 }, { "epoch": 0.355318092902397, "grad_norm": 0.7799691134327608, "learning_rate": 3.5816707218167074e-06, "loss": 0.7064, "step": 12170 }, { "epoch": 0.3553472891302444, "grad_norm": 0.7029375642307778, "learning_rate": 3.5815085158150854e-06, "loss": 0.5806, "step": 12171 }, { "epoch": 0.35537648535809174, "grad_norm": 0.7107628820644342, "learning_rate": 3.581346309813463e-06, "loss": 0.689, "step": 12172 }, { "epoch": 0.3554056815859391, "grad_norm": 0.7649728657865851, "learning_rate": 3.581184103811841e-06, "loss": 0.7099, "step": 12173 }, { "epoch": 0.35543487781378647, "grad_norm": 0.6991507024280219, "learning_rate": 3.581021897810219e-06, "loss": 0.6717, "step": 12174 }, { "epoch": 0.3554640740416338, "grad_norm": 0.7665031754567122, "learning_rate": 3.580859691808597e-06, "loss": 0.7196, "step": 12175 }, { "epoch": 0.3554932702694812, "grad_norm": 0.7624222135120827, "learning_rate": 3.5806974858069754e-06, "loss": 0.7367, "step": 12176 }, { "epoch": 0.35552246649732855, "grad_norm": 0.7556432121625326, "learning_rate": 3.5805352798053535e-06, "loss": 0.692, "step": 12177 }, { "epoch": 0.3555516627251759, "grad_norm": 0.7380775975081832, "learning_rate": 3.580373073803731e-06, "loss": 0.7098, "step": 12178 }, { "epoch": 0.35558085895302327, "grad_norm": 0.7503762531900661, "learning_rate": 3.580210867802109e-06, "loss": 0.6744, "step": 12179 }, { "epoch": 0.35561005518087063, "grad_norm": 0.7753723393812209, "learning_rate": 3.580048661800487e-06, "loss": 0.682, "step": 12180 }, { "epoch": 0.355639251408718, "grad_norm": 0.7441030632539265, "learning_rate": 3.579886455798865e-06, "loss": 0.6665, "step": 12181 }, { "epoch": 0.35566844763656535, "grad_norm": 0.7775740567313979, "learning_rate": 3.579724249797243e-06, "loss": 0.7085, "step": 12182 }, { "epoch": 0.3556976438644127, "grad_norm": 0.7325527622553581, "learning_rate": 3.5795620437956206e-06, "loss": 0.6452, "step": 12183 }, { "epoch": 0.3557268400922601, "grad_norm": 0.7066847895183311, "learning_rate": 3.5793998377939987e-06, "loss": 0.6281, "step": 12184 }, { "epoch": 0.35575603632010744, "grad_norm": 0.7510663994119503, "learning_rate": 3.5792376317923767e-06, "loss": 0.5779, "step": 12185 }, { "epoch": 0.3557852325479548, "grad_norm": 0.751178599849591, "learning_rate": 3.5790754257907547e-06, "loss": 0.6292, "step": 12186 }, { "epoch": 0.35581442877580216, "grad_norm": 0.6938940114999848, "learning_rate": 3.5789132197891323e-06, "loss": 0.6229, "step": 12187 }, { "epoch": 0.3558436250036495, "grad_norm": 0.6933522773603042, "learning_rate": 3.5787510137875103e-06, "loss": 0.6334, "step": 12188 }, { "epoch": 0.3558728212314969, "grad_norm": 0.8007482935573154, "learning_rate": 3.5785888077858883e-06, "loss": 0.706, "step": 12189 }, { "epoch": 0.35590201745934424, "grad_norm": 0.7722458493314857, "learning_rate": 3.5784266017842663e-06, "loss": 0.6982, "step": 12190 }, { "epoch": 0.3559312136871916, "grad_norm": 0.6935331248301252, "learning_rate": 3.578264395782644e-06, "loss": 0.5888, "step": 12191 }, { "epoch": 0.35596040991503897, "grad_norm": 0.7035574775711642, "learning_rate": 3.578102189781022e-06, "loss": 0.6174, "step": 12192 }, { "epoch": 0.35598960614288633, "grad_norm": 0.7280904095958203, "learning_rate": 3.5779399837794e-06, "loss": 0.6697, "step": 12193 }, { "epoch": 0.3560188023707337, "grad_norm": 0.7137944049816604, "learning_rate": 3.577777777777778e-06, "loss": 0.6318, "step": 12194 }, { "epoch": 0.35604799859858105, "grad_norm": 0.7624932869336744, "learning_rate": 3.5776155717761563e-06, "loss": 0.666, "step": 12195 }, { "epoch": 0.3560771948264284, "grad_norm": 0.707669331322968, "learning_rate": 3.5774533657745343e-06, "loss": 0.6547, "step": 12196 }, { "epoch": 0.3561063910542758, "grad_norm": 0.7501315997396385, "learning_rate": 3.577291159772912e-06, "loss": 0.6252, "step": 12197 }, { "epoch": 0.35613558728212313, "grad_norm": 0.7468365807947198, "learning_rate": 3.57712895377129e-06, "loss": 0.6989, "step": 12198 }, { "epoch": 0.3561647835099705, "grad_norm": 0.7825800142856657, "learning_rate": 3.576966747769668e-06, "loss": 0.7114, "step": 12199 }, { "epoch": 0.35619397973781786, "grad_norm": 0.735699397851288, "learning_rate": 3.576804541768046e-06, "loss": 0.6581, "step": 12200 }, { "epoch": 0.3562231759656652, "grad_norm": 0.7761161413232668, "learning_rate": 3.576642335766424e-06, "loss": 0.7118, "step": 12201 }, { "epoch": 0.3562523721935126, "grad_norm": 0.7854745527928594, "learning_rate": 3.5764801297648015e-06, "loss": 0.7749, "step": 12202 }, { "epoch": 0.35628156842135994, "grad_norm": 0.7733538948539141, "learning_rate": 3.5763179237631795e-06, "loss": 0.7093, "step": 12203 }, { "epoch": 0.3563107646492073, "grad_norm": 0.7439114477266433, "learning_rate": 3.5761557177615575e-06, "loss": 0.6789, "step": 12204 }, { "epoch": 0.35633996087705466, "grad_norm": 0.810253738088891, "learning_rate": 3.5759935117599355e-06, "loss": 0.78, "step": 12205 }, { "epoch": 0.356369157104902, "grad_norm": 0.7271087464838568, "learning_rate": 3.575831305758313e-06, "loss": 0.6606, "step": 12206 }, { "epoch": 0.3563983533327494, "grad_norm": 0.8175241107242264, "learning_rate": 3.575669099756691e-06, "loss": 0.7246, "step": 12207 }, { "epoch": 0.35642754956059675, "grad_norm": 0.7200691906312786, "learning_rate": 3.575506893755069e-06, "loss": 0.6257, "step": 12208 }, { "epoch": 0.3564567457884441, "grad_norm": 0.7859962416689766, "learning_rate": 3.575344687753447e-06, "loss": 0.7149, "step": 12209 }, { "epoch": 0.35648594201629147, "grad_norm": 0.71367554465716, "learning_rate": 3.5751824817518247e-06, "loss": 0.6142, "step": 12210 }, { "epoch": 0.3565151382441389, "grad_norm": 0.8304619405953138, "learning_rate": 3.5750202757502027e-06, "loss": 0.7381, "step": 12211 }, { "epoch": 0.35654433447198625, "grad_norm": 0.8109447265071594, "learning_rate": 3.5748580697485807e-06, "loss": 0.6918, "step": 12212 }, { "epoch": 0.3565735306998336, "grad_norm": 0.8695095817127874, "learning_rate": 3.574695863746959e-06, "loss": 0.6361, "step": 12213 }, { "epoch": 0.35660272692768097, "grad_norm": 0.6968313812575021, "learning_rate": 3.574533657745337e-06, "loss": 0.6017, "step": 12214 }, { "epoch": 0.35663192315552833, "grad_norm": 0.7688733984823298, "learning_rate": 3.574371451743715e-06, "loss": 0.6297, "step": 12215 }, { "epoch": 0.3566611193833757, "grad_norm": 0.793552137505484, "learning_rate": 3.5742092457420928e-06, "loss": 0.7117, "step": 12216 }, { "epoch": 0.35669031561122305, "grad_norm": 0.7247124373566544, "learning_rate": 3.5740470397404708e-06, "loss": 0.6508, "step": 12217 }, { "epoch": 0.3567195118390704, "grad_norm": 0.7387591339954425, "learning_rate": 3.5738848337388488e-06, "loss": 0.6949, "step": 12218 }, { "epoch": 0.3567487080669178, "grad_norm": 0.7314580556101385, "learning_rate": 3.5737226277372268e-06, "loss": 0.6032, "step": 12219 }, { "epoch": 0.35677790429476514, "grad_norm": 0.7063151610256777, "learning_rate": 3.5735604217356048e-06, "loss": 0.6461, "step": 12220 }, { "epoch": 0.3568071005226125, "grad_norm": 0.7671419860777008, "learning_rate": 3.5733982157339824e-06, "loss": 0.6846, "step": 12221 }, { "epoch": 0.35683629675045986, "grad_norm": 0.733565365690966, "learning_rate": 3.5732360097323604e-06, "loss": 0.6483, "step": 12222 }, { "epoch": 0.3568654929783072, "grad_norm": 0.7173639717782827, "learning_rate": 3.5730738037307384e-06, "loss": 0.6486, "step": 12223 }, { "epoch": 0.3568946892061546, "grad_norm": 0.6700585181291739, "learning_rate": 3.5729115977291164e-06, "loss": 0.5768, "step": 12224 }, { "epoch": 0.35692388543400194, "grad_norm": 0.7226536760569006, "learning_rate": 3.572749391727494e-06, "loss": 0.6156, "step": 12225 }, { "epoch": 0.3569530816618493, "grad_norm": 0.7601634133816719, "learning_rate": 3.572587185725872e-06, "loss": 0.6632, "step": 12226 }, { "epoch": 0.35698227788969666, "grad_norm": 0.6965614080827465, "learning_rate": 3.57242497972425e-06, "loss": 0.5847, "step": 12227 }, { "epoch": 0.357011474117544, "grad_norm": 0.6904940641320164, "learning_rate": 3.572262773722628e-06, "loss": 0.636, "step": 12228 }, { "epoch": 0.3570406703453914, "grad_norm": 0.7636989114385404, "learning_rate": 3.5721005677210056e-06, "loss": 0.6728, "step": 12229 }, { "epoch": 0.35706986657323875, "grad_norm": 0.6845467846662311, "learning_rate": 3.5719383617193836e-06, "loss": 0.6108, "step": 12230 }, { "epoch": 0.3570990628010861, "grad_norm": 0.719690715607163, "learning_rate": 3.5717761557177616e-06, "loss": 0.6236, "step": 12231 }, { "epoch": 0.35712825902893347, "grad_norm": 0.7232263121615189, "learning_rate": 3.57161394971614e-06, "loss": 0.6384, "step": 12232 }, { "epoch": 0.35715745525678083, "grad_norm": 0.7240320376993563, "learning_rate": 3.571451743714518e-06, "loss": 0.5792, "step": 12233 }, { "epoch": 0.3571866514846282, "grad_norm": 0.6988097987495726, "learning_rate": 3.571289537712896e-06, "loss": 0.5622, "step": 12234 }, { "epoch": 0.35721584771247555, "grad_norm": 0.7732169505275177, "learning_rate": 3.5711273317112736e-06, "loss": 0.7481, "step": 12235 }, { "epoch": 0.3572450439403229, "grad_norm": 0.7530750917857324, "learning_rate": 3.5709651257096516e-06, "loss": 0.7107, "step": 12236 }, { "epoch": 0.3572742401681703, "grad_norm": 0.6684455733375301, "learning_rate": 3.5708029197080296e-06, "loss": 0.5726, "step": 12237 }, { "epoch": 0.35730343639601764, "grad_norm": 0.7422674601609234, "learning_rate": 3.5706407137064076e-06, "loss": 0.6956, "step": 12238 }, { "epoch": 0.357332632623865, "grad_norm": 0.8143221024006757, "learning_rate": 3.5704785077047856e-06, "loss": 0.802, "step": 12239 }, { "epoch": 0.35736182885171236, "grad_norm": 0.7858053710797279, "learning_rate": 3.5703163017031632e-06, "loss": 0.6942, "step": 12240 }, { "epoch": 0.3573910250795597, "grad_norm": 0.739279544213964, "learning_rate": 3.5701540957015412e-06, "loss": 0.654, "step": 12241 }, { "epoch": 0.3574202213074071, "grad_norm": 0.6726827132113138, "learning_rate": 3.5699918896999192e-06, "loss": 0.5925, "step": 12242 }, { "epoch": 0.35744941753525444, "grad_norm": 0.6474867922989965, "learning_rate": 3.5698296836982972e-06, "loss": 0.5729, "step": 12243 }, { "epoch": 0.3574786137631018, "grad_norm": 0.8070934670434425, "learning_rate": 3.569667477696675e-06, "loss": 0.6656, "step": 12244 }, { "epoch": 0.35750780999094917, "grad_norm": 0.7190671765314515, "learning_rate": 3.569505271695053e-06, "loss": 0.6361, "step": 12245 }, { "epoch": 0.3575370062187965, "grad_norm": 0.7344199506704795, "learning_rate": 3.569343065693431e-06, "loss": 0.6564, "step": 12246 }, { "epoch": 0.3575662024466439, "grad_norm": 0.8077422388963968, "learning_rate": 3.569180859691809e-06, "loss": 0.6506, "step": 12247 }, { "epoch": 0.35759539867449125, "grad_norm": 0.7150105469349031, "learning_rate": 3.5690186536901864e-06, "loss": 0.6848, "step": 12248 }, { "epoch": 0.3576245949023386, "grad_norm": 0.7772062196207045, "learning_rate": 3.5688564476885644e-06, "loss": 0.718, "step": 12249 }, { "epoch": 0.35765379113018597, "grad_norm": 0.8538572292298086, "learning_rate": 3.5686942416869424e-06, "loss": 0.6223, "step": 12250 }, { "epoch": 0.35768298735803333, "grad_norm": 0.774206144592227, "learning_rate": 3.568532035685321e-06, "loss": 0.683, "step": 12251 }, { "epoch": 0.3577121835858807, "grad_norm": 0.7466551907085948, "learning_rate": 3.568369829683699e-06, "loss": 0.6876, "step": 12252 }, { "epoch": 0.35774137981372806, "grad_norm": 0.7629201120716731, "learning_rate": 3.568207623682077e-06, "loss": 0.7125, "step": 12253 }, { "epoch": 0.3577705760415754, "grad_norm": 0.7540987595831717, "learning_rate": 3.5680454176804545e-06, "loss": 0.7015, "step": 12254 }, { "epoch": 0.3577997722694228, "grad_norm": 0.7431556266278007, "learning_rate": 3.5678832116788325e-06, "loss": 0.6565, "step": 12255 }, { "epoch": 0.35782896849727014, "grad_norm": 0.7576097016705602, "learning_rate": 3.5677210056772105e-06, "loss": 0.6313, "step": 12256 }, { "epoch": 0.3578581647251175, "grad_norm": 0.6937086881909027, "learning_rate": 3.5675587996755885e-06, "loss": 0.6582, "step": 12257 }, { "epoch": 0.35788736095296486, "grad_norm": 0.7244198036998472, "learning_rate": 3.5673965936739665e-06, "loss": 0.6814, "step": 12258 }, { "epoch": 0.3579165571808122, "grad_norm": 0.7450464052049056, "learning_rate": 3.567234387672344e-06, "loss": 0.6829, "step": 12259 }, { "epoch": 0.3579457534086596, "grad_norm": 0.6886438520506512, "learning_rate": 3.567072181670722e-06, "loss": 0.5721, "step": 12260 }, { "epoch": 0.35797494963650694, "grad_norm": 0.7239262202453909, "learning_rate": 3.5669099756691e-06, "loss": 0.5909, "step": 12261 }, { "epoch": 0.3580041458643543, "grad_norm": 0.7296016183809293, "learning_rate": 3.566747769667478e-06, "loss": 0.5929, "step": 12262 }, { "epoch": 0.35803334209220167, "grad_norm": 0.7355101875713819, "learning_rate": 3.5665855636658557e-06, "loss": 0.6766, "step": 12263 }, { "epoch": 0.35806253832004903, "grad_norm": 0.7416963795875806, "learning_rate": 3.5664233576642337e-06, "loss": 0.6552, "step": 12264 }, { "epoch": 0.3580917345478964, "grad_norm": 0.6814043144533956, "learning_rate": 3.5662611516626117e-06, "loss": 0.5698, "step": 12265 }, { "epoch": 0.35812093077574375, "grad_norm": 0.743101537446438, "learning_rate": 3.5660989456609897e-06, "loss": 0.6771, "step": 12266 }, { "epoch": 0.3581501270035911, "grad_norm": 0.7373862361291386, "learning_rate": 3.5659367396593673e-06, "loss": 0.6649, "step": 12267 }, { "epoch": 0.3581793232314385, "grad_norm": 0.6807290003985152, "learning_rate": 3.5657745336577453e-06, "loss": 0.6011, "step": 12268 }, { "epoch": 0.35820851945928583, "grad_norm": 0.7012447738417915, "learning_rate": 3.5656123276561233e-06, "loss": 0.6417, "step": 12269 }, { "epoch": 0.3582377156871332, "grad_norm": 0.7368427193868033, "learning_rate": 3.5654501216545017e-06, "loss": 0.6434, "step": 12270 }, { "epoch": 0.35826691191498056, "grad_norm": 0.7627818725230572, "learning_rate": 3.5652879156528797e-06, "loss": 0.6455, "step": 12271 }, { "epoch": 0.358296108142828, "grad_norm": 0.7868853925285528, "learning_rate": 3.5651257096512577e-06, "loss": 0.7128, "step": 12272 }, { "epoch": 0.35832530437067533, "grad_norm": 0.8485345302421909, "learning_rate": 3.5649635036496353e-06, "loss": 0.6211, "step": 12273 }, { "epoch": 0.3583545005985227, "grad_norm": 0.743959184658889, "learning_rate": 3.5648012976480133e-06, "loss": 0.5863, "step": 12274 }, { "epoch": 0.35838369682637006, "grad_norm": 0.8602744604464808, "learning_rate": 3.5646390916463913e-06, "loss": 0.6638, "step": 12275 }, { "epoch": 0.3584128930542174, "grad_norm": 0.8719082214626848, "learning_rate": 3.5644768856447694e-06, "loss": 0.744, "step": 12276 }, { "epoch": 0.3584420892820648, "grad_norm": 0.6927319864996231, "learning_rate": 3.5643146796431474e-06, "loss": 0.6268, "step": 12277 }, { "epoch": 0.35847128550991214, "grad_norm": 0.8140715335345248, "learning_rate": 3.564152473641525e-06, "loss": 0.7537, "step": 12278 }, { "epoch": 0.3585004817377595, "grad_norm": 0.7714858950099001, "learning_rate": 3.563990267639903e-06, "loss": 0.718, "step": 12279 }, { "epoch": 0.35852967796560686, "grad_norm": 0.767258522591139, "learning_rate": 3.563828061638281e-06, "loss": 0.6967, "step": 12280 }, { "epoch": 0.3585588741934542, "grad_norm": 0.7245375993584557, "learning_rate": 3.563665855636659e-06, "loss": 0.6748, "step": 12281 }, { "epoch": 0.3585880704213016, "grad_norm": 0.7883622721161034, "learning_rate": 3.5635036496350365e-06, "loss": 0.6645, "step": 12282 }, { "epoch": 0.35861726664914895, "grad_norm": 0.7798687345979863, "learning_rate": 3.5633414436334146e-06, "loss": 0.6158, "step": 12283 }, { "epoch": 0.3586464628769963, "grad_norm": 0.7458285767844868, "learning_rate": 3.5631792376317926e-06, "loss": 0.6922, "step": 12284 }, { "epoch": 0.35867565910484367, "grad_norm": 0.7464301751678115, "learning_rate": 3.5630170316301706e-06, "loss": 0.6984, "step": 12285 }, { "epoch": 0.35870485533269103, "grad_norm": 0.8098932193558044, "learning_rate": 3.562854825628548e-06, "loss": 0.7193, "step": 12286 }, { "epoch": 0.3587340515605384, "grad_norm": 0.7512232030646703, "learning_rate": 3.562692619626926e-06, "loss": 0.6978, "step": 12287 }, { "epoch": 0.35876324778838575, "grad_norm": 0.720482369964334, "learning_rate": 3.562530413625304e-06, "loss": 0.6635, "step": 12288 }, { "epoch": 0.3587924440162331, "grad_norm": 0.7105729391037603, "learning_rate": 3.5623682076236826e-06, "loss": 0.6214, "step": 12289 }, { "epoch": 0.3588216402440805, "grad_norm": 0.728826969056362, "learning_rate": 3.5622060016220606e-06, "loss": 0.659, "step": 12290 }, { "epoch": 0.35885083647192784, "grad_norm": 0.7117804216970464, "learning_rate": 3.5620437956204386e-06, "loss": 0.6607, "step": 12291 }, { "epoch": 0.3588800326997752, "grad_norm": 0.7312492766813428, "learning_rate": 3.561881589618816e-06, "loss": 0.6783, "step": 12292 }, { "epoch": 0.35890922892762256, "grad_norm": 0.9129630141426576, "learning_rate": 3.561719383617194e-06, "loss": 0.7309, "step": 12293 }, { "epoch": 0.3589384251554699, "grad_norm": 0.8243637028915326, "learning_rate": 3.561557177615572e-06, "loss": 0.7114, "step": 12294 }, { "epoch": 0.3589676213833173, "grad_norm": 0.7110205825046887, "learning_rate": 3.56139497161395e-06, "loss": 0.6709, "step": 12295 }, { "epoch": 0.35899681761116464, "grad_norm": 0.749776225790473, "learning_rate": 3.561232765612328e-06, "loss": 0.6881, "step": 12296 }, { "epoch": 0.359026013839012, "grad_norm": 0.7682285372982997, "learning_rate": 3.561070559610706e-06, "loss": 0.7246, "step": 12297 }, { "epoch": 0.35905521006685936, "grad_norm": 0.7174470208233421, "learning_rate": 3.560908353609084e-06, "loss": 0.6541, "step": 12298 }, { "epoch": 0.3590844062947067, "grad_norm": 0.710006237154717, "learning_rate": 3.560746147607462e-06, "loss": 0.6214, "step": 12299 }, { "epoch": 0.3591136025225541, "grad_norm": 0.7968549795037746, "learning_rate": 3.56058394160584e-06, "loss": 0.7082, "step": 12300 }, { "epoch": 0.35914279875040145, "grad_norm": 0.7717930864273919, "learning_rate": 3.5604217356042174e-06, "loss": 0.7418, "step": 12301 }, { "epoch": 0.3591719949782488, "grad_norm": 0.7080479936849313, "learning_rate": 3.5602595296025954e-06, "loss": 0.6162, "step": 12302 }, { "epoch": 0.35920119120609617, "grad_norm": 0.7376138468930128, "learning_rate": 3.5600973236009734e-06, "loss": 0.6791, "step": 12303 }, { "epoch": 0.35923038743394353, "grad_norm": 0.7623508180674885, "learning_rate": 3.5599351175993514e-06, "loss": 0.7051, "step": 12304 }, { "epoch": 0.3592595836617909, "grad_norm": 0.7595239140326432, "learning_rate": 3.559772911597729e-06, "loss": 0.5761, "step": 12305 }, { "epoch": 0.35928877988963825, "grad_norm": 0.6919238724515547, "learning_rate": 3.559610705596107e-06, "loss": 0.5882, "step": 12306 }, { "epoch": 0.3593179761174856, "grad_norm": 0.7673901263038797, "learning_rate": 3.559448499594485e-06, "loss": 0.6554, "step": 12307 }, { "epoch": 0.359347172345333, "grad_norm": 0.7865371097916676, "learning_rate": 3.5592862935928634e-06, "loss": 0.7198, "step": 12308 }, { "epoch": 0.35937636857318034, "grad_norm": 0.6933020118184879, "learning_rate": 3.5591240875912415e-06, "loss": 0.6412, "step": 12309 }, { "epoch": 0.3594055648010277, "grad_norm": 0.7022339894282116, "learning_rate": 3.5589618815896195e-06, "loss": 0.6047, "step": 12310 }, { "epoch": 0.35943476102887506, "grad_norm": 0.8021531927722363, "learning_rate": 3.558799675587997e-06, "loss": 0.7229, "step": 12311 }, { "epoch": 0.3594639572567224, "grad_norm": 0.7315044367785288, "learning_rate": 3.558637469586375e-06, "loss": 0.6289, "step": 12312 }, { "epoch": 0.3594931534845698, "grad_norm": 0.7466686680044226, "learning_rate": 3.558475263584753e-06, "loss": 0.7098, "step": 12313 }, { "epoch": 0.35952234971241714, "grad_norm": 0.7505112527879997, "learning_rate": 3.558313057583131e-06, "loss": 0.6668, "step": 12314 }, { "epoch": 0.3595515459402645, "grad_norm": 0.7513134827052462, "learning_rate": 3.5581508515815087e-06, "loss": 0.6353, "step": 12315 }, { "epoch": 0.35958074216811187, "grad_norm": 0.8652145858932228, "learning_rate": 3.5579886455798867e-06, "loss": 0.6319, "step": 12316 }, { "epoch": 0.3596099383959592, "grad_norm": 0.7404295449119807, "learning_rate": 3.5578264395782647e-06, "loss": 0.6456, "step": 12317 }, { "epoch": 0.3596391346238066, "grad_norm": 0.9313223577929111, "learning_rate": 3.5576642335766427e-06, "loss": 0.678, "step": 12318 }, { "epoch": 0.35966833085165395, "grad_norm": 0.7156726541916804, "learning_rate": 3.5575020275750207e-06, "loss": 0.667, "step": 12319 }, { "epoch": 0.3596975270795013, "grad_norm": 0.7576674984300169, "learning_rate": 3.5573398215733983e-06, "loss": 0.702, "step": 12320 }, { "epoch": 0.3597267233073487, "grad_norm": 0.7798738850402255, "learning_rate": 3.5571776155717763e-06, "loss": 0.765, "step": 12321 }, { "epoch": 0.35975591953519603, "grad_norm": 0.7274994751314252, "learning_rate": 3.5570154095701543e-06, "loss": 0.6915, "step": 12322 }, { "epoch": 0.3597851157630434, "grad_norm": 0.7030135828719106, "learning_rate": 3.5568532035685323e-06, "loss": 0.63, "step": 12323 }, { "epoch": 0.35981431199089076, "grad_norm": 0.8160827015889823, "learning_rate": 3.55669099756691e-06, "loss": 0.7474, "step": 12324 }, { "epoch": 0.3598435082187381, "grad_norm": 0.7239806375608006, "learning_rate": 3.556528791565288e-06, "loss": 0.6734, "step": 12325 }, { "epoch": 0.3598727044465855, "grad_norm": 0.7631017363772659, "learning_rate": 3.556366585563666e-06, "loss": 0.6712, "step": 12326 }, { "epoch": 0.35990190067443284, "grad_norm": 0.737440041987294, "learning_rate": 3.5562043795620443e-06, "loss": 0.649, "step": 12327 }, { "epoch": 0.3599310969022802, "grad_norm": 0.6546279147908064, "learning_rate": 3.5560421735604223e-06, "loss": 0.5832, "step": 12328 }, { "epoch": 0.35996029313012756, "grad_norm": 0.7428868270335411, "learning_rate": 3.5558799675588003e-06, "loss": 0.6711, "step": 12329 }, { "epoch": 0.3599894893579749, "grad_norm": 0.7760560065580646, "learning_rate": 3.555717761557178e-06, "loss": 0.6812, "step": 12330 }, { "epoch": 0.3600186855858223, "grad_norm": 0.7234447213789407, "learning_rate": 3.555555555555556e-06, "loss": 0.6476, "step": 12331 }, { "epoch": 0.3600478818136697, "grad_norm": 0.6771796547819986, "learning_rate": 3.555393349553934e-06, "loss": 0.5745, "step": 12332 }, { "epoch": 0.36007707804151706, "grad_norm": 0.7390465778548024, "learning_rate": 3.555231143552312e-06, "loss": 0.7146, "step": 12333 }, { "epoch": 0.3601062742693644, "grad_norm": 0.8048861307378435, "learning_rate": 3.5550689375506895e-06, "loss": 0.6859, "step": 12334 }, { "epoch": 0.3601354704972118, "grad_norm": 0.7023391801575728, "learning_rate": 3.5549067315490675e-06, "loss": 0.6294, "step": 12335 }, { "epoch": 0.36016466672505915, "grad_norm": 0.7907334331385207, "learning_rate": 3.5547445255474455e-06, "loss": 0.7478, "step": 12336 }, { "epoch": 0.3601938629529065, "grad_norm": 0.7206520617325153, "learning_rate": 3.5545823195458235e-06, "loss": 0.633, "step": 12337 }, { "epoch": 0.36022305918075387, "grad_norm": 0.8388304075640226, "learning_rate": 3.5544201135442015e-06, "loss": 0.6923, "step": 12338 }, { "epoch": 0.36025225540860123, "grad_norm": 0.7426023517438, "learning_rate": 3.554257907542579e-06, "loss": 0.6398, "step": 12339 }, { "epoch": 0.3602814516364486, "grad_norm": 0.7617381380811756, "learning_rate": 3.554095701540957e-06, "loss": 0.7307, "step": 12340 }, { "epoch": 0.36031064786429595, "grad_norm": 0.7263371379406478, "learning_rate": 3.553933495539335e-06, "loss": 0.6108, "step": 12341 }, { "epoch": 0.3603398440921433, "grad_norm": 0.7868595976570184, "learning_rate": 3.553771289537713e-06, "loss": 0.7172, "step": 12342 }, { "epoch": 0.3603690403199907, "grad_norm": 0.7442796638895561, "learning_rate": 3.5536090835360907e-06, "loss": 0.7323, "step": 12343 }, { "epoch": 0.36039823654783804, "grad_norm": 0.7140670296721926, "learning_rate": 3.5534468775344687e-06, "loss": 0.6307, "step": 12344 }, { "epoch": 0.3604274327756854, "grad_norm": 0.7466985651030443, "learning_rate": 3.5532846715328467e-06, "loss": 0.6094, "step": 12345 }, { "epoch": 0.36045662900353276, "grad_norm": 0.8151452790254106, "learning_rate": 3.553122465531225e-06, "loss": 0.6926, "step": 12346 }, { "epoch": 0.3604858252313801, "grad_norm": 0.7051054949406892, "learning_rate": 3.552960259529603e-06, "loss": 0.6482, "step": 12347 }, { "epoch": 0.3605150214592275, "grad_norm": 0.6949451029736579, "learning_rate": 3.552798053527981e-06, "loss": 0.591, "step": 12348 }, { "epoch": 0.36054421768707484, "grad_norm": 0.7021909774867969, "learning_rate": 3.5526358475263588e-06, "loss": 0.6315, "step": 12349 }, { "epoch": 0.3605734139149222, "grad_norm": 0.7123549007794963, "learning_rate": 3.5524736415247368e-06, "loss": 0.6046, "step": 12350 }, { "epoch": 0.36060261014276956, "grad_norm": 0.744232335423189, "learning_rate": 3.5523114355231148e-06, "loss": 0.655, "step": 12351 }, { "epoch": 0.3606318063706169, "grad_norm": 0.8869252609795159, "learning_rate": 3.5521492295214928e-06, "loss": 0.7021, "step": 12352 }, { "epoch": 0.3606610025984643, "grad_norm": 0.7404858628915835, "learning_rate": 3.5519870235198704e-06, "loss": 0.6787, "step": 12353 }, { "epoch": 0.36069019882631165, "grad_norm": 0.7099016499113642, "learning_rate": 3.5518248175182484e-06, "loss": 0.6419, "step": 12354 }, { "epoch": 0.360719395054159, "grad_norm": 0.8037742016034287, "learning_rate": 3.5516626115166264e-06, "loss": 0.6931, "step": 12355 }, { "epoch": 0.36074859128200637, "grad_norm": 0.7256792497470703, "learning_rate": 3.5515004055150044e-06, "loss": 0.6205, "step": 12356 }, { "epoch": 0.36077778750985373, "grad_norm": 0.6842664372993048, "learning_rate": 3.5513381995133824e-06, "loss": 0.5909, "step": 12357 }, { "epoch": 0.3608069837377011, "grad_norm": 0.7288262874642174, "learning_rate": 3.55117599351176e-06, "loss": 0.7088, "step": 12358 }, { "epoch": 0.36083617996554845, "grad_norm": 0.7190606922511006, "learning_rate": 3.551013787510138e-06, "loss": 0.62, "step": 12359 }, { "epoch": 0.3608653761933958, "grad_norm": 0.7867727416585218, "learning_rate": 3.550851581508516e-06, "loss": 0.6741, "step": 12360 }, { "epoch": 0.3608945724212432, "grad_norm": 0.7374808057093178, "learning_rate": 3.550689375506894e-06, "loss": 0.6685, "step": 12361 }, { "epoch": 0.36092376864909054, "grad_norm": 0.6870623655804895, "learning_rate": 3.5505271695052716e-06, "loss": 0.5718, "step": 12362 }, { "epoch": 0.3609529648769379, "grad_norm": 0.7514118325375345, "learning_rate": 3.5503649635036496e-06, "loss": 0.71, "step": 12363 }, { "epoch": 0.36098216110478526, "grad_norm": 0.7182314191050908, "learning_rate": 3.550202757502028e-06, "loss": 0.6423, "step": 12364 }, { "epoch": 0.3610113573326326, "grad_norm": 0.742830960421046, "learning_rate": 3.550040551500406e-06, "loss": 0.7082, "step": 12365 }, { "epoch": 0.36104055356048, "grad_norm": 0.8337649139619617, "learning_rate": 3.549878345498784e-06, "loss": 0.6363, "step": 12366 }, { "epoch": 0.36106974978832734, "grad_norm": 0.7271429775809681, "learning_rate": 3.549716139497162e-06, "loss": 0.66, "step": 12367 }, { "epoch": 0.3610989460161747, "grad_norm": 0.7519252453057294, "learning_rate": 3.5495539334955396e-06, "loss": 0.6275, "step": 12368 }, { "epoch": 0.36112814224402207, "grad_norm": 0.7127603512729046, "learning_rate": 3.5493917274939176e-06, "loss": 0.6471, "step": 12369 }, { "epoch": 0.3611573384718694, "grad_norm": 0.8103095730227399, "learning_rate": 3.5492295214922956e-06, "loss": 0.7206, "step": 12370 }, { "epoch": 0.3611865346997168, "grad_norm": 0.7116211321643803, "learning_rate": 3.5490673154906736e-06, "loss": 0.6167, "step": 12371 }, { "epoch": 0.36121573092756415, "grad_norm": 0.8031573447790225, "learning_rate": 3.5489051094890512e-06, "loss": 0.7549, "step": 12372 }, { "epoch": 0.3612449271554115, "grad_norm": 0.8564555271843421, "learning_rate": 3.5487429034874292e-06, "loss": 0.6988, "step": 12373 }, { "epoch": 0.36127412338325887, "grad_norm": 0.7663644246049787, "learning_rate": 3.5485806974858072e-06, "loss": 0.6932, "step": 12374 }, { "epoch": 0.36130331961110623, "grad_norm": 0.7528303742061364, "learning_rate": 3.5484184914841852e-06, "loss": 0.6724, "step": 12375 }, { "epoch": 0.3613325158389536, "grad_norm": 0.7823195003389389, "learning_rate": 3.5482562854825633e-06, "loss": 0.7239, "step": 12376 }, { "epoch": 0.36136171206680096, "grad_norm": 0.7237757529002441, "learning_rate": 3.548094079480941e-06, "loss": 0.6208, "step": 12377 }, { "epoch": 0.3613909082946483, "grad_norm": 0.8241400390071871, "learning_rate": 3.547931873479319e-06, "loss": 0.7279, "step": 12378 }, { "epoch": 0.3614201045224957, "grad_norm": 0.6928119898392832, "learning_rate": 3.547769667477697e-06, "loss": 0.6094, "step": 12379 }, { "epoch": 0.36144930075034304, "grad_norm": 0.7750273771519922, "learning_rate": 3.547607461476075e-06, "loss": 0.6596, "step": 12380 }, { "epoch": 0.3614784969781904, "grad_norm": 0.6802457737397037, "learning_rate": 3.5474452554744524e-06, "loss": 0.5669, "step": 12381 }, { "epoch": 0.36150769320603776, "grad_norm": 0.7644823317342143, "learning_rate": 3.5472830494728304e-06, "loss": 0.7038, "step": 12382 }, { "epoch": 0.3615368894338851, "grad_norm": 0.6990321268626638, "learning_rate": 3.547120843471209e-06, "loss": 0.6025, "step": 12383 }, { "epoch": 0.3615660856617325, "grad_norm": 0.7024026366906877, "learning_rate": 3.546958637469587e-06, "loss": 0.6048, "step": 12384 }, { "epoch": 0.36159528188957984, "grad_norm": 0.788526565473485, "learning_rate": 3.546796431467965e-06, "loss": 0.7382, "step": 12385 }, { "epoch": 0.3616244781174272, "grad_norm": 0.761351232653329, "learning_rate": 3.546634225466343e-06, "loss": 0.6644, "step": 12386 }, { "epoch": 0.36165367434527457, "grad_norm": 0.6855966781776708, "learning_rate": 3.5464720194647205e-06, "loss": 0.5763, "step": 12387 }, { "epoch": 0.36168287057312193, "grad_norm": 0.7032597005404901, "learning_rate": 3.5463098134630985e-06, "loss": 0.6494, "step": 12388 }, { "epoch": 0.3617120668009693, "grad_norm": 0.747697412120385, "learning_rate": 3.5461476074614765e-06, "loss": 0.6925, "step": 12389 }, { "epoch": 0.36174126302881665, "grad_norm": 0.7590072989401286, "learning_rate": 3.5459854014598545e-06, "loss": 0.6724, "step": 12390 }, { "epoch": 0.361770459256664, "grad_norm": 0.720605576238571, "learning_rate": 3.545823195458232e-06, "loss": 0.6717, "step": 12391 }, { "epoch": 0.36179965548451143, "grad_norm": 0.7554708264092231, "learning_rate": 3.54566098945661e-06, "loss": 0.6598, "step": 12392 }, { "epoch": 0.3618288517123588, "grad_norm": 0.7352798796213535, "learning_rate": 3.545498783454988e-06, "loss": 0.652, "step": 12393 }, { "epoch": 0.36185804794020615, "grad_norm": 0.7974563964898174, "learning_rate": 3.545336577453366e-06, "loss": 0.7671, "step": 12394 }, { "epoch": 0.3618872441680535, "grad_norm": 0.7815253153712539, "learning_rate": 3.545174371451744e-06, "loss": 0.6675, "step": 12395 }, { "epoch": 0.3619164403959009, "grad_norm": 0.7578130719948796, "learning_rate": 3.5450121654501217e-06, "loss": 0.6888, "step": 12396 }, { "epoch": 0.36194563662374823, "grad_norm": 0.7270701031991306, "learning_rate": 3.5448499594484997e-06, "loss": 0.6555, "step": 12397 }, { "epoch": 0.3619748328515956, "grad_norm": 0.707093434649568, "learning_rate": 3.5446877534468777e-06, "loss": 0.6334, "step": 12398 }, { "epoch": 0.36200402907944296, "grad_norm": 0.7392999959625662, "learning_rate": 3.5445255474452557e-06, "loss": 0.7126, "step": 12399 }, { "epoch": 0.3620332253072903, "grad_norm": 0.769722929439523, "learning_rate": 3.5443633414436333e-06, "loss": 0.6845, "step": 12400 }, { "epoch": 0.3620624215351377, "grad_norm": 0.6813269486346555, "learning_rate": 3.5442011354420113e-06, "loss": 0.599, "step": 12401 }, { "epoch": 0.36209161776298504, "grad_norm": 0.7085912297142941, "learning_rate": 3.5440389294403897e-06, "loss": 0.6401, "step": 12402 }, { "epoch": 0.3621208139908324, "grad_norm": 0.754520192068635, "learning_rate": 3.5438767234387677e-06, "loss": 0.6893, "step": 12403 }, { "epoch": 0.36215001021867976, "grad_norm": 0.7771204472337152, "learning_rate": 3.5437145174371457e-06, "loss": 0.6447, "step": 12404 }, { "epoch": 0.3621792064465271, "grad_norm": 0.7583121300704431, "learning_rate": 3.5435523114355238e-06, "loss": 0.698, "step": 12405 }, { "epoch": 0.3622084026743745, "grad_norm": 0.7249776366180768, "learning_rate": 3.5433901054339013e-06, "loss": 0.6451, "step": 12406 }, { "epoch": 0.36223759890222185, "grad_norm": 0.7635355911300744, "learning_rate": 3.5432278994322793e-06, "loss": 0.7144, "step": 12407 }, { "epoch": 0.3622667951300692, "grad_norm": 0.7512440151782234, "learning_rate": 3.5430656934306574e-06, "loss": 0.6189, "step": 12408 }, { "epoch": 0.36229599135791657, "grad_norm": 0.8708708779234409, "learning_rate": 3.5429034874290354e-06, "loss": 0.7531, "step": 12409 }, { "epoch": 0.36232518758576393, "grad_norm": 0.7447918019349824, "learning_rate": 3.542741281427413e-06, "loss": 0.661, "step": 12410 }, { "epoch": 0.3623543838136113, "grad_norm": 0.6752665579472711, "learning_rate": 3.542579075425791e-06, "loss": 0.6045, "step": 12411 }, { "epoch": 0.36238358004145865, "grad_norm": 0.8391101041212201, "learning_rate": 3.542416869424169e-06, "loss": 0.743, "step": 12412 }, { "epoch": 0.362412776269306, "grad_norm": 0.7344756873037857, "learning_rate": 3.542254663422547e-06, "loss": 0.6743, "step": 12413 }, { "epoch": 0.3624419724971534, "grad_norm": 0.7086577621292867, "learning_rate": 3.542092457420925e-06, "loss": 0.6494, "step": 12414 }, { "epoch": 0.36247116872500074, "grad_norm": 0.7896573520506373, "learning_rate": 3.5419302514193026e-06, "loss": 0.7344, "step": 12415 }, { "epoch": 0.3625003649528481, "grad_norm": 0.6691589393776265, "learning_rate": 3.5417680454176806e-06, "loss": 0.6319, "step": 12416 }, { "epoch": 0.36252956118069546, "grad_norm": 0.731905963227778, "learning_rate": 3.5416058394160586e-06, "loss": 0.6499, "step": 12417 }, { "epoch": 0.3625587574085428, "grad_norm": 0.6765029067734759, "learning_rate": 3.5414436334144366e-06, "loss": 0.5788, "step": 12418 }, { "epoch": 0.3625879536363902, "grad_norm": 0.7979097966149652, "learning_rate": 3.541281427412814e-06, "loss": 0.7042, "step": 12419 }, { "epoch": 0.36261714986423754, "grad_norm": 0.8650580673430299, "learning_rate": 3.541119221411192e-06, "loss": 0.655, "step": 12420 }, { "epoch": 0.3626463460920849, "grad_norm": 0.9254329170182217, "learning_rate": 3.5409570154095706e-06, "loss": 0.6121, "step": 12421 }, { "epoch": 0.36267554231993226, "grad_norm": 0.7174465427814345, "learning_rate": 3.5407948094079486e-06, "loss": 0.6385, "step": 12422 }, { "epoch": 0.3627047385477796, "grad_norm": 0.8545968552839583, "learning_rate": 3.5406326034063266e-06, "loss": 0.6863, "step": 12423 }, { "epoch": 0.362733934775627, "grad_norm": 0.7038303902746452, "learning_rate": 3.5404703974047046e-06, "loss": 0.6287, "step": 12424 }, { "epoch": 0.36276313100347435, "grad_norm": 0.6983699082041449, "learning_rate": 3.540308191403082e-06, "loss": 0.5727, "step": 12425 }, { "epoch": 0.3627923272313217, "grad_norm": 0.695720872958868, "learning_rate": 3.54014598540146e-06, "loss": 0.6226, "step": 12426 }, { "epoch": 0.36282152345916907, "grad_norm": 0.7104373546804377, "learning_rate": 3.5399837793998382e-06, "loss": 0.6089, "step": 12427 }, { "epoch": 0.36285071968701643, "grad_norm": 0.7407240179439366, "learning_rate": 3.5398215733982162e-06, "loss": 0.6557, "step": 12428 }, { "epoch": 0.3628799159148638, "grad_norm": 0.8269993287982341, "learning_rate": 3.539659367396594e-06, "loss": 0.6285, "step": 12429 }, { "epoch": 0.36290911214271115, "grad_norm": 0.7759447154663487, "learning_rate": 3.539497161394972e-06, "loss": 0.7163, "step": 12430 }, { "epoch": 0.3629383083705585, "grad_norm": 0.7734829168832316, "learning_rate": 3.53933495539335e-06, "loss": 0.7022, "step": 12431 }, { "epoch": 0.3629675045984059, "grad_norm": 0.7738885204904979, "learning_rate": 3.539172749391728e-06, "loss": 0.6356, "step": 12432 }, { "epoch": 0.36299670082625324, "grad_norm": 0.6518275403883212, "learning_rate": 3.539010543390106e-06, "loss": 0.5406, "step": 12433 }, { "epoch": 0.3630258970541006, "grad_norm": 0.8949491516660679, "learning_rate": 3.5388483373884834e-06, "loss": 0.6492, "step": 12434 }, { "epoch": 0.36305509328194796, "grad_norm": 0.732898140032396, "learning_rate": 3.5386861313868614e-06, "loss": 0.6557, "step": 12435 }, { "epoch": 0.3630842895097953, "grad_norm": 0.7987867152911714, "learning_rate": 3.5385239253852394e-06, "loss": 0.7621, "step": 12436 }, { "epoch": 0.3631134857376427, "grad_norm": 0.6955593419532703, "learning_rate": 3.5383617193836174e-06, "loss": 0.6061, "step": 12437 }, { "epoch": 0.36314268196549004, "grad_norm": 1.0961251791754747, "learning_rate": 3.538199513381995e-06, "loss": 0.6734, "step": 12438 }, { "epoch": 0.3631718781933374, "grad_norm": 0.759830573023102, "learning_rate": 3.538037307380373e-06, "loss": 0.7124, "step": 12439 }, { "epoch": 0.36320107442118477, "grad_norm": 0.7470815296761141, "learning_rate": 3.5378751013787515e-06, "loss": 0.726, "step": 12440 }, { "epoch": 0.3632302706490321, "grad_norm": 0.7890571379966567, "learning_rate": 3.5377128953771295e-06, "loss": 0.7786, "step": 12441 }, { "epoch": 0.3632594668768795, "grad_norm": 0.7319013427526403, "learning_rate": 3.5375506893755075e-06, "loss": 0.6621, "step": 12442 }, { "epoch": 0.36328866310472685, "grad_norm": 0.7318123070531792, "learning_rate": 3.5373884833738855e-06, "loss": 0.6614, "step": 12443 }, { "epoch": 0.3633178593325742, "grad_norm": 0.7486464816487597, "learning_rate": 3.537226277372263e-06, "loss": 0.6495, "step": 12444 }, { "epoch": 0.3633470555604216, "grad_norm": 0.7073349700755008, "learning_rate": 3.537064071370641e-06, "loss": 0.6174, "step": 12445 }, { "epoch": 0.36337625178826893, "grad_norm": 0.7465425442396975, "learning_rate": 3.536901865369019e-06, "loss": 0.6969, "step": 12446 }, { "epoch": 0.3634054480161163, "grad_norm": 0.7403450971582224, "learning_rate": 3.536739659367397e-06, "loss": 0.697, "step": 12447 }, { "epoch": 0.36343464424396366, "grad_norm": 0.7353370959309736, "learning_rate": 3.5365774533657747e-06, "loss": 0.6933, "step": 12448 }, { "epoch": 0.363463840471811, "grad_norm": 0.743471917461143, "learning_rate": 3.5364152473641527e-06, "loss": 0.6789, "step": 12449 }, { "epoch": 0.3634930366996584, "grad_norm": 0.7604890661456183, "learning_rate": 3.5362530413625307e-06, "loss": 0.6979, "step": 12450 }, { "epoch": 0.36352223292750574, "grad_norm": 0.7415137647627144, "learning_rate": 3.5360908353609087e-06, "loss": 0.676, "step": 12451 }, { "epoch": 0.3635514291553531, "grad_norm": 0.7410143766416323, "learning_rate": 3.5359286293592867e-06, "loss": 0.6305, "step": 12452 }, { "epoch": 0.3635806253832005, "grad_norm": 0.7460669366246396, "learning_rate": 3.5357664233576643e-06, "loss": 0.644, "step": 12453 }, { "epoch": 0.3636098216110479, "grad_norm": 0.688116499114475, "learning_rate": 3.5356042173560423e-06, "loss": 0.5842, "step": 12454 }, { "epoch": 0.36363901783889524, "grad_norm": 0.7103835579808213, "learning_rate": 3.5354420113544203e-06, "loss": 0.6145, "step": 12455 }, { "epoch": 0.3636682140667426, "grad_norm": 0.7483208136657529, "learning_rate": 3.5352798053527983e-06, "loss": 0.6818, "step": 12456 }, { "epoch": 0.36369741029458996, "grad_norm": 0.7420918699906118, "learning_rate": 3.535117599351176e-06, "loss": 0.6878, "step": 12457 }, { "epoch": 0.3637266065224373, "grad_norm": 1.0273716930257084, "learning_rate": 3.534955393349554e-06, "loss": 0.6433, "step": 12458 }, { "epoch": 0.3637558027502847, "grad_norm": 0.7096477635400609, "learning_rate": 3.5347931873479323e-06, "loss": 0.5921, "step": 12459 }, { "epoch": 0.36378499897813205, "grad_norm": 0.724220951752342, "learning_rate": 3.5346309813463103e-06, "loss": 0.6288, "step": 12460 }, { "epoch": 0.3638141952059794, "grad_norm": 0.717184095822007, "learning_rate": 3.5344687753446883e-06, "loss": 0.6609, "step": 12461 }, { "epoch": 0.36384339143382677, "grad_norm": 0.7365383468979615, "learning_rate": 3.5343065693430663e-06, "loss": 0.6349, "step": 12462 }, { "epoch": 0.36387258766167413, "grad_norm": 0.7397665599594829, "learning_rate": 3.534144363341444e-06, "loss": 0.6587, "step": 12463 }, { "epoch": 0.3639017838895215, "grad_norm": 0.7043626479573405, "learning_rate": 3.533982157339822e-06, "loss": 0.603, "step": 12464 }, { "epoch": 0.36393098011736885, "grad_norm": 0.7111053138554296, "learning_rate": 3.5338199513382e-06, "loss": 0.6343, "step": 12465 }, { "epoch": 0.3639601763452162, "grad_norm": 0.7477007841798877, "learning_rate": 3.533657745336578e-06, "loss": 0.6486, "step": 12466 }, { "epoch": 0.3639893725730636, "grad_norm": 0.786799107685349, "learning_rate": 3.5334955393349555e-06, "loss": 0.7882, "step": 12467 }, { "epoch": 0.36401856880091094, "grad_norm": 0.7893236805334519, "learning_rate": 3.5333333333333335e-06, "loss": 0.7429, "step": 12468 }, { "epoch": 0.3640477650287583, "grad_norm": 0.6935857859064379, "learning_rate": 3.5331711273317115e-06, "loss": 0.581, "step": 12469 }, { "epoch": 0.36407696125660566, "grad_norm": 0.7022734106207555, "learning_rate": 3.5330089213300895e-06, "loss": 0.6336, "step": 12470 }, { "epoch": 0.364106157484453, "grad_norm": 0.7231216032316551, "learning_rate": 3.5328467153284675e-06, "loss": 0.6636, "step": 12471 }, { "epoch": 0.3641353537123004, "grad_norm": 0.6908039941440623, "learning_rate": 3.532684509326845e-06, "loss": 0.5663, "step": 12472 }, { "epoch": 0.36416454994014774, "grad_norm": 0.7783212617473388, "learning_rate": 3.532522303325223e-06, "loss": 0.6252, "step": 12473 }, { "epoch": 0.3641937461679951, "grad_norm": 0.72179290567722, "learning_rate": 3.532360097323601e-06, "loss": 0.6237, "step": 12474 }, { "epoch": 0.36422294239584246, "grad_norm": 0.7422677194186295, "learning_rate": 3.532197891321979e-06, "loss": 0.6842, "step": 12475 }, { "epoch": 0.3642521386236898, "grad_norm": 0.6592152284649927, "learning_rate": 3.5320356853203567e-06, "loss": 0.5404, "step": 12476 }, { "epoch": 0.3642813348515372, "grad_norm": 0.9825033130677093, "learning_rate": 3.5318734793187347e-06, "loss": 0.6855, "step": 12477 }, { "epoch": 0.36431053107938455, "grad_norm": 0.7299505452086354, "learning_rate": 3.531711273317113e-06, "loss": 0.683, "step": 12478 }, { "epoch": 0.3643397273072319, "grad_norm": 0.7617297433318613, "learning_rate": 3.531549067315491e-06, "loss": 0.7005, "step": 12479 }, { "epoch": 0.36436892353507927, "grad_norm": 0.7968514132297426, "learning_rate": 3.531386861313869e-06, "loss": 0.6644, "step": 12480 }, { "epoch": 0.36439811976292663, "grad_norm": 0.6699773134551165, "learning_rate": 3.531224655312247e-06, "loss": 0.5443, "step": 12481 }, { "epoch": 0.364427315990774, "grad_norm": 0.737439082121448, "learning_rate": 3.5310624493106248e-06, "loss": 0.6866, "step": 12482 }, { "epoch": 0.36445651221862135, "grad_norm": 0.7807869075382899, "learning_rate": 3.5309002433090028e-06, "loss": 0.7227, "step": 12483 }, { "epoch": 0.3644857084464687, "grad_norm": 0.7718053068176091, "learning_rate": 3.5307380373073808e-06, "loss": 0.7019, "step": 12484 }, { "epoch": 0.3645149046743161, "grad_norm": 0.6959273535543579, "learning_rate": 3.530575831305759e-06, "loss": 0.6038, "step": 12485 }, { "epoch": 0.36454410090216344, "grad_norm": 0.8149123383493435, "learning_rate": 3.5304136253041364e-06, "loss": 0.726, "step": 12486 }, { "epoch": 0.3645732971300108, "grad_norm": 0.7331482392987727, "learning_rate": 3.5302514193025144e-06, "loss": 0.5724, "step": 12487 }, { "epoch": 0.36460249335785816, "grad_norm": 0.7347231066210553, "learning_rate": 3.5300892133008924e-06, "loss": 0.6688, "step": 12488 }, { "epoch": 0.3646316895857055, "grad_norm": 0.743070260179741, "learning_rate": 3.5299270072992704e-06, "loss": 0.6617, "step": 12489 }, { "epoch": 0.3646608858135529, "grad_norm": 0.873876490160085, "learning_rate": 3.5297648012976484e-06, "loss": 0.672, "step": 12490 }, { "epoch": 0.36469008204140024, "grad_norm": 0.7611405892633031, "learning_rate": 3.529602595296026e-06, "loss": 0.7032, "step": 12491 }, { "epoch": 0.3647192782692476, "grad_norm": 0.7240845624737327, "learning_rate": 3.529440389294404e-06, "loss": 0.6572, "step": 12492 }, { "epoch": 0.36474847449709497, "grad_norm": 0.7737629223416163, "learning_rate": 3.529278183292782e-06, "loss": 0.7308, "step": 12493 }, { "epoch": 0.3647776707249423, "grad_norm": 0.7517616108191125, "learning_rate": 3.52911597729116e-06, "loss": 0.6816, "step": 12494 }, { "epoch": 0.3648068669527897, "grad_norm": 1.005262665323193, "learning_rate": 3.5289537712895376e-06, "loss": 0.7364, "step": 12495 }, { "epoch": 0.36483606318063705, "grad_norm": 0.6959631200296791, "learning_rate": 3.5287915652879156e-06, "loss": 0.6217, "step": 12496 }, { "epoch": 0.3648652594084844, "grad_norm": 0.7177947961941329, "learning_rate": 3.528629359286294e-06, "loss": 0.6694, "step": 12497 }, { "epoch": 0.36489445563633177, "grad_norm": 0.7396825219260417, "learning_rate": 3.528467153284672e-06, "loss": 0.6872, "step": 12498 }, { "epoch": 0.36492365186417913, "grad_norm": 0.8016183559368489, "learning_rate": 3.52830494728305e-06, "loss": 0.7513, "step": 12499 }, { "epoch": 0.3649528480920265, "grad_norm": 0.7401534112796823, "learning_rate": 3.528142741281428e-06, "loss": 0.6585, "step": 12500 }, { "epoch": 0.36498204431987386, "grad_norm": 0.6744045054204738, "learning_rate": 3.5279805352798056e-06, "loss": 0.5544, "step": 12501 }, { "epoch": 0.3650112405477212, "grad_norm": 0.8526114577196795, "learning_rate": 3.5278183292781836e-06, "loss": 0.7937, "step": 12502 }, { "epoch": 0.3650404367755686, "grad_norm": 0.8739049698882829, "learning_rate": 3.5276561232765616e-06, "loss": 0.6371, "step": 12503 }, { "epoch": 0.36506963300341594, "grad_norm": 0.7016707965000858, "learning_rate": 3.5274939172749397e-06, "loss": 0.6196, "step": 12504 }, { "epoch": 0.3650988292312633, "grad_norm": 0.7522281285778231, "learning_rate": 3.5273317112733172e-06, "loss": 0.707, "step": 12505 }, { "epoch": 0.36512802545911066, "grad_norm": 0.7099416511947947, "learning_rate": 3.5271695052716952e-06, "loss": 0.6662, "step": 12506 }, { "epoch": 0.365157221686958, "grad_norm": 0.7052713585049153, "learning_rate": 3.5270072992700733e-06, "loss": 0.5541, "step": 12507 }, { "epoch": 0.3651864179148054, "grad_norm": 0.7754583248083589, "learning_rate": 3.5268450932684513e-06, "loss": 0.6483, "step": 12508 }, { "epoch": 0.36521561414265274, "grad_norm": 0.7317442043123242, "learning_rate": 3.5266828872668293e-06, "loss": 0.6424, "step": 12509 }, { "epoch": 0.3652448103705001, "grad_norm": 0.7753744560115323, "learning_rate": 3.526520681265207e-06, "loss": 0.6932, "step": 12510 }, { "epoch": 0.36527400659834747, "grad_norm": 0.7863986454207453, "learning_rate": 3.526358475263585e-06, "loss": 0.6226, "step": 12511 }, { "epoch": 0.36530320282619483, "grad_norm": 0.7436134062174035, "learning_rate": 3.526196269261963e-06, "loss": 0.6694, "step": 12512 }, { "epoch": 0.36533239905404225, "grad_norm": 0.7345604832848701, "learning_rate": 3.526034063260341e-06, "loss": 0.6459, "step": 12513 }, { "epoch": 0.3653615952818896, "grad_norm": 0.7580688221744974, "learning_rate": 3.5258718572587185e-06, "loss": 0.6757, "step": 12514 }, { "epoch": 0.36539079150973697, "grad_norm": 0.7541591313758702, "learning_rate": 3.525709651257097e-06, "loss": 0.7058, "step": 12515 }, { "epoch": 0.36541998773758433, "grad_norm": 0.7029495024261435, "learning_rate": 3.525547445255475e-06, "loss": 0.5892, "step": 12516 }, { "epoch": 0.3654491839654317, "grad_norm": 0.7710197662058554, "learning_rate": 3.525385239253853e-06, "loss": 0.748, "step": 12517 }, { "epoch": 0.36547838019327905, "grad_norm": 0.7115212322938668, "learning_rate": 3.525223033252231e-06, "loss": 0.6291, "step": 12518 }, { "epoch": 0.3655075764211264, "grad_norm": 0.7193357972011892, "learning_rate": 3.525060827250609e-06, "loss": 0.632, "step": 12519 }, { "epoch": 0.3655367726489738, "grad_norm": 0.7595336271513724, "learning_rate": 3.5248986212489865e-06, "loss": 0.6698, "step": 12520 }, { "epoch": 0.36556596887682113, "grad_norm": 0.65925844232698, "learning_rate": 3.5247364152473645e-06, "loss": 0.5769, "step": 12521 }, { "epoch": 0.3655951651046685, "grad_norm": 0.8396216794661889, "learning_rate": 3.5245742092457425e-06, "loss": 0.7063, "step": 12522 }, { "epoch": 0.36562436133251586, "grad_norm": 0.7783107599276488, "learning_rate": 3.5244120032441205e-06, "loss": 0.6316, "step": 12523 }, { "epoch": 0.3656535575603632, "grad_norm": 0.7177227874059828, "learning_rate": 3.524249797242498e-06, "loss": 0.609, "step": 12524 }, { "epoch": 0.3656827537882106, "grad_norm": 0.802953746491086, "learning_rate": 3.524087591240876e-06, "loss": 0.6956, "step": 12525 }, { "epoch": 0.36571195001605794, "grad_norm": 0.8233082518759723, "learning_rate": 3.523925385239254e-06, "loss": 0.7009, "step": 12526 }, { "epoch": 0.3657411462439053, "grad_norm": 0.7584397540230947, "learning_rate": 3.523763179237632e-06, "loss": 0.6992, "step": 12527 }, { "epoch": 0.36577034247175266, "grad_norm": 0.7948774299622596, "learning_rate": 3.52360097323601e-06, "loss": 0.7459, "step": 12528 }, { "epoch": 0.3657995386996, "grad_norm": 0.7936391934086175, "learning_rate": 3.5234387672343877e-06, "loss": 0.7515, "step": 12529 }, { "epoch": 0.3658287349274474, "grad_norm": 0.753199619775106, "learning_rate": 3.5232765612327657e-06, "loss": 0.7118, "step": 12530 }, { "epoch": 0.36585793115529475, "grad_norm": 0.725184109907027, "learning_rate": 3.5231143552311437e-06, "loss": 0.6429, "step": 12531 }, { "epoch": 0.3658871273831421, "grad_norm": 0.7149864386666945, "learning_rate": 3.5229521492295217e-06, "loss": 0.6571, "step": 12532 }, { "epoch": 0.36591632361098947, "grad_norm": 0.7922015598852508, "learning_rate": 3.5227899432278993e-06, "loss": 0.7637, "step": 12533 }, { "epoch": 0.36594551983883683, "grad_norm": 0.750782811604766, "learning_rate": 3.5226277372262777e-06, "loss": 0.6375, "step": 12534 }, { "epoch": 0.3659747160666842, "grad_norm": 0.8131406508926741, "learning_rate": 3.5224655312246557e-06, "loss": 0.6458, "step": 12535 }, { "epoch": 0.36600391229453155, "grad_norm": 0.7813017530059112, "learning_rate": 3.5223033252230338e-06, "loss": 0.7591, "step": 12536 }, { "epoch": 0.3660331085223789, "grad_norm": 0.7048973916716178, "learning_rate": 3.5221411192214118e-06, "loss": 0.6293, "step": 12537 }, { "epoch": 0.3660623047502263, "grad_norm": 0.7882314093693036, "learning_rate": 3.5219789132197898e-06, "loss": 0.7422, "step": 12538 }, { "epoch": 0.36609150097807364, "grad_norm": 0.8050508833492042, "learning_rate": 3.5218167072181674e-06, "loss": 0.768, "step": 12539 }, { "epoch": 0.366120697205921, "grad_norm": 0.7815977798551508, "learning_rate": 3.5216545012165454e-06, "loss": 0.7623, "step": 12540 }, { "epoch": 0.36614989343376836, "grad_norm": 0.6804731875899517, "learning_rate": 3.5214922952149234e-06, "loss": 0.5987, "step": 12541 }, { "epoch": 0.3661790896616157, "grad_norm": 0.7406589304460658, "learning_rate": 3.5213300892133014e-06, "loss": 0.6515, "step": 12542 }, { "epoch": 0.3662082858894631, "grad_norm": 0.7354555481501963, "learning_rate": 3.521167883211679e-06, "loss": 0.7182, "step": 12543 }, { "epoch": 0.36623748211731044, "grad_norm": 0.7165061405939982, "learning_rate": 3.521005677210057e-06, "loss": 0.6364, "step": 12544 }, { "epoch": 0.3662666783451578, "grad_norm": 0.7753037434376033, "learning_rate": 3.520843471208435e-06, "loss": 0.5906, "step": 12545 }, { "epoch": 0.36629587457300516, "grad_norm": 0.6762473277523201, "learning_rate": 3.520681265206813e-06, "loss": 0.6073, "step": 12546 }, { "epoch": 0.3663250708008525, "grad_norm": 0.6867526749329244, "learning_rate": 3.520519059205191e-06, "loss": 0.5738, "step": 12547 }, { "epoch": 0.3663542670286999, "grad_norm": 0.7230317801214952, "learning_rate": 3.5203568532035686e-06, "loss": 0.6523, "step": 12548 }, { "epoch": 0.36638346325654725, "grad_norm": 0.7842581080309379, "learning_rate": 3.5201946472019466e-06, "loss": 0.717, "step": 12549 }, { "epoch": 0.3664126594843946, "grad_norm": 0.7427994533124782, "learning_rate": 3.5200324412003246e-06, "loss": 0.6598, "step": 12550 }, { "epoch": 0.36644185571224197, "grad_norm": 0.6734800524827829, "learning_rate": 3.5198702351987026e-06, "loss": 0.576, "step": 12551 }, { "epoch": 0.36647105194008933, "grad_norm": 0.7616255558575343, "learning_rate": 3.51970802919708e-06, "loss": 0.6976, "step": 12552 }, { "epoch": 0.3665002481679367, "grad_norm": 0.7619110222546998, "learning_rate": 3.5195458231954586e-06, "loss": 0.6313, "step": 12553 }, { "epoch": 0.36652944439578405, "grad_norm": 0.7326331882431498, "learning_rate": 3.5193836171938366e-06, "loss": 0.6803, "step": 12554 }, { "epoch": 0.3665586406236314, "grad_norm": 0.7848258391610098, "learning_rate": 3.5192214111922146e-06, "loss": 0.6278, "step": 12555 }, { "epoch": 0.3665878368514788, "grad_norm": 0.7459198559079078, "learning_rate": 3.5190592051905926e-06, "loss": 0.6916, "step": 12556 }, { "epoch": 0.36661703307932614, "grad_norm": 0.6999685216252032, "learning_rate": 3.5188969991889706e-06, "loss": 0.6055, "step": 12557 }, { "epoch": 0.3666462293071735, "grad_norm": 0.6572736260695565, "learning_rate": 3.518734793187348e-06, "loss": 0.5767, "step": 12558 }, { "epoch": 0.36667542553502086, "grad_norm": 0.7407014825103737, "learning_rate": 3.5185725871857262e-06, "loss": 0.6622, "step": 12559 }, { "epoch": 0.3667046217628682, "grad_norm": 0.7170830497609588, "learning_rate": 3.5184103811841042e-06, "loss": 0.6758, "step": 12560 }, { "epoch": 0.3667338179907156, "grad_norm": 0.7573621267274923, "learning_rate": 3.5182481751824822e-06, "loss": 0.6951, "step": 12561 }, { "epoch": 0.36676301421856294, "grad_norm": 0.7116482915755176, "learning_rate": 3.51808596918086e-06, "loss": 0.6517, "step": 12562 }, { "epoch": 0.3667922104464103, "grad_norm": 0.772877070823114, "learning_rate": 3.517923763179238e-06, "loss": 0.7193, "step": 12563 }, { "epoch": 0.36682140667425767, "grad_norm": 0.6700359850367038, "learning_rate": 3.517761557177616e-06, "loss": 0.5265, "step": 12564 }, { "epoch": 0.366850602902105, "grad_norm": 0.7431503045369646, "learning_rate": 3.517599351175994e-06, "loss": 0.6618, "step": 12565 }, { "epoch": 0.3668797991299524, "grad_norm": 0.7324910351887328, "learning_rate": 3.5174371451743714e-06, "loss": 0.6848, "step": 12566 }, { "epoch": 0.36690899535779975, "grad_norm": 0.7985364473252283, "learning_rate": 3.5172749391727494e-06, "loss": 0.7155, "step": 12567 }, { "epoch": 0.3669381915856471, "grad_norm": 0.7932440926025577, "learning_rate": 3.5171127331711274e-06, "loss": 0.6842, "step": 12568 }, { "epoch": 0.36696738781349447, "grad_norm": 0.7040305880751845, "learning_rate": 3.5169505271695054e-06, "loss": 0.614, "step": 12569 }, { "epoch": 0.36699658404134183, "grad_norm": 0.6845418571964997, "learning_rate": 3.5167883211678834e-06, "loss": 0.5907, "step": 12570 }, { "epoch": 0.3670257802691892, "grad_norm": 0.7136124039743427, "learning_rate": 3.516626115166261e-06, "loss": 0.5956, "step": 12571 }, { "epoch": 0.36705497649703656, "grad_norm": 0.7081705337583948, "learning_rate": 3.5164639091646395e-06, "loss": 0.6266, "step": 12572 }, { "epoch": 0.367084172724884, "grad_norm": 0.7093199636111834, "learning_rate": 3.5163017031630175e-06, "loss": 0.643, "step": 12573 }, { "epoch": 0.36711336895273133, "grad_norm": 0.7674912525898459, "learning_rate": 3.5161394971613955e-06, "loss": 0.678, "step": 12574 }, { "epoch": 0.3671425651805787, "grad_norm": 0.7641354583445308, "learning_rate": 3.5159772911597735e-06, "loss": 0.7142, "step": 12575 }, { "epoch": 0.36717176140842606, "grad_norm": 0.7180716391904601, "learning_rate": 3.5158150851581515e-06, "loss": 0.6426, "step": 12576 }, { "epoch": 0.3672009576362734, "grad_norm": 0.7711298450053613, "learning_rate": 3.515652879156529e-06, "loss": 0.7037, "step": 12577 }, { "epoch": 0.3672301538641208, "grad_norm": 0.75736926267235, "learning_rate": 3.515490673154907e-06, "loss": 0.6258, "step": 12578 }, { "epoch": 0.36725935009196814, "grad_norm": 0.7005915558743563, "learning_rate": 3.515328467153285e-06, "loss": 0.6412, "step": 12579 }, { "epoch": 0.3672885463198155, "grad_norm": 0.7803354860973316, "learning_rate": 3.515166261151663e-06, "loss": 0.7565, "step": 12580 }, { "epoch": 0.36731774254766286, "grad_norm": 0.6955932133043523, "learning_rate": 3.5150040551500407e-06, "loss": 0.6064, "step": 12581 }, { "epoch": 0.3673469387755102, "grad_norm": 0.698409383598462, "learning_rate": 3.5148418491484187e-06, "loss": 0.6338, "step": 12582 }, { "epoch": 0.3673761350033576, "grad_norm": 0.752859867614006, "learning_rate": 3.5146796431467967e-06, "loss": 0.6704, "step": 12583 }, { "epoch": 0.36740533123120495, "grad_norm": 0.7297802480838783, "learning_rate": 3.5145174371451747e-06, "loss": 0.6947, "step": 12584 }, { "epoch": 0.3674345274590523, "grad_norm": 0.8335810210514886, "learning_rate": 3.5143552311435523e-06, "loss": 0.7484, "step": 12585 }, { "epoch": 0.36746372368689967, "grad_norm": 0.7580049491346227, "learning_rate": 3.5141930251419303e-06, "loss": 0.7294, "step": 12586 }, { "epoch": 0.36749291991474703, "grad_norm": 0.7275216631321935, "learning_rate": 3.5140308191403083e-06, "loss": 0.6637, "step": 12587 }, { "epoch": 0.3675221161425944, "grad_norm": 0.7018574216503131, "learning_rate": 3.5138686131386863e-06, "loss": 0.6096, "step": 12588 }, { "epoch": 0.36755131237044175, "grad_norm": 0.7402971726355092, "learning_rate": 3.5137064071370643e-06, "loss": 0.6492, "step": 12589 }, { "epoch": 0.3675805085982891, "grad_norm": 0.7521269783621864, "learning_rate": 3.513544201135442e-06, "loss": 0.6867, "step": 12590 }, { "epoch": 0.3676097048261365, "grad_norm": 0.819470132293748, "learning_rate": 3.5133819951338203e-06, "loss": 0.7184, "step": 12591 }, { "epoch": 0.36763890105398384, "grad_norm": 0.8280572834231339, "learning_rate": 3.5132197891321983e-06, "loss": 0.7191, "step": 12592 }, { "epoch": 0.3676680972818312, "grad_norm": 0.7337873861941516, "learning_rate": 3.5130575831305763e-06, "loss": 0.6394, "step": 12593 }, { "epoch": 0.36769729350967856, "grad_norm": 0.722457092492375, "learning_rate": 3.5128953771289543e-06, "loss": 0.6349, "step": 12594 }, { "epoch": 0.3677264897375259, "grad_norm": 0.7369410865597068, "learning_rate": 3.5127331711273323e-06, "loss": 0.7161, "step": 12595 }, { "epoch": 0.3677556859653733, "grad_norm": 0.778548595680391, "learning_rate": 3.51257096512571e-06, "loss": 0.764, "step": 12596 }, { "epoch": 0.36778488219322064, "grad_norm": 0.7670951877441079, "learning_rate": 3.512408759124088e-06, "loss": 0.719, "step": 12597 }, { "epoch": 0.367814078421068, "grad_norm": 0.7521504705230427, "learning_rate": 3.512246553122466e-06, "loss": 0.6704, "step": 12598 }, { "epoch": 0.36784327464891536, "grad_norm": 0.7699197321182855, "learning_rate": 3.512084347120844e-06, "loss": 0.7196, "step": 12599 }, { "epoch": 0.3678724708767627, "grad_norm": 0.7748804811252585, "learning_rate": 3.5119221411192215e-06, "loss": 0.7574, "step": 12600 }, { "epoch": 0.3679016671046101, "grad_norm": 0.7368663855148329, "learning_rate": 3.5117599351175995e-06, "loss": 0.648, "step": 12601 }, { "epoch": 0.36793086333245745, "grad_norm": 0.7364135996572428, "learning_rate": 3.5115977291159775e-06, "loss": 0.6788, "step": 12602 }, { "epoch": 0.3679600595603048, "grad_norm": 0.7434715484013702, "learning_rate": 3.5114355231143556e-06, "loss": 0.5983, "step": 12603 }, { "epoch": 0.36798925578815217, "grad_norm": 0.7190810643034037, "learning_rate": 3.511273317112733e-06, "loss": 0.6246, "step": 12604 }, { "epoch": 0.36801845201599953, "grad_norm": 0.8429376537987469, "learning_rate": 3.511111111111111e-06, "loss": 0.7402, "step": 12605 }, { "epoch": 0.3680476482438469, "grad_norm": 0.8182726513355109, "learning_rate": 3.510948905109489e-06, "loss": 0.6683, "step": 12606 }, { "epoch": 0.36807684447169425, "grad_norm": 0.7210640941020925, "learning_rate": 3.510786699107867e-06, "loss": 0.6299, "step": 12607 }, { "epoch": 0.3681060406995416, "grad_norm": 0.7479852711860533, "learning_rate": 3.510624493106245e-06, "loss": 0.6912, "step": 12608 }, { "epoch": 0.368135236927389, "grad_norm": 0.6865092378023578, "learning_rate": 3.5104622871046227e-06, "loss": 0.5309, "step": 12609 }, { "epoch": 0.36816443315523634, "grad_norm": 0.7762672176778316, "learning_rate": 3.510300081103001e-06, "loss": 0.7283, "step": 12610 }, { "epoch": 0.3681936293830837, "grad_norm": 0.7504270679228595, "learning_rate": 3.510137875101379e-06, "loss": 0.6407, "step": 12611 }, { "epoch": 0.36822282561093106, "grad_norm": 0.7656321030875395, "learning_rate": 3.509975669099757e-06, "loss": 0.7344, "step": 12612 }, { "epoch": 0.3682520218387784, "grad_norm": 0.7394987420063112, "learning_rate": 3.509813463098135e-06, "loss": 0.6616, "step": 12613 }, { "epoch": 0.3682812180666258, "grad_norm": 0.692305839045574, "learning_rate": 3.509651257096513e-06, "loss": 0.6163, "step": 12614 }, { "epoch": 0.36831041429447314, "grad_norm": 0.7543763388071675, "learning_rate": 3.5094890510948908e-06, "loss": 0.666, "step": 12615 }, { "epoch": 0.3683396105223205, "grad_norm": 0.8346290748643805, "learning_rate": 3.509326845093269e-06, "loss": 0.7267, "step": 12616 }, { "epoch": 0.36836880675016787, "grad_norm": 0.725022622111995, "learning_rate": 3.509164639091647e-06, "loss": 0.5399, "step": 12617 }, { "epoch": 0.3683980029780152, "grad_norm": 0.7644219325770837, "learning_rate": 3.509002433090025e-06, "loss": 0.6893, "step": 12618 }, { "epoch": 0.3684271992058626, "grad_norm": 0.8441052022265934, "learning_rate": 3.5088402270884024e-06, "loss": 0.6182, "step": 12619 }, { "epoch": 0.36845639543370995, "grad_norm": 0.764469809001716, "learning_rate": 3.5086780210867804e-06, "loss": 0.656, "step": 12620 }, { "epoch": 0.3684855916615573, "grad_norm": 0.7676927209149309, "learning_rate": 3.5085158150851584e-06, "loss": 0.6681, "step": 12621 }, { "epoch": 0.36851478788940467, "grad_norm": 1.0042166180042347, "learning_rate": 3.5083536090835364e-06, "loss": 0.8348, "step": 12622 }, { "epoch": 0.36854398411725203, "grad_norm": 0.7126128942681319, "learning_rate": 3.508191403081914e-06, "loss": 0.571, "step": 12623 }, { "epoch": 0.3685731803450994, "grad_norm": 0.7150749179801096, "learning_rate": 3.508029197080292e-06, "loss": 0.654, "step": 12624 }, { "epoch": 0.36860237657294675, "grad_norm": 0.733022669146135, "learning_rate": 3.50786699107867e-06, "loss": 0.6288, "step": 12625 }, { "epoch": 0.3686315728007941, "grad_norm": 0.7347626492353105, "learning_rate": 3.507704785077048e-06, "loss": 0.6752, "step": 12626 }, { "epoch": 0.3686607690286415, "grad_norm": 0.7217146368769346, "learning_rate": 3.507542579075426e-06, "loss": 0.6427, "step": 12627 }, { "epoch": 0.36868996525648884, "grad_norm": 0.7261091134876683, "learning_rate": 3.5073803730738036e-06, "loss": 0.6413, "step": 12628 }, { "epoch": 0.3687191614843362, "grad_norm": 0.7543833917061731, "learning_rate": 3.507218167072182e-06, "loss": 0.7159, "step": 12629 }, { "epoch": 0.36874835771218356, "grad_norm": 0.7334894508930822, "learning_rate": 3.50705596107056e-06, "loss": 0.6462, "step": 12630 }, { "epoch": 0.3687775539400309, "grad_norm": 0.6870262620371232, "learning_rate": 3.506893755068938e-06, "loss": 0.6173, "step": 12631 }, { "epoch": 0.3688067501678783, "grad_norm": 0.7058232159961713, "learning_rate": 3.506731549067316e-06, "loss": 0.5983, "step": 12632 }, { "epoch": 0.36883594639572564, "grad_norm": 0.7420839764488932, "learning_rate": 3.506569343065694e-06, "loss": 0.6903, "step": 12633 }, { "epoch": 0.36886514262357306, "grad_norm": 0.7465640452626776, "learning_rate": 3.5064071370640716e-06, "loss": 0.6524, "step": 12634 }, { "epoch": 0.3688943388514204, "grad_norm": 0.8166199649976791, "learning_rate": 3.5062449310624497e-06, "loss": 0.7476, "step": 12635 }, { "epoch": 0.3689235350792678, "grad_norm": 0.7447541076849824, "learning_rate": 3.5060827250608277e-06, "loss": 0.6074, "step": 12636 }, { "epoch": 0.36895273130711514, "grad_norm": 0.7642990953982621, "learning_rate": 3.5059205190592057e-06, "loss": 0.6917, "step": 12637 }, { "epoch": 0.3689819275349625, "grad_norm": 0.7001000836152863, "learning_rate": 3.5057583130575832e-06, "loss": 0.5807, "step": 12638 }, { "epoch": 0.36901112376280987, "grad_norm": 0.8082286903113277, "learning_rate": 3.5055961070559613e-06, "loss": 0.7776, "step": 12639 }, { "epoch": 0.36904031999065723, "grad_norm": 0.7620968187911005, "learning_rate": 3.5054339010543393e-06, "loss": 0.6966, "step": 12640 }, { "epoch": 0.3690695162185046, "grad_norm": 0.8119845193660593, "learning_rate": 3.5052716950527173e-06, "loss": 0.8496, "step": 12641 }, { "epoch": 0.36909871244635195, "grad_norm": 0.7539538405076763, "learning_rate": 3.505109489051095e-06, "loss": 0.7259, "step": 12642 }, { "epoch": 0.3691279086741993, "grad_norm": 0.751498591110755, "learning_rate": 3.504947283049473e-06, "loss": 0.6966, "step": 12643 }, { "epoch": 0.3691571049020467, "grad_norm": 0.8652101464916566, "learning_rate": 3.504785077047851e-06, "loss": 0.7202, "step": 12644 }, { "epoch": 0.36918630112989403, "grad_norm": 0.7862985706392569, "learning_rate": 3.504622871046229e-06, "loss": 0.6939, "step": 12645 }, { "epoch": 0.3692154973577414, "grad_norm": 0.7114897102184153, "learning_rate": 3.504460665044607e-06, "loss": 0.6, "step": 12646 }, { "epoch": 0.36924469358558876, "grad_norm": 0.7993906994179097, "learning_rate": 3.5042984590429845e-06, "loss": 0.7228, "step": 12647 }, { "epoch": 0.3692738898134361, "grad_norm": 0.7536647059422522, "learning_rate": 3.504136253041363e-06, "loss": 0.6273, "step": 12648 }, { "epoch": 0.3693030860412835, "grad_norm": 0.7452106487561833, "learning_rate": 3.503974047039741e-06, "loss": 0.6668, "step": 12649 }, { "epoch": 0.36933228226913084, "grad_norm": 0.7387996447649909, "learning_rate": 3.503811841038119e-06, "loss": 0.632, "step": 12650 }, { "epoch": 0.3693614784969782, "grad_norm": 0.7432321581673881, "learning_rate": 3.503649635036497e-06, "loss": 0.6619, "step": 12651 }, { "epoch": 0.36939067472482556, "grad_norm": 0.7405685081796877, "learning_rate": 3.503487429034875e-06, "loss": 0.6135, "step": 12652 }, { "epoch": 0.3694198709526729, "grad_norm": 0.8192375080390856, "learning_rate": 3.5033252230332525e-06, "loss": 0.6874, "step": 12653 }, { "epoch": 0.3694490671805203, "grad_norm": 0.6320391077146561, "learning_rate": 3.5031630170316305e-06, "loss": 0.5048, "step": 12654 }, { "epoch": 0.36947826340836765, "grad_norm": 0.7295577401815874, "learning_rate": 3.5030008110300085e-06, "loss": 0.6931, "step": 12655 }, { "epoch": 0.369507459636215, "grad_norm": 0.7660596824711668, "learning_rate": 3.5028386050283865e-06, "loss": 0.6921, "step": 12656 }, { "epoch": 0.36953665586406237, "grad_norm": 0.87207638307492, "learning_rate": 3.502676399026764e-06, "loss": 0.817, "step": 12657 }, { "epoch": 0.36956585209190973, "grad_norm": 0.7152993103257751, "learning_rate": 3.502514193025142e-06, "loss": 0.6109, "step": 12658 }, { "epoch": 0.3695950483197571, "grad_norm": 0.756503210590964, "learning_rate": 3.50235198702352e-06, "loss": 0.7328, "step": 12659 }, { "epoch": 0.36962424454760445, "grad_norm": 0.7875349103028678, "learning_rate": 3.502189781021898e-06, "loss": 0.8093, "step": 12660 }, { "epoch": 0.3696534407754518, "grad_norm": 0.6881608299382805, "learning_rate": 3.5020275750202757e-06, "loss": 0.6088, "step": 12661 }, { "epoch": 0.3696826370032992, "grad_norm": 0.7283845124408298, "learning_rate": 3.5018653690186537e-06, "loss": 0.6029, "step": 12662 }, { "epoch": 0.36971183323114654, "grad_norm": 0.8196443086979356, "learning_rate": 3.5017031630170317e-06, "loss": 0.5979, "step": 12663 }, { "epoch": 0.3697410294589939, "grad_norm": 0.7522940786071531, "learning_rate": 3.5015409570154097e-06, "loss": 0.6914, "step": 12664 }, { "epoch": 0.36977022568684126, "grad_norm": 0.7063088298271327, "learning_rate": 3.5013787510137877e-06, "loss": 0.6395, "step": 12665 }, { "epoch": 0.3697994219146886, "grad_norm": 0.7118391490116234, "learning_rate": 3.501216545012166e-06, "loss": 0.6019, "step": 12666 }, { "epoch": 0.369828618142536, "grad_norm": 0.7309416362824359, "learning_rate": 3.5010543390105438e-06, "loss": 0.6244, "step": 12667 }, { "epoch": 0.36985781437038334, "grad_norm": 0.7411199074419477, "learning_rate": 3.5008921330089218e-06, "loss": 0.6655, "step": 12668 }, { "epoch": 0.3698870105982307, "grad_norm": 0.7499322481741133, "learning_rate": 3.5007299270072998e-06, "loss": 0.6864, "step": 12669 }, { "epoch": 0.36991620682607806, "grad_norm": 0.781283980455388, "learning_rate": 3.5005677210056778e-06, "loss": 0.6425, "step": 12670 }, { "epoch": 0.3699454030539254, "grad_norm": 0.7216587127259178, "learning_rate": 3.5004055150040554e-06, "loss": 0.6281, "step": 12671 }, { "epoch": 0.3699745992817728, "grad_norm": 0.7111296864265855, "learning_rate": 3.5002433090024334e-06, "loss": 0.6561, "step": 12672 }, { "epoch": 0.37000379550962015, "grad_norm": 0.6920846485068335, "learning_rate": 3.5000811030008114e-06, "loss": 0.535, "step": 12673 }, { "epoch": 0.3700329917374675, "grad_norm": 0.7242162798518907, "learning_rate": 3.4999188969991894e-06, "loss": 0.6226, "step": 12674 }, { "epoch": 0.37006218796531487, "grad_norm": 0.7668147526783866, "learning_rate": 3.4997566909975674e-06, "loss": 0.6725, "step": 12675 }, { "epoch": 0.37009138419316223, "grad_norm": 0.764298637609638, "learning_rate": 3.499594484995945e-06, "loss": 0.7368, "step": 12676 }, { "epoch": 0.3701205804210096, "grad_norm": 0.6964337126918083, "learning_rate": 3.499432278994323e-06, "loss": 0.5878, "step": 12677 }, { "epoch": 0.37014977664885695, "grad_norm": 0.7680328881888457, "learning_rate": 3.499270072992701e-06, "loss": 0.6548, "step": 12678 }, { "epoch": 0.3701789728767043, "grad_norm": 0.7293683760274472, "learning_rate": 3.499107866991079e-06, "loss": 0.6425, "step": 12679 }, { "epoch": 0.3702081691045517, "grad_norm": 0.7145438086156659, "learning_rate": 3.4989456609894566e-06, "loss": 0.5992, "step": 12680 }, { "epoch": 0.37023736533239904, "grad_norm": 0.7571260156698657, "learning_rate": 3.4987834549878346e-06, "loss": 0.664, "step": 12681 }, { "epoch": 0.3702665615602464, "grad_norm": 0.7389568379440464, "learning_rate": 3.4986212489862126e-06, "loss": 0.6606, "step": 12682 }, { "epoch": 0.37029575778809376, "grad_norm": 0.8397549290754486, "learning_rate": 3.4984590429845906e-06, "loss": 0.6785, "step": 12683 }, { "epoch": 0.3703249540159411, "grad_norm": 0.753398049843855, "learning_rate": 3.4982968369829686e-06, "loss": 0.7062, "step": 12684 }, { "epoch": 0.3703541502437885, "grad_norm": 0.7499039610053323, "learning_rate": 3.498134630981347e-06, "loss": 0.6695, "step": 12685 }, { "epoch": 0.37038334647163584, "grad_norm": 0.7491720478412452, "learning_rate": 3.4979724249797246e-06, "loss": 0.6891, "step": 12686 }, { "epoch": 0.3704125426994832, "grad_norm": 0.7875843922448638, "learning_rate": 3.4978102189781026e-06, "loss": 0.795, "step": 12687 }, { "epoch": 0.37044173892733057, "grad_norm": 0.7162507948532228, "learning_rate": 3.4976480129764806e-06, "loss": 0.6151, "step": 12688 }, { "epoch": 0.3704709351551779, "grad_norm": 0.7621189649465056, "learning_rate": 3.4974858069748586e-06, "loss": 0.6774, "step": 12689 }, { "epoch": 0.3705001313830253, "grad_norm": 0.7895247398530957, "learning_rate": 3.4973236009732362e-06, "loss": 0.7202, "step": 12690 }, { "epoch": 0.37052932761087265, "grad_norm": 0.7447921774104153, "learning_rate": 3.4971613949716142e-06, "loss": 0.6831, "step": 12691 }, { "epoch": 0.37055852383872, "grad_norm": 0.8793944283129397, "learning_rate": 3.4969991889699922e-06, "loss": 0.7139, "step": 12692 }, { "epoch": 0.37058772006656737, "grad_norm": 0.741515390128989, "learning_rate": 3.4968369829683702e-06, "loss": 0.6904, "step": 12693 }, { "epoch": 0.3706169162944148, "grad_norm": 0.7318284615750982, "learning_rate": 3.4966747769667482e-06, "loss": 0.6535, "step": 12694 }, { "epoch": 0.37064611252226215, "grad_norm": 0.7010096541806827, "learning_rate": 3.496512570965126e-06, "loss": 0.5713, "step": 12695 }, { "epoch": 0.3706753087501095, "grad_norm": 0.7291457112915317, "learning_rate": 3.496350364963504e-06, "loss": 0.6404, "step": 12696 }, { "epoch": 0.3707045049779569, "grad_norm": 0.7173009796706202, "learning_rate": 3.496188158961882e-06, "loss": 0.6638, "step": 12697 }, { "epoch": 0.37073370120580423, "grad_norm": 0.7362067802312727, "learning_rate": 3.49602595296026e-06, "loss": 0.7129, "step": 12698 }, { "epoch": 0.3707628974336516, "grad_norm": 0.6800291198748365, "learning_rate": 3.4958637469586374e-06, "loss": 0.6207, "step": 12699 }, { "epoch": 0.37079209366149896, "grad_norm": 0.7495192580923337, "learning_rate": 3.4957015409570154e-06, "loss": 0.7076, "step": 12700 }, { "epoch": 0.3708212898893463, "grad_norm": 0.7162402304919041, "learning_rate": 3.4955393349553934e-06, "loss": 0.6382, "step": 12701 }, { "epoch": 0.3708504861171937, "grad_norm": 0.8862806420062677, "learning_rate": 3.4953771289537714e-06, "loss": 0.6944, "step": 12702 }, { "epoch": 0.37087968234504104, "grad_norm": 0.7000616541825434, "learning_rate": 3.4952149229521495e-06, "loss": 0.6046, "step": 12703 }, { "epoch": 0.3709088785728884, "grad_norm": 0.7256003552060935, "learning_rate": 3.495052716950528e-06, "loss": 0.6804, "step": 12704 }, { "epoch": 0.37093807480073576, "grad_norm": 0.71679004956447, "learning_rate": 3.4948905109489055e-06, "loss": 0.6363, "step": 12705 }, { "epoch": 0.3709672710285831, "grad_norm": 0.7461170067770516, "learning_rate": 3.4947283049472835e-06, "loss": 0.6737, "step": 12706 }, { "epoch": 0.3709964672564305, "grad_norm": 0.6692651909235094, "learning_rate": 3.4945660989456615e-06, "loss": 0.5595, "step": 12707 }, { "epoch": 0.37102566348427785, "grad_norm": 0.7706407535886217, "learning_rate": 3.4944038929440395e-06, "loss": 0.7099, "step": 12708 }, { "epoch": 0.3710548597121252, "grad_norm": 0.7125242005372107, "learning_rate": 3.494241686942417e-06, "loss": 0.6568, "step": 12709 }, { "epoch": 0.37108405593997257, "grad_norm": 0.7524879292656215, "learning_rate": 3.494079480940795e-06, "loss": 0.7042, "step": 12710 }, { "epoch": 0.37111325216781993, "grad_norm": 0.7815650655569459, "learning_rate": 3.493917274939173e-06, "loss": 0.6969, "step": 12711 }, { "epoch": 0.3711424483956673, "grad_norm": 0.8215242508559388, "learning_rate": 3.493755068937551e-06, "loss": 0.7001, "step": 12712 }, { "epoch": 0.37117164462351465, "grad_norm": 0.8265168589314792, "learning_rate": 3.493592862935929e-06, "loss": 0.7437, "step": 12713 }, { "epoch": 0.371200840851362, "grad_norm": 0.77218601653616, "learning_rate": 3.4934306569343067e-06, "loss": 0.6961, "step": 12714 }, { "epoch": 0.3712300370792094, "grad_norm": 0.7262069208896192, "learning_rate": 3.4932684509326847e-06, "loss": 0.6504, "step": 12715 }, { "epoch": 0.37125923330705674, "grad_norm": 0.7145148292495369, "learning_rate": 3.4931062449310627e-06, "loss": 0.6, "step": 12716 }, { "epoch": 0.3712884295349041, "grad_norm": 0.6959714769199887, "learning_rate": 3.4929440389294407e-06, "loss": 0.6229, "step": 12717 }, { "epoch": 0.37131762576275146, "grad_norm": 0.7767427063397816, "learning_rate": 3.4927818329278183e-06, "loss": 0.6992, "step": 12718 }, { "epoch": 0.3713468219905988, "grad_norm": 0.7589896061211068, "learning_rate": 3.4926196269261963e-06, "loss": 0.7213, "step": 12719 }, { "epoch": 0.3713760182184462, "grad_norm": 0.805049016342469, "learning_rate": 3.4924574209245743e-06, "loss": 0.7857, "step": 12720 }, { "epoch": 0.37140521444629354, "grad_norm": 0.6872819178070732, "learning_rate": 3.4922952149229523e-06, "loss": 0.5686, "step": 12721 }, { "epoch": 0.3714344106741409, "grad_norm": 0.7710515202621264, "learning_rate": 3.4921330089213303e-06, "loss": 0.7344, "step": 12722 }, { "epoch": 0.37146360690198826, "grad_norm": 0.7340488402820018, "learning_rate": 3.4919708029197087e-06, "loss": 0.6139, "step": 12723 }, { "epoch": 0.3714928031298356, "grad_norm": 0.7101109338143412, "learning_rate": 3.4918085969180863e-06, "loss": 0.6213, "step": 12724 }, { "epoch": 0.371521999357683, "grad_norm": 0.6974467752481304, "learning_rate": 3.4916463909164643e-06, "loss": 0.6528, "step": 12725 }, { "epoch": 0.37155119558553035, "grad_norm": 0.7250018292880422, "learning_rate": 3.4914841849148423e-06, "loss": 0.6852, "step": 12726 }, { "epoch": 0.3715803918133777, "grad_norm": 0.6890714558165033, "learning_rate": 3.4913219789132203e-06, "loss": 0.5731, "step": 12727 }, { "epoch": 0.37160958804122507, "grad_norm": 0.7402994758892029, "learning_rate": 3.491159772911598e-06, "loss": 0.6355, "step": 12728 }, { "epoch": 0.37163878426907243, "grad_norm": 0.639969271670561, "learning_rate": 3.490997566909976e-06, "loss": 0.4837, "step": 12729 }, { "epoch": 0.3716679804969198, "grad_norm": 0.7032575861208596, "learning_rate": 3.490835360908354e-06, "loss": 0.579, "step": 12730 }, { "epoch": 0.37169717672476715, "grad_norm": 0.755306396399961, "learning_rate": 3.490673154906732e-06, "loss": 0.7331, "step": 12731 }, { "epoch": 0.3717263729526145, "grad_norm": 1.4089401948441342, "learning_rate": 3.49051094890511e-06, "loss": 0.7622, "step": 12732 }, { "epoch": 0.3717555691804619, "grad_norm": 0.702281997152991, "learning_rate": 3.4903487429034875e-06, "loss": 0.5988, "step": 12733 }, { "epoch": 0.37178476540830924, "grad_norm": 0.8155746168718647, "learning_rate": 3.4901865369018655e-06, "loss": 0.7774, "step": 12734 }, { "epoch": 0.3718139616361566, "grad_norm": 0.6924308333303334, "learning_rate": 3.4900243309002436e-06, "loss": 0.639, "step": 12735 }, { "epoch": 0.37184315786400396, "grad_norm": 0.7149636629192775, "learning_rate": 3.4898621248986216e-06, "loss": 0.609, "step": 12736 }, { "epoch": 0.3718723540918513, "grad_norm": 0.7306497636327937, "learning_rate": 3.489699918896999e-06, "loss": 0.6462, "step": 12737 }, { "epoch": 0.3719015503196987, "grad_norm": 0.7753478801674097, "learning_rate": 3.489537712895377e-06, "loss": 0.7254, "step": 12738 }, { "epoch": 0.37193074654754604, "grad_norm": 0.7541617922608544, "learning_rate": 3.489375506893755e-06, "loss": 0.6242, "step": 12739 }, { "epoch": 0.3719599427753934, "grad_norm": 0.7065349069121244, "learning_rate": 3.489213300892133e-06, "loss": 0.6273, "step": 12740 }, { "epoch": 0.37198913900324077, "grad_norm": 0.6848238278375055, "learning_rate": 3.489051094890511e-06, "loss": 0.5689, "step": 12741 }, { "epoch": 0.3720183352310881, "grad_norm": 0.7487117255860252, "learning_rate": 3.4888888888888896e-06, "loss": 0.718, "step": 12742 }, { "epoch": 0.3720475314589355, "grad_norm": 0.7346804607549321, "learning_rate": 3.488726682887267e-06, "loss": 0.6506, "step": 12743 }, { "epoch": 0.37207672768678285, "grad_norm": 0.7987249536681758, "learning_rate": 3.488564476885645e-06, "loss": 0.8182, "step": 12744 }, { "epoch": 0.3721059239146302, "grad_norm": 0.7882676588969134, "learning_rate": 3.488402270884023e-06, "loss": 0.6827, "step": 12745 }, { "epoch": 0.37213512014247757, "grad_norm": 0.7387617845528227, "learning_rate": 3.488240064882401e-06, "loss": 0.6667, "step": 12746 }, { "epoch": 0.37216431637032493, "grad_norm": 0.8368417133184584, "learning_rate": 3.488077858880779e-06, "loss": 0.6955, "step": 12747 }, { "epoch": 0.3721935125981723, "grad_norm": 0.7630857778568897, "learning_rate": 3.487915652879157e-06, "loss": 0.6994, "step": 12748 }, { "epoch": 0.37222270882601965, "grad_norm": 0.7062082325472203, "learning_rate": 3.487753446877535e-06, "loss": 0.6365, "step": 12749 }, { "epoch": 0.372251905053867, "grad_norm": 0.7779399222197804, "learning_rate": 3.487591240875913e-06, "loss": 0.7507, "step": 12750 }, { "epoch": 0.3722811012817144, "grad_norm": 0.7302761694235004, "learning_rate": 3.487429034874291e-06, "loss": 0.6671, "step": 12751 }, { "epoch": 0.37231029750956174, "grad_norm": 0.7867744625984536, "learning_rate": 3.4872668288726684e-06, "loss": 0.714, "step": 12752 }, { "epoch": 0.3723394937374091, "grad_norm": 0.8007301448708771, "learning_rate": 3.4871046228710464e-06, "loss": 0.7248, "step": 12753 }, { "epoch": 0.3723686899652565, "grad_norm": 0.6837283921325963, "learning_rate": 3.4869424168694244e-06, "loss": 0.5698, "step": 12754 }, { "epoch": 0.3723978861931039, "grad_norm": 0.7128919937094976, "learning_rate": 3.4867802108678024e-06, "loss": 0.6096, "step": 12755 }, { "epoch": 0.37242708242095124, "grad_norm": 0.6841581179735188, "learning_rate": 3.48661800486618e-06, "loss": 0.5894, "step": 12756 }, { "epoch": 0.3724562786487986, "grad_norm": 0.7609505950112679, "learning_rate": 3.486455798864558e-06, "loss": 0.7009, "step": 12757 }, { "epoch": 0.37248547487664596, "grad_norm": 0.6550899579588979, "learning_rate": 3.486293592862936e-06, "loss": 0.5224, "step": 12758 }, { "epoch": 0.3725146711044933, "grad_norm": 0.6660886559413998, "learning_rate": 3.486131386861314e-06, "loss": 0.5953, "step": 12759 }, { "epoch": 0.3725438673323407, "grad_norm": 0.7595704699872138, "learning_rate": 3.485969180859692e-06, "loss": 0.6743, "step": 12760 }, { "epoch": 0.37257306356018804, "grad_norm": 0.7015879695798691, "learning_rate": 3.4858069748580705e-06, "loss": 0.6078, "step": 12761 }, { "epoch": 0.3726022597880354, "grad_norm": 0.7351680098456794, "learning_rate": 3.485644768856448e-06, "loss": 0.6648, "step": 12762 }, { "epoch": 0.37263145601588277, "grad_norm": 0.7888658409572294, "learning_rate": 3.485482562854826e-06, "loss": 0.7172, "step": 12763 }, { "epoch": 0.37266065224373013, "grad_norm": 0.7536135036533106, "learning_rate": 3.485320356853204e-06, "loss": 0.644, "step": 12764 }, { "epoch": 0.3726898484715775, "grad_norm": 0.7262802389379668, "learning_rate": 3.485158150851582e-06, "loss": 0.6662, "step": 12765 }, { "epoch": 0.37271904469942485, "grad_norm": 0.740839541461912, "learning_rate": 3.4849959448499596e-06, "loss": 0.6876, "step": 12766 }, { "epoch": 0.3727482409272722, "grad_norm": 0.6924670738506029, "learning_rate": 3.4848337388483377e-06, "loss": 0.5956, "step": 12767 }, { "epoch": 0.3727774371551196, "grad_norm": 0.7571538871648423, "learning_rate": 3.4846715328467157e-06, "loss": 0.7006, "step": 12768 }, { "epoch": 0.37280663338296693, "grad_norm": 0.7638779392362355, "learning_rate": 3.4845093268450937e-06, "loss": 0.7271, "step": 12769 }, { "epoch": 0.3728358296108143, "grad_norm": 0.7936348810291651, "learning_rate": 3.4843471208434717e-06, "loss": 0.6685, "step": 12770 }, { "epoch": 0.37286502583866166, "grad_norm": 0.7745568786468736, "learning_rate": 3.4841849148418493e-06, "loss": 0.7018, "step": 12771 }, { "epoch": 0.372894222066509, "grad_norm": 0.7504758732151127, "learning_rate": 3.4840227088402273e-06, "loss": 0.6741, "step": 12772 }, { "epoch": 0.3729234182943564, "grad_norm": 0.7231892692809541, "learning_rate": 3.4838605028386053e-06, "loss": 0.6242, "step": 12773 }, { "epoch": 0.37295261452220374, "grad_norm": 0.872501540139382, "learning_rate": 3.4836982968369833e-06, "loss": 0.8022, "step": 12774 }, { "epoch": 0.3729818107500511, "grad_norm": 0.7260921165191015, "learning_rate": 3.483536090835361e-06, "loss": 0.618, "step": 12775 }, { "epoch": 0.37301100697789846, "grad_norm": 1.0597347481943193, "learning_rate": 3.483373884833739e-06, "loss": 0.6894, "step": 12776 }, { "epoch": 0.3730402032057458, "grad_norm": 0.7694070529620028, "learning_rate": 3.483211678832117e-06, "loss": 0.6374, "step": 12777 }, { "epoch": 0.3730693994335932, "grad_norm": 0.6754459220256487, "learning_rate": 3.483049472830495e-06, "loss": 0.5343, "step": 12778 }, { "epoch": 0.37309859566144055, "grad_norm": 0.7492247111287059, "learning_rate": 3.482887266828873e-06, "loss": 0.7231, "step": 12779 }, { "epoch": 0.3731277918892879, "grad_norm": 0.7267310994929302, "learning_rate": 3.4827250608272513e-06, "loss": 0.7004, "step": 12780 }, { "epoch": 0.37315698811713527, "grad_norm": 0.6897310849925876, "learning_rate": 3.482562854825629e-06, "loss": 0.571, "step": 12781 }, { "epoch": 0.37318618434498263, "grad_norm": 0.7944579824165783, "learning_rate": 3.482400648824007e-06, "loss": 0.7343, "step": 12782 }, { "epoch": 0.37321538057283, "grad_norm": 0.7104535554199648, "learning_rate": 3.482238442822385e-06, "loss": 0.674, "step": 12783 }, { "epoch": 0.37324457680067735, "grad_norm": 0.7690390596677316, "learning_rate": 3.482076236820763e-06, "loss": 0.7374, "step": 12784 }, { "epoch": 0.3732737730285247, "grad_norm": 0.7212326867098351, "learning_rate": 3.4819140308191405e-06, "loss": 0.6399, "step": 12785 }, { "epoch": 0.3733029692563721, "grad_norm": 0.659454435333676, "learning_rate": 3.4817518248175185e-06, "loss": 0.5582, "step": 12786 }, { "epoch": 0.37333216548421944, "grad_norm": 0.7013054098223813, "learning_rate": 3.4815896188158965e-06, "loss": 0.6206, "step": 12787 }, { "epoch": 0.3733613617120668, "grad_norm": 0.6391310474438774, "learning_rate": 3.4814274128142745e-06, "loss": 0.5107, "step": 12788 }, { "epoch": 0.37339055793991416, "grad_norm": 0.731986682762063, "learning_rate": 3.4812652068126525e-06, "loss": 0.6557, "step": 12789 }, { "epoch": 0.3734197541677615, "grad_norm": 0.7817194260924275, "learning_rate": 3.48110300081103e-06, "loss": 0.713, "step": 12790 }, { "epoch": 0.3734489503956089, "grad_norm": 0.777636996416986, "learning_rate": 3.480940794809408e-06, "loss": 0.7017, "step": 12791 }, { "epoch": 0.37347814662345624, "grad_norm": 0.6560660902980668, "learning_rate": 3.480778588807786e-06, "loss": 0.5414, "step": 12792 }, { "epoch": 0.3735073428513036, "grad_norm": 0.7592964971912524, "learning_rate": 3.480616382806164e-06, "loss": 0.7119, "step": 12793 }, { "epoch": 0.37353653907915096, "grad_norm": 0.7736609170699441, "learning_rate": 3.4804541768045417e-06, "loss": 0.6393, "step": 12794 }, { "epoch": 0.3735657353069983, "grad_norm": 0.7983207268129363, "learning_rate": 3.4802919708029197e-06, "loss": 0.6631, "step": 12795 }, { "epoch": 0.3735949315348457, "grad_norm": 0.7610552271797537, "learning_rate": 3.4801297648012977e-06, "loss": 0.6837, "step": 12796 }, { "epoch": 0.37362412776269305, "grad_norm": 0.832692640965643, "learning_rate": 3.4799675587996757e-06, "loss": 0.79, "step": 12797 }, { "epoch": 0.3736533239905404, "grad_norm": 0.7697262642352154, "learning_rate": 3.4798053527980537e-06, "loss": 0.7338, "step": 12798 }, { "epoch": 0.37368252021838777, "grad_norm": 0.6872649701589278, "learning_rate": 3.479643146796432e-06, "loss": 0.6321, "step": 12799 }, { "epoch": 0.37371171644623513, "grad_norm": 0.7072907003265412, "learning_rate": 3.4794809407948098e-06, "loss": 0.6271, "step": 12800 }, { "epoch": 0.3737409126740825, "grad_norm": 0.6861456806430196, "learning_rate": 3.4793187347931878e-06, "loss": 0.5966, "step": 12801 }, { "epoch": 0.37377010890192985, "grad_norm": 0.7704111031788112, "learning_rate": 3.4791565287915658e-06, "loss": 0.7353, "step": 12802 }, { "epoch": 0.3737993051297772, "grad_norm": 0.7253202867493354, "learning_rate": 3.4789943227899438e-06, "loss": 0.6399, "step": 12803 }, { "epoch": 0.3738285013576246, "grad_norm": 0.6845382406535901, "learning_rate": 3.4788321167883214e-06, "loss": 0.6306, "step": 12804 }, { "epoch": 0.37385769758547194, "grad_norm": 0.7076629416215678, "learning_rate": 3.4786699107866994e-06, "loss": 0.624, "step": 12805 }, { "epoch": 0.3738868938133193, "grad_norm": 0.7076783582442704, "learning_rate": 3.4785077047850774e-06, "loss": 0.6122, "step": 12806 }, { "epoch": 0.37391609004116666, "grad_norm": 0.7444331960960727, "learning_rate": 3.4783454987834554e-06, "loss": 0.6401, "step": 12807 }, { "epoch": 0.373945286269014, "grad_norm": 0.953712613984336, "learning_rate": 3.4781832927818334e-06, "loss": 0.7383, "step": 12808 }, { "epoch": 0.3739744824968614, "grad_norm": 0.7065562793861432, "learning_rate": 3.478021086780211e-06, "loss": 0.6274, "step": 12809 }, { "epoch": 0.37400367872470874, "grad_norm": 0.7863525113365702, "learning_rate": 3.477858880778589e-06, "loss": 0.634, "step": 12810 }, { "epoch": 0.3740328749525561, "grad_norm": 0.7241951496483966, "learning_rate": 3.477696674776967e-06, "loss": 0.6583, "step": 12811 }, { "epoch": 0.37406207118040347, "grad_norm": 0.7182963629013, "learning_rate": 3.477534468775345e-06, "loss": 0.6225, "step": 12812 }, { "epoch": 0.3740912674082508, "grad_norm": 0.7761531427375644, "learning_rate": 3.4773722627737226e-06, "loss": 0.7547, "step": 12813 }, { "epoch": 0.37412046363609824, "grad_norm": 1.021801286436721, "learning_rate": 3.4772100567721006e-06, "loss": 0.8195, "step": 12814 }, { "epoch": 0.3741496598639456, "grad_norm": 0.7528538694897133, "learning_rate": 3.4770478507704786e-06, "loss": 0.6218, "step": 12815 }, { "epoch": 0.37417885609179297, "grad_norm": 0.7367446222785385, "learning_rate": 3.4768856447688566e-06, "loss": 0.6498, "step": 12816 }, { "epoch": 0.3742080523196403, "grad_norm": 0.7113513838996582, "learning_rate": 3.476723438767235e-06, "loss": 0.599, "step": 12817 }, { "epoch": 0.3742372485474877, "grad_norm": 0.7596357386345594, "learning_rate": 3.476561232765613e-06, "loss": 0.6913, "step": 12818 }, { "epoch": 0.37426644477533505, "grad_norm": 0.7733947051851431, "learning_rate": 3.4763990267639906e-06, "loss": 0.7086, "step": 12819 }, { "epoch": 0.3742956410031824, "grad_norm": 0.7267524707744095, "learning_rate": 3.4762368207623686e-06, "loss": 0.663, "step": 12820 }, { "epoch": 0.37432483723102977, "grad_norm": 0.829157341145388, "learning_rate": 3.4760746147607466e-06, "loss": 0.6705, "step": 12821 }, { "epoch": 0.37435403345887713, "grad_norm": 0.7442462756194609, "learning_rate": 3.4759124087591246e-06, "loss": 0.6472, "step": 12822 }, { "epoch": 0.3743832296867245, "grad_norm": 0.7473651442635377, "learning_rate": 3.4757502027575022e-06, "loss": 0.6674, "step": 12823 }, { "epoch": 0.37441242591457186, "grad_norm": 0.7107840175567085, "learning_rate": 3.4755879967558802e-06, "loss": 0.6032, "step": 12824 }, { "epoch": 0.3744416221424192, "grad_norm": 0.6803602899837097, "learning_rate": 3.4754257907542582e-06, "loss": 0.5917, "step": 12825 }, { "epoch": 0.3744708183702666, "grad_norm": 0.8162576260135701, "learning_rate": 3.4752635847526362e-06, "loss": 0.6825, "step": 12826 }, { "epoch": 0.37450001459811394, "grad_norm": 0.78949554744782, "learning_rate": 3.4751013787510143e-06, "loss": 0.7904, "step": 12827 }, { "epoch": 0.3745292108259613, "grad_norm": 0.7068282729576937, "learning_rate": 3.474939172749392e-06, "loss": 0.5856, "step": 12828 }, { "epoch": 0.37455840705380866, "grad_norm": 0.7207658687802567, "learning_rate": 3.47477696674777e-06, "loss": 0.6233, "step": 12829 }, { "epoch": 0.374587603281656, "grad_norm": 0.7312755320382515, "learning_rate": 3.474614760746148e-06, "loss": 0.6727, "step": 12830 }, { "epoch": 0.3746167995095034, "grad_norm": 0.6870304912352646, "learning_rate": 3.474452554744526e-06, "loss": 0.5929, "step": 12831 }, { "epoch": 0.37464599573735075, "grad_norm": 0.9460135686747275, "learning_rate": 3.4742903487429034e-06, "loss": 0.6691, "step": 12832 }, { "epoch": 0.3746751919651981, "grad_norm": 0.865791628358567, "learning_rate": 3.4741281427412814e-06, "loss": 0.6093, "step": 12833 }, { "epoch": 0.37470438819304547, "grad_norm": 0.7109346157811298, "learning_rate": 3.4739659367396595e-06, "loss": 0.6402, "step": 12834 }, { "epoch": 0.37473358442089283, "grad_norm": 0.7378808899251053, "learning_rate": 3.4738037307380375e-06, "loss": 0.628, "step": 12835 }, { "epoch": 0.3747627806487402, "grad_norm": 0.7053863923053562, "learning_rate": 3.473641524736416e-06, "loss": 0.6321, "step": 12836 }, { "epoch": 0.37479197687658755, "grad_norm": 0.7486053910703119, "learning_rate": 3.473479318734794e-06, "loss": 0.696, "step": 12837 }, { "epoch": 0.3748211731044349, "grad_norm": 0.7457566956079608, "learning_rate": 3.4733171127331715e-06, "loss": 0.6676, "step": 12838 }, { "epoch": 0.3748503693322823, "grad_norm": 0.9013011480662573, "learning_rate": 3.4731549067315495e-06, "loss": 0.7386, "step": 12839 }, { "epoch": 0.37487956556012964, "grad_norm": 0.7475711075773293, "learning_rate": 3.4729927007299275e-06, "loss": 0.6845, "step": 12840 }, { "epoch": 0.374908761787977, "grad_norm": 0.7340821815132663, "learning_rate": 3.4728304947283055e-06, "loss": 0.6931, "step": 12841 }, { "epoch": 0.37493795801582436, "grad_norm": 0.7704113862846368, "learning_rate": 3.472668288726683e-06, "loss": 0.6762, "step": 12842 }, { "epoch": 0.3749671542436717, "grad_norm": 0.9458256055641594, "learning_rate": 3.472506082725061e-06, "loss": 0.6598, "step": 12843 }, { "epoch": 0.3749963504715191, "grad_norm": 0.7605301740019542, "learning_rate": 3.472343876723439e-06, "loss": 0.6969, "step": 12844 }, { "epoch": 0.37502554669936644, "grad_norm": 0.7119813941631099, "learning_rate": 3.472181670721817e-06, "loss": 0.6515, "step": 12845 }, { "epoch": 0.3750547429272138, "grad_norm": 0.7278685358073332, "learning_rate": 3.472019464720195e-06, "loss": 0.6235, "step": 12846 }, { "epoch": 0.37508393915506116, "grad_norm": 0.6529646691428299, "learning_rate": 3.4718572587185727e-06, "loss": 0.5509, "step": 12847 }, { "epoch": 0.3751131353829085, "grad_norm": 0.7319848345085442, "learning_rate": 3.4716950527169507e-06, "loss": 0.6208, "step": 12848 }, { "epoch": 0.3751423316107559, "grad_norm": 0.7675026822578764, "learning_rate": 3.4715328467153287e-06, "loss": 0.7262, "step": 12849 }, { "epoch": 0.37517152783860325, "grad_norm": 0.7823180677598197, "learning_rate": 3.4713706407137067e-06, "loss": 0.7202, "step": 12850 }, { "epoch": 0.3752007240664506, "grad_norm": 0.7335966004482256, "learning_rate": 3.4712084347120843e-06, "loss": 0.658, "step": 12851 }, { "epoch": 0.37522992029429797, "grad_norm": 0.7737116174721003, "learning_rate": 3.4710462287104623e-06, "loss": 0.6847, "step": 12852 }, { "epoch": 0.37525911652214533, "grad_norm": 0.7496165702348901, "learning_rate": 3.4708840227088403e-06, "loss": 0.6279, "step": 12853 }, { "epoch": 0.3752883127499927, "grad_norm": 0.7391279047869278, "learning_rate": 3.4707218167072183e-06, "loss": 0.6403, "step": 12854 }, { "epoch": 0.37531750897784005, "grad_norm": 0.9198260652262087, "learning_rate": 3.4705596107055967e-06, "loss": 0.6484, "step": 12855 }, { "epoch": 0.3753467052056874, "grad_norm": 0.8170771523677797, "learning_rate": 3.4703974047039748e-06, "loss": 0.6944, "step": 12856 }, { "epoch": 0.3753759014335348, "grad_norm": 0.6954161444832274, "learning_rate": 3.4702351987023523e-06, "loss": 0.6202, "step": 12857 }, { "epoch": 0.37540509766138214, "grad_norm": 0.802176665650547, "learning_rate": 3.4700729927007303e-06, "loss": 0.7475, "step": 12858 }, { "epoch": 0.3754342938892295, "grad_norm": 0.757081260387342, "learning_rate": 3.4699107866991084e-06, "loss": 0.732, "step": 12859 }, { "epoch": 0.37546349011707686, "grad_norm": 0.8910139601658966, "learning_rate": 3.4697485806974864e-06, "loss": 0.7495, "step": 12860 }, { "epoch": 0.3754926863449242, "grad_norm": 0.71701016572647, "learning_rate": 3.469586374695864e-06, "loss": 0.6893, "step": 12861 }, { "epoch": 0.3755218825727716, "grad_norm": 0.7532795716437307, "learning_rate": 3.469424168694242e-06, "loss": 0.7049, "step": 12862 }, { "epoch": 0.37555107880061894, "grad_norm": 0.7337418053373935, "learning_rate": 3.46926196269262e-06, "loss": 0.6723, "step": 12863 }, { "epoch": 0.3755802750284663, "grad_norm": 0.6488149494375306, "learning_rate": 3.469099756690998e-06, "loss": 0.5044, "step": 12864 }, { "epoch": 0.37560947125631367, "grad_norm": 0.6866064887969026, "learning_rate": 3.468937550689376e-06, "loss": 0.6169, "step": 12865 }, { "epoch": 0.375638667484161, "grad_norm": 0.7644229874787564, "learning_rate": 3.4687753446877536e-06, "loss": 0.6813, "step": 12866 }, { "epoch": 0.3756678637120084, "grad_norm": 0.7219377405656907, "learning_rate": 3.4686131386861316e-06, "loss": 0.6634, "step": 12867 }, { "epoch": 0.37569705993985575, "grad_norm": 0.7180641323394114, "learning_rate": 3.4684509326845096e-06, "loss": 0.6142, "step": 12868 }, { "epoch": 0.3757262561677031, "grad_norm": 0.7074818252671646, "learning_rate": 3.4682887266828876e-06, "loss": 0.6559, "step": 12869 }, { "epoch": 0.37575545239555047, "grad_norm": 0.7199525973391111, "learning_rate": 3.468126520681265e-06, "loss": 0.647, "step": 12870 }, { "epoch": 0.37578464862339783, "grad_norm": 0.6804719274264898, "learning_rate": 3.467964314679643e-06, "loss": 0.6092, "step": 12871 }, { "epoch": 0.3758138448512452, "grad_norm": 0.7709715711416807, "learning_rate": 3.467802108678021e-06, "loss": 0.7121, "step": 12872 }, { "epoch": 0.37584304107909255, "grad_norm": 0.7640024814318045, "learning_rate": 3.467639902676399e-06, "loss": 0.674, "step": 12873 }, { "epoch": 0.3758722373069399, "grad_norm": 0.7307877179486065, "learning_rate": 3.4674776966747776e-06, "loss": 0.6652, "step": 12874 }, { "epoch": 0.37590143353478733, "grad_norm": 0.6949077222069397, "learning_rate": 3.4673154906731556e-06, "loss": 0.6246, "step": 12875 }, { "epoch": 0.3759306297626347, "grad_norm": 0.7196349401502931, "learning_rate": 3.467153284671533e-06, "loss": 0.6955, "step": 12876 }, { "epoch": 0.37595982599048205, "grad_norm": 0.7369753008811213, "learning_rate": 3.466991078669911e-06, "loss": 0.7376, "step": 12877 }, { "epoch": 0.3759890222183294, "grad_norm": 0.7477356979213481, "learning_rate": 3.466828872668289e-06, "loss": 0.6916, "step": 12878 }, { "epoch": 0.3760182184461768, "grad_norm": 0.7214890390382648, "learning_rate": 3.4666666666666672e-06, "loss": 0.6719, "step": 12879 }, { "epoch": 0.37604741467402414, "grad_norm": 0.7970883299339363, "learning_rate": 3.466504460665045e-06, "loss": 0.7328, "step": 12880 }, { "epoch": 0.3760766109018715, "grad_norm": 0.7430151002287823, "learning_rate": 3.466342254663423e-06, "loss": 0.6988, "step": 12881 }, { "epoch": 0.37610580712971886, "grad_norm": 0.8004098787161961, "learning_rate": 3.466180048661801e-06, "loss": 0.6727, "step": 12882 }, { "epoch": 0.3761350033575662, "grad_norm": 0.688984817909649, "learning_rate": 3.466017842660179e-06, "loss": 0.5322, "step": 12883 }, { "epoch": 0.3761641995854136, "grad_norm": 0.7544854008529055, "learning_rate": 3.465855636658557e-06, "loss": 0.7164, "step": 12884 }, { "epoch": 0.37619339581326094, "grad_norm": 0.7343418089212311, "learning_rate": 3.4656934306569344e-06, "loss": 0.6332, "step": 12885 }, { "epoch": 0.3762225920411083, "grad_norm": 0.8394560006113857, "learning_rate": 3.4655312246553124e-06, "loss": 0.74, "step": 12886 }, { "epoch": 0.37625178826895567, "grad_norm": 0.6742514520232525, "learning_rate": 3.4653690186536904e-06, "loss": 0.5658, "step": 12887 }, { "epoch": 0.37628098449680303, "grad_norm": 0.7704920072598856, "learning_rate": 3.4652068126520684e-06, "loss": 0.6849, "step": 12888 }, { "epoch": 0.3763101807246504, "grad_norm": 0.7353154469464027, "learning_rate": 3.465044606650446e-06, "loss": 0.6637, "step": 12889 }, { "epoch": 0.37633937695249775, "grad_norm": 0.6806963561131205, "learning_rate": 3.464882400648824e-06, "loss": 0.5588, "step": 12890 }, { "epoch": 0.3763685731803451, "grad_norm": 0.7268645875263677, "learning_rate": 3.464720194647202e-06, "loss": 0.6682, "step": 12891 }, { "epoch": 0.3763977694081925, "grad_norm": 0.70423230823616, "learning_rate": 3.46455798864558e-06, "loss": 0.6041, "step": 12892 }, { "epoch": 0.37642696563603983, "grad_norm": 0.737951799625394, "learning_rate": 3.4643957826439585e-06, "loss": 0.6819, "step": 12893 }, { "epoch": 0.3764561618638872, "grad_norm": 0.8215903523502438, "learning_rate": 3.4642335766423365e-06, "loss": 0.6473, "step": 12894 }, { "epoch": 0.37648535809173456, "grad_norm": 0.7566543783034247, "learning_rate": 3.464071370640714e-06, "loss": 0.6993, "step": 12895 }, { "epoch": 0.3765145543195819, "grad_norm": 0.7252908669277969, "learning_rate": 3.463909164639092e-06, "loss": 0.6665, "step": 12896 }, { "epoch": 0.3765437505474293, "grad_norm": 0.7405268799928197, "learning_rate": 3.46374695863747e-06, "loss": 0.6575, "step": 12897 }, { "epoch": 0.37657294677527664, "grad_norm": 0.7638211575096744, "learning_rate": 3.463584752635848e-06, "loss": 0.7155, "step": 12898 }, { "epoch": 0.376602143003124, "grad_norm": 0.7489906212174883, "learning_rate": 3.4634225466342257e-06, "loss": 0.687, "step": 12899 }, { "epoch": 0.37663133923097136, "grad_norm": 0.702410463380222, "learning_rate": 3.4632603406326037e-06, "loss": 0.5896, "step": 12900 }, { "epoch": 0.3766605354588187, "grad_norm": 0.6925077879821245, "learning_rate": 3.4630981346309817e-06, "loss": 0.6644, "step": 12901 }, { "epoch": 0.3766897316866661, "grad_norm": 0.7680949798613275, "learning_rate": 3.4629359286293597e-06, "loss": 0.6562, "step": 12902 }, { "epoch": 0.37671892791451345, "grad_norm": 0.7268792888381804, "learning_rate": 3.4627737226277377e-06, "loss": 0.6213, "step": 12903 }, { "epoch": 0.3767481241423608, "grad_norm": 0.6991460030500573, "learning_rate": 3.4626115166261153e-06, "loss": 0.5992, "step": 12904 }, { "epoch": 0.37677732037020817, "grad_norm": 0.7082550704951707, "learning_rate": 3.4624493106244933e-06, "loss": 0.6457, "step": 12905 }, { "epoch": 0.37680651659805553, "grad_norm": 0.8265249511161896, "learning_rate": 3.4622871046228713e-06, "loss": 0.7079, "step": 12906 }, { "epoch": 0.3768357128259029, "grad_norm": 0.6945818951882091, "learning_rate": 3.4621248986212493e-06, "loss": 0.6346, "step": 12907 }, { "epoch": 0.37686490905375025, "grad_norm": 0.698376053094385, "learning_rate": 3.461962692619627e-06, "loss": 0.6225, "step": 12908 }, { "epoch": 0.3768941052815976, "grad_norm": 0.7370015139862289, "learning_rate": 3.461800486618005e-06, "loss": 0.6727, "step": 12909 }, { "epoch": 0.376923301509445, "grad_norm": 0.7164700459074577, "learning_rate": 3.461638280616383e-06, "loss": 0.566, "step": 12910 }, { "epoch": 0.37695249773729234, "grad_norm": 0.8034885835323297, "learning_rate": 3.461476074614761e-06, "loss": 0.7977, "step": 12911 }, { "epoch": 0.3769816939651397, "grad_norm": 0.7186493146172018, "learning_rate": 3.4613138686131393e-06, "loss": 0.6357, "step": 12912 }, { "epoch": 0.37701089019298706, "grad_norm": 0.8001096311726268, "learning_rate": 3.4611516626115173e-06, "loss": 0.7414, "step": 12913 }, { "epoch": 0.3770400864208344, "grad_norm": 0.7253405267419338, "learning_rate": 3.460989456609895e-06, "loss": 0.6546, "step": 12914 }, { "epoch": 0.3770692826486818, "grad_norm": 1.1086172375325267, "learning_rate": 3.460827250608273e-06, "loss": 0.7128, "step": 12915 }, { "epoch": 0.37709847887652914, "grad_norm": 0.7215022199592559, "learning_rate": 3.460665044606651e-06, "loss": 0.6482, "step": 12916 }, { "epoch": 0.3771276751043765, "grad_norm": 0.6892570024774612, "learning_rate": 3.460502838605029e-06, "loss": 0.5885, "step": 12917 }, { "epoch": 0.37715687133222386, "grad_norm": 0.8347910755671222, "learning_rate": 3.4603406326034065e-06, "loss": 0.6155, "step": 12918 }, { "epoch": 0.3771860675600712, "grad_norm": 0.7256364914463123, "learning_rate": 3.4601784266017845e-06, "loss": 0.6791, "step": 12919 }, { "epoch": 0.3772152637879186, "grad_norm": 0.8103686228718512, "learning_rate": 3.4600162206001625e-06, "loss": 0.7595, "step": 12920 }, { "epoch": 0.37724446001576595, "grad_norm": 0.7114857123849052, "learning_rate": 3.4598540145985405e-06, "loss": 0.6081, "step": 12921 }, { "epoch": 0.3772736562436133, "grad_norm": 0.7115393739141248, "learning_rate": 3.4596918085969185e-06, "loss": 0.6085, "step": 12922 }, { "epoch": 0.37730285247146067, "grad_norm": 0.695103400450689, "learning_rate": 3.459529602595296e-06, "loss": 0.6324, "step": 12923 }, { "epoch": 0.37733204869930803, "grad_norm": 0.782829413958098, "learning_rate": 3.459367396593674e-06, "loss": 0.7331, "step": 12924 }, { "epoch": 0.3773612449271554, "grad_norm": 0.6984903823992253, "learning_rate": 3.459205190592052e-06, "loss": 0.625, "step": 12925 }, { "epoch": 0.37739044115500275, "grad_norm": 0.7200413310907372, "learning_rate": 3.45904298459043e-06, "loss": 0.617, "step": 12926 }, { "epoch": 0.3774196373828501, "grad_norm": 1.1516760834338602, "learning_rate": 3.4588807785888077e-06, "loss": 0.7793, "step": 12927 }, { "epoch": 0.3774488336106975, "grad_norm": 0.8542378157446767, "learning_rate": 3.4587185725871857e-06, "loss": 0.6694, "step": 12928 }, { "epoch": 0.37747802983854484, "grad_norm": 0.7805273063986522, "learning_rate": 3.4585563665855637e-06, "loss": 0.6397, "step": 12929 }, { "epoch": 0.3775072260663922, "grad_norm": 0.6989564419982033, "learning_rate": 3.4583941605839418e-06, "loss": 0.6427, "step": 12930 }, { "epoch": 0.37753642229423956, "grad_norm": 0.8016620373107216, "learning_rate": 3.45823195458232e-06, "loss": 0.7297, "step": 12931 }, { "epoch": 0.3775656185220869, "grad_norm": 0.6883636812614472, "learning_rate": 3.458069748580698e-06, "loss": 0.6018, "step": 12932 }, { "epoch": 0.3775948147499343, "grad_norm": 0.7300415447253722, "learning_rate": 3.4579075425790758e-06, "loss": 0.675, "step": 12933 }, { "epoch": 0.37762401097778164, "grad_norm": 0.6860826521251943, "learning_rate": 3.4577453365774538e-06, "loss": 0.6277, "step": 12934 }, { "epoch": 0.37765320720562906, "grad_norm": 0.6602869997828174, "learning_rate": 3.4575831305758318e-06, "loss": 0.5451, "step": 12935 }, { "epoch": 0.3776824034334764, "grad_norm": 0.7401087190826121, "learning_rate": 3.45742092457421e-06, "loss": 0.6173, "step": 12936 }, { "epoch": 0.3777115996613238, "grad_norm": 0.6694914762797588, "learning_rate": 3.4572587185725874e-06, "loss": 0.5678, "step": 12937 }, { "epoch": 0.37774079588917114, "grad_norm": 0.6792060929558977, "learning_rate": 3.4570965125709654e-06, "loss": 0.5572, "step": 12938 }, { "epoch": 0.3777699921170185, "grad_norm": 0.7151476474041082, "learning_rate": 3.4569343065693434e-06, "loss": 0.663, "step": 12939 }, { "epoch": 0.37779918834486587, "grad_norm": 0.7680763001291979, "learning_rate": 3.4567721005677214e-06, "loss": 0.7297, "step": 12940 }, { "epoch": 0.3778283845727132, "grad_norm": 0.7677423559317431, "learning_rate": 3.456609894566099e-06, "loss": 0.6651, "step": 12941 }, { "epoch": 0.3778575808005606, "grad_norm": 0.7092914872505851, "learning_rate": 3.456447688564477e-06, "loss": 0.6389, "step": 12942 }, { "epoch": 0.37788677702840795, "grad_norm": 0.7758474247652881, "learning_rate": 3.456285482562855e-06, "loss": 0.7274, "step": 12943 }, { "epoch": 0.3779159732562553, "grad_norm": 0.7233968706346094, "learning_rate": 3.456123276561233e-06, "loss": 0.638, "step": 12944 }, { "epoch": 0.37794516948410267, "grad_norm": 0.7065438137915573, "learning_rate": 3.455961070559611e-06, "loss": 0.6673, "step": 12945 }, { "epoch": 0.37797436571195003, "grad_norm": 0.8415282032273914, "learning_rate": 3.4557988645579886e-06, "loss": 0.7661, "step": 12946 }, { "epoch": 0.3780035619397974, "grad_norm": 0.758452320998194, "learning_rate": 3.4556366585563666e-06, "loss": 0.6607, "step": 12947 }, { "epoch": 0.37803275816764476, "grad_norm": 0.6850200185134214, "learning_rate": 3.4554744525547446e-06, "loss": 0.579, "step": 12948 }, { "epoch": 0.3780619543954921, "grad_norm": 0.7443779493830442, "learning_rate": 3.4553122465531226e-06, "loss": 0.6734, "step": 12949 }, { "epoch": 0.3780911506233395, "grad_norm": 0.7810943903965349, "learning_rate": 3.455150040551501e-06, "loss": 0.7613, "step": 12950 }, { "epoch": 0.37812034685118684, "grad_norm": 0.8593652337133689, "learning_rate": 3.454987834549879e-06, "loss": 0.7557, "step": 12951 }, { "epoch": 0.3781495430790342, "grad_norm": 0.7539260499272694, "learning_rate": 3.4548256285482566e-06, "loss": 0.6633, "step": 12952 }, { "epoch": 0.37817873930688156, "grad_norm": 0.6973111323202218, "learning_rate": 3.4546634225466346e-06, "loss": 0.6319, "step": 12953 }, { "epoch": 0.3782079355347289, "grad_norm": 0.848263599991923, "learning_rate": 3.4545012165450126e-06, "loss": 0.703, "step": 12954 }, { "epoch": 0.3782371317625763, "grad_norm": 0.7775595114816277, "learning_rate": 3.4543390105433907e-06, "loss": 0.7129, "step": 12955 }, { "epoch": 0.37826632799042365, "grad_norm": 0.6809924754298458, "learning_rate": 3.4541768045417682e-06, "loss": 0.6058, "step": 12956 }, { "epoch": 0.378295524218271, "grad_norm": 0.7985694436712178, "learning_rate": 3.4540145985401462e-06, "loss": 0.7621, "step": 12957 }, { "epoch": 0.37832472044611837, "grad_norm": 0.7380418334080905, "learning_rate": 3.4538523925385242e-06, "loss": 0.6947, "step": 12958 }, { "epoch": 0.37835391667396573, "grad_norm": 0.7518854172170297, "learning_rate": 3.4536901865369023e-06, "loss": 0.6933, "step": 12959 }, { "epoch": 0.3783831129018131, "grad_norm": 0.7702635257971728, "learning_rate": 3.45352798053528e-06, "loss": 0.7045, "step": 12960 }, { "epoch": 0.37841230912966045, "grad_norm": 0.9660381427530405, "learning_rate": 3.453365774533658e-06, "loss": 0.6254, "step": 12961 }, { "epoch": 0.3784415053575078, "grad_norm": 0.7611388033789039, "learning_rate": 3.453203568532036e-06, "loss": 0.7288, "step": 12962 }, { "epoch": 0.3784707015853552, "grad_norm": 0.7390890747631021, "learning_rate": 3.453041362530414e-06, "loss": 0.6687, "step": 12963 }, { "epoch": 0.37849989781320253, "grad_norm": 0.7132761995193388, "learning_rate": 3.452879156528792e-06, "loss": 0.5983, "step": 12964 }, { "epoch": 0.3785290940410499, "grad_norm": 0.7230747186188873, "learning_rate": 3.4527169505271694e-06, "loss": 0.6699, "step": 12965 }, { "epoch": 0.37855829026889726, "grad_norm": 0.6696046320184237, "learning_rate": 3.4525547445255475e-06, "loss": 0.5832, "step": 12966 }, { "epoch": 0.3785874864967446, "grad_norm": 0.7798374840836512, "learning_rate": 3.4523925385239255e-06, "loss": 0.7006, "step": 12967 }, { "epoch": 0.378616682724592, "grad_norm": 0.7785768310964953, "learning_rate": 3.452230332522304e-06, "loss": 0.6562, "step": 12968 }, { "epoch": 0.37864587895243934, "grad_norm": 0.7307676192208308, "learning_rate": 3.452068126520682e-06, "loss": 0.6369, "step": 12969 }, { "epoch": 0.3786750751802867, "grad_norm": 0.7241194654759704, "learning_rate": 3.45190592051906e-06, "loss": 0.6563, "step": 12970 }, { "epoch": 0.37870427140813406, "grad_norm": 0.7501534869174354, "learning_rate": 3.4517437145174375e-06, "loss": 0.6553, "step": 12971 }, { "epoch": 0.3787334676359814, "grad_norm": 0.7666648961370917, "learning_rate": 3.4515815085158155e-06, "loss": 0.7308, "step": 12972 }, { "epoch": 0.3787626638638288, "grad_norm": 0.7685903836440169, "learning_rate": 3.4514193025141935e-06, "loss": 0.6493, "step": 12973 }, { "epoch": 0.37879186009167615, "grad_norm": 0.7060411520933607, "learning_rate": 3.4512570965125715e-06, "loss": 0.6332, "step": 12974 }, { "epoch": 0.3788210563195235, "grad_norm": 0.8164064364535681, "learning_rate": 3.451094890510949e-06, "loss": 0.7002, "step": 12975 }, { "epoch": 0.37885025254737087, "grad_norm": 0.7349458328595377, "learning_rate": 3.450932684509327e-06, "loss": 0.6545, "step": 12976 }, { "epoch": 0.37887944877521823, "grad_norm": 0.7175549435326464, "learning_rate": 3.450770478507705e-06, "loss": 0.6371, "step": 12977 }, { "epoch": 0.3789086450030656, "grad_norm": 0.7411186153529988, "learning_rate": 3.450608272506083e-06, "loss": 0.6955, "step": 12978 }, { "epoch": 0.37893784123091295, "grad_norm": 0.718705475384518, "learning_rate": 3.4504460665044607e-06, "loss": 0.647, "step": 12979 }, { "epoch": 0.3789670374587603, "grad_norm": 0.7247672609467694, "learning_rate": 3.4502838605028387e-06, "loss": 0.6625, "step": 12980 }, { "epoch": 0.3789962336866077, "grad_norm": 0.9494791542367315, "learning_rate": 3.4501216545012167e-06, "loss": 0.6865, "step": 12981 }, { "epoch": 0.37902542991445504, "grad_norm": 0.7703540450407277, "learning_rate": 3.4499594484995947e-06, "loss": 0.7525, "step": 12982 }, { "epoch": 0.3790546261423024, "grad_norm": 0.6962178198808052, "learning_rate": 3.4497972424979727e-06, "loss": 0.6184, "step": 12983 }, { "epoch": 0.37908382237014976, "grad_norm": 0.7703555405879857, "learning_rate": 3.4496350364963503e-06, "loss": 0.6957, "step": 12984 }, { "epoch": 0.3791130185979971, "grad_norm": 0.9150500420624296, "learning_rate": 3.4494728304947283e-06, "loss": 0.754, "step": 12985 }, { "epoch": 0.3791422148258445, "grad_norm": 0.747927030346189, "learning_rate": 3.4493106244931063e-06, "loss": 0.6819, "step": 12986 }, { "epoch": 0.37917141105369184, "grad_norm": 0.8379623137096462, "learning_rate": 3.4491484184914848e-06, "loss": 0.7572, "step": 12987 }, { "epoch": 0.3792006072815392, "grad_norm": 0.7742350637216576, "learning_rate": 3.4489862124898628e-06, "loss": 0.7302, "step": 12988 }, { "epoch": 0.37922980350938656, "grad_norm": 0.8444004082887945, "learning_rate": 3.4488240064882408e-06, "loss": 0.6234, "step": 12989 }, { "epoch": 0.3792589997372339, "grad_norm": 0.66613021207907, "learning_rate": 3.4486618004866183e-06, "loss": 0.6138, "step": 12990 }, { "epoch": 0.3792881959650813, "grad_norm": 0.7946351067024378, "learning_rate": 3.4484995944849964e-06, "loss": 0.6908, "step": 12991 }, { "epoch": 0.37931739219292865, "grad_norm": 0.7074187056705508, "learning_rate": 3.4483373884833744e-06, "loss": 0.5825, "step": 12992 }, { "epoch": 0.379346588420776, "grad_norm": 0.7334382523865789, "learning_rate": 3.4481751824817524e-06, "loss": 0.6667, "step": 12993 }, { "epoch": 0.37937578464862337, "grad_norm": 0.766405101689738, "learning_rate": 3.44801297648013e-06, "loss": 0.704, "step": 12994 }, { "epoch": 0.3794049808764708, "grad_norm": 0.7268962119440701, "learning_rate": 3.447850770478508e-06, "loss": 0.6882, "step": 12995 }, { "epoch": 0.37943417710431815, "grad_norm": 0.767561243198097, "learning_rate": 3.447688564476886e-06, "loss": 0.6816, "step": 12996 }, { "epoch": 0.3794633733321655, "grad_norm": 0.6590859819165801, "learning_rate": 3.447526358475264e-06, "loss": 0.5598, "step": 12997 }, { "epoch": 0.37949256956001287, "grad_norm": 0.7408238587395851, "learning_rate": 3.4473641524736416e-06, "loss": 0.6442, "step": 12998 }, { "epoch": 0.37952176578786023, "grad_norm": 0.8185122115446672, "learning_rate": 3.4472019464720196e-06, "loss": 0.7697, "step": 12999 }, { "epoch": 0.3795509620157076, "grad_norm": 0.7609162479452041, "learning_rate": 3.4470397404703976e-06, "loss": 0.7222, "step": 13000 }, { "epoch": 0.37958015824355495, "grad_norm": 0.7109729750042775, "learning_rate": 3.4468775344687756e-06, "loss": 0.6012, "step": 13001 }, { "epoch": 0.3796093544714023, "grad_norm": 0.8617907398625212, "learning_rate": 3.4467153284671536e-06, "loss": 0.7036, "step": 13002 }, { "epoch": 0.3796385506992497, "grad_norm": 0.7712859838615801, "learning_rate": 3.446553122465531e-06, "loss": 0.7058, "step": 13003 }, { "epoch": 0.37966774692709704, "grad_norm": 0.6916382517621613, "learning_rate": 3.446390916463909e-06, "loss": 0.6035, "step": 13004 }, { "epoch": 0.3796969431549444, "grad_norm": 0.7930529805250788, "learning_rate": 3.446228710462287e-06, "loss": 0.7103, "step": 13005 }, { "epoch": 0.37972613938279176, "grad_norm": 0.7738966274207916, "learning_rate": 3.4460665044606656e-06, "loss": 0.6871, "step": 13006 }, { "epoch": 0.3797553356106391, "grad_norm": 0.7583746257355913, "learning_rate": 3.4459042984590436e-06, "loss": 0.7446, "step": 13007 }, { "epoch": 0.3797845318384865, "grad_norm": 0.8589683368210828, "learning_rate": 3.4457420924574216e-06, "loss": 0.7026, "step": 13008 }, { "epoch": 0.37981372806633384, "grad_norm": 0.6863024101762134, "learning_rate": 3.445579886455799e-06, "loss": 0.5681, "step": 13009 }, { "epoch": 0.3798429242941812, "grad_norm": 0.6751248226602191, "learning_rate": 3.4454176804541772e-06, "loss": 0.5821, "step": 13010 }, { "epoch": 0.37987212052202857, "grad_norm": 0.6982994077699273, "learning_rate": 3.4452554744525552e-06, "loss": 0.6174, "step": 13011 }, { "epoch": 0.37990131674987593, "grad_norm": 0.7269678467969105, "learning_rate": 3.4450932684509332e-06, "loss": 0.6479, "step": 13012 }, { "epoch": 0.3799305129777233, "grad_norm": 0.749015914237623, "learning_rate": 3.444931062449311e-06, "loss": 0.6683, "step": 13013 }, { "epoch": 0.37995970920557065, "grad_norm": 0.8697921210782694, "learning_rate": 3.444768856447689e-06, "loss": 0.7756, "step": 13014 }, { "epoch": 0.379988905433418, "grad_norm": 0.8079614160363127, "learning_rate": 3.444606650446067e-06, "loss": 0.6422, "step": 13015 }, { "epoch": 0.3800181016612654, "grad_norm": 0.7198758551149672, "learning_rate": 3.444444444444445e-06, "loss": 0.6402, "step": 13016 }, { "epoch": 0.38004729788911273, "grad_norm": 0.7505862012957942, "learning_rate": 3.4442822384428224e-06, "loss": 0.619, "step": 13017 }, { "epoch": 0.3800764941169601, "grad_norm": 0.7528933688611475, "learning_rate": 3.4441200324412004e-06, "loss": 0.6803, "step": 13018 }, { "epoch": 0.38010569034480746, "grad_norm": 0.837391766690981, "learning_rate": 3.4439578264395784e-06, "loss": 0.7327, "step": 13019 }, { "epoch": 0.3801348865726548, "grad_norm": 0.7939689538880104, "learning_rate": 3.4437956204379564e-06, "loss": 0.6732, "step": 13020 }, { "epoch": 0.3801640828005022, "grad_norm": 0.6985071994432588, "learning_rate": 3.4436334144363344e-06, "loss": 0.6414, "step": 13021 }, { "epoch": 0.38019327902834954, "grad_norm": 0.7869434388871265, "learning_rate": 3.443471208434712e-06, "loss": 0.7583, "step": 13022 }, { "epoch": 0.3802224752561969, "grad_norm": 0.7668833057313503, "learning_rate": 3.44330900243309e-06, "loss": 0.735, "step": 13023 }, { "epoch": 0.38025167148404426, "grad_norm": 0.6900192146173981, "learning_rate": 3.443146796431468e-06, "loss": 0.6091, "step": 13024 }, { "epoch": 0.3802808677118916, "grad_norm": 0.7242826278373709, "learning_rate": 3.4429845904298465e-06, "loss": 0.6821, "step": 13025 }, { "epoch": 0.380310063939739, "grad_norm": 0.7492946894418898, "learning_rate": 3.4428223844282245e-06, "loss": 0.6866, "step": 13026 }, { "epoch": 0.38033926016758635, "grad_norm": 0.7283506286935145, "learning_rate": 3.4426601784266025e-06, "loss": 0.6667, "step": 13027 }, { "epoch": 0.3803684563954337, "grad_norm": 0.7437834179505269, "learning_rate": 3.44249797242498e-06, "loss": 0.6645, "step": 13028 }, { "epoch": 0.38039765262328107, "grad_norm": 0.7383240670961789, "learning_rate": 3.442335766423358e-06, "loss": 0.6526, "step": 13029 }, { "epoch": 0.38042684885112843, "grad_norm": 0.7096231804191235, "learning_rate": 3.442173560421736e-06, "loss": 0.5968, "step": 13030 }, { "epoch": 0.3804560450789758, "grad_norm": 0.6866670024644399, "learning_rate": 3.442011354420114e-06, "loss": 0.6596, "step": 13031 }, { "epoch": 0.38048524130682315, "grad_norm": 0.7547011056337763, "learning_rate": 3.4418491484184917e-06, "loss": 0.6399, "step": 13032 }, { "epoch": 0.3805144375346705, "grad_norm": 0.7632029784980549, "learning_rate": 3.4416869424168697e-06, "loss": 0.6337, "step": 13033 }, { "epoch": 0.3805436337625179, "grad_norm": 0.8611185698005099, "learning_rate": 3.4415247364152477e-06, "loss": 0.7508, "step": 13034 }, { "epoch": 0.38057282999036524, "grad_norm": 0.8096717046523814, "learning_rate": 3.4413625304136257e-06, "loss": 0.6582, "step": 13035 }, { "epoch": 0.3806020262182126, "grad_norm": 0.7001255388311103, "learning_rate": 3.4412003244120033e-06, "loss": 0.6564, "step": 13036 }, { "epoch": 0.38063122244605996, "grad_norm": 0.7441349803538972, "learning_rate": 3.4410381184103813e-06, "loss": 0.6669, "step": 13037 }, { "epoch": 0.3806604186739073, "grad_norm": 0.7623798487837454, "learning_rate": 3.4408759124087593e-06, "loss": 0.6968, "step": 13038 }, { "epoch": 0.3806896149017547, "grad_norm": 0.6643843208243017, "learning_rate": 3.4407137064071373e-06, "loss": 0.5855, "step": 13039 }, { "epoch": 0.38071881112960204, "grad_norm": 0.7145105744655438, "learning_rate": 3.4405515004055153e-06, "loss": 0.6434, "step": 13040 }, { "epoch": 0.3807480073574494, "grad_norm": 0.7354303450462591, "learning_rate": 3.440389294403893e-06, "loss": 0.6412, "step": 13041 }, { "epoch": 0.38077720358529676, "grad_norm": 0.735818155680512, "learning_rate": 3.440227088402271e-06, "loss": 0.6825, "step": 13042 }, { "epoch": 0.3808063998131441, "grad_norm": 0.8097500403678705, "learning_rate": 3.440064882400649e-06, "loss": 0.7134, "step": 13043 }, { "epoch": 0.3808355960409915, "grad_norm": 0.8322143728692794, "learning_rate": 3.4399026763990273e-06, "loss": 0.7502, "step": 13044 }, { "epoch": 0.38086479226883885, "grad_norm": 0.9047128175074153, "learning_rate": 3.4397404703974053e-06, "loss": 0.7378, "step": 13045 }, { "epoch": 0.3808939884966862, "grad_norm": 0.7521856048763618, "learning_rate": 3.4395782643957833e-06, "loss": 0.6876, "step": 13046 }, { "epoch": 0.38092318472453357, "grad_norm": 0.7511801422381604, "learning_rate": 3.439416058394161e-06, "loss": 0.7102, "step": 13047 }, { "epoch": 0.38095238095238093, "grad_norm": 0.7262605865668166, "learning_rate": 3.439253852392539e-06, "loss": 0.6518, "step": 13048 }, { "epoch": 0.3809815771802283, "grad_norm": 0.6776224129380345, "learning_rate": 3.439091646390917e-06, "loss": 0.5713, "step": 13049 }, { "epoch": 0.38101077340807565, "grad_norm": 0.7408853454145841, "learning_rate": 3.438929440389295e-06, "loss": 0.7074, "step": 13050 }, { "epoch": 0.381039969635923, "grad_norm": 0.6775599185910575, "learning_rate": 3.4387672343876725e-06, "loss": 0.5629, "step": 13051 }, { "epoch": 0.3810691658637704, "grad_norm": 0.709274073740694, "learning_rate": 3.4386050283860505e-06, "loss": 0.6413, "step": 13052 }, { "epoch": 0.38109836209161774, "grad_norm": 0.7502129867667227, "learning_rate": 3.4384428223844285e-06, "loss": 0.7105, "step": 13053 }, { "epoch": 0.3811275583194651, "grad_norm": 0.75788582777005, "learning_rate": 3.4382806163828065e-06, "loss": 0.6896, "step": 13054 }, { "epoch": 0.38115675454731246, "grad_norm": 0.7871457487537249, "learning_rate": 3.438118410381184e-06, "loss": 0.6409, "step": 13055 }, { "epoch": 0.3811859507751599, "grad_norm": 0.8614627847298396, "learning_rate": 3.437956204379562e-06, "loss": 0.6673, "step": 13056 }, { "epoch": 0.38121514700300724, "grad_norm": 0.7103782895029255, "learning_rate": 3.43779399837794e-06, "loss": 0.6097, "step": 13057 }, { "epoch": 0.3812443432308546, "grad_norm": 0.7288452524562786, "learning_rate": 3.437631792376318e-06, "loss": 0.6496, "step": 13058 }, { "epoch": 0.38127353945870196, "grad_norm": 0.7829888512834764, "learning_rate": 3.437469586374696e-06, "loss": 0.6657, "step": 13059 }, { "epoch": 0.3813027356865493, "grad_norm": 0.7835260410817351, "learning_rate": 3.4373073803730737e-06, "loss": 0.7161, "step": 13060 }, { "epoch": 0.3813319319143967, "grad_norm": 0.8504837704357744, "learning_rate": 3.4371451743714517e-06, "loss": 0.653, "step": 13061 }, { "epoch": 0.38136112814224404, "grad_norm": 0.7712915532954511, "learning_rate": 3.4369829683698298e-06, "loss": 0.6795, "step": 13062 }, { "epoch": 0.3813903243700914, "grad_norm": 0.6843666423881554, "learning_rate": 3.436820762368208e-06, "loss": 0.5778, "step": 13063 }, { "epoch": 0.38141952059793877, "grad_norm": 0.755138136339036, "learning_rate": 3.436658556366586e-06, "loss": 0.7222, "step": 13064 }, { "epoch": 0.3814487168257861, "grad_norm": 0.7534279328464559, "learning_rate": 3.4364963503649638e-06, "loss": 0.7063, "step": 13065 }, { "epoch": 0.3814779130536335, "grad_norm": 0.7115354998509603, "learning_rate": 3.4363341443633418e-06, "loss": 0.6229, "step": 13066 }, { "epoch": 0.38150710928148085, "grad_norm": 0.7988216183926938, "learning_rate": 3.43617193836172e-06, "loss": 0.6595, "step": 13067 }, { "epoch": 0.3815363055093282, "grad_norm": 0.7581353541847167, "learning_rate": 3.436009732360098e-06, "loss": 0.6891, "step": 13068 }, { "epoch": 0.38156550173717557, "grad_norm": 0.7411168344138207, "learning_rate": 3.435847526358476e-06, "loss": 0.6648, "step": 13069 }, { "epoch": 0.38159469796502293, "grad_norm": 0.7331621824065845, "learning_rate": 3.4356853203568534e-06, "loss": 0.6524, "step": 13070 }, { "epoch": 0.3816238941928703, "grad_norm": 0.6730673811084925, "learning_rate": 3.4355231143552314e-06, "loss": 0.6024, "step": 13071 }, { "epoch": 0.38165309042071766, "grad_norm": 0.820623737792715, "learning_rate": 3.4353609083536094e-06, "loss": 0.6998, "step": 13072 }, { "epoch": 0.381682286648565, "grad_norm": 0.7231469170503325, "learning_rate": 3.4351987023519874e-06, "loss": 0.6972, "step": 13073 }, { "epoch": 0.3817114828764124, "grad_norm": 0.6794170162768722, "learning_rate": 3.435036496350365e-06, "loss": 0.6059, "step": 13074 }, { "epoch": 0.38174067910425974, "grad_norm": 0.7189447333461982, "learning_rate": 3.434874290348743e-06, "loss": 0.5925, "step": 13075 }, { "epoch": 0.3817698753321071, "grad_norm": 0.7307186933690604, "learning_rate": 3.434712084347121e-06, "loss": 0.682, "step": 13076 }, { "epoch": 0.38179907155995446, "grad_norm": 0.7197548525009582, "learning_rate": 3.434549878345499e-06, "loss": 0.6317, "step": 13077 }, { "epoch": 0.3818282677878018, "grad_norm": 0.7546351458496346, "learning_rate": 3.434387672343877e-06, "loss": 0.6835, "step": 13078 }, { "epoch": 0.3818574640156492, "grad_norm": 0.7459007673652325, "learning_rate": 3.4342254663422546e-06, "loss": 0.6475, "step": 13079 }, { "epoch": 0.38188666024349655, "grad_norm": 0.8138988780457841, "learning_rate": 3.4340632603406326e-06, "loss": 0.7626, "step": 13080 }, { "epoch": 0.3819158564713439, "grad_norm": 0.6771036656384241, "learning_rate": 3.4339010543390106e-06, "loss": 0.5232, "step": 13081 }, { "epoch": 0.38194505269919127, "grad_norm": 0.6903218912031234, "learning_rate": 3.433738848337389e-06, "loss": 0.6044, "step": 13082 }, { "epoch": 0.38197424892703863, "grad_norm": 0.6795620949183466, "learning_rate": 3.433576642335767e-06, "loss": 0.6287, "step": 13083 }, { "epoch": 0.382003445154886, "grad_norm": 0.6841610590369885, "learning_rate": 3.4334144363341446e-06, "loss": 0.5936, "step": 13084 }, { "epoch": 0.38203264138273335, "grad_norm": 0.7392639453178811, "learning_rate": 3.4332522303325226e-06, "loss": 0.64, "step": 13085 }, { "epoch": 0.3820618376105807, "grad_norm": 0.8487711751897258, "learning_rate": 3.4330900243309006e-06, "loss": 0.7821, "step": 13086 }, { "epoch": 0.3820910338384281, "grad_norm": 0.7306638980593951, "learning_rate": 3.4329278183292787e-06, "loss": 0.6089, "step": 13087 }, { "epoch": 0.38212023006627543, "grad_norm": 0.7342192959470265, "learning_rate": 3.4327656123276567e-06, "loss": 0.6791, "step": 13088 }, { "epoch": 0.3821494262941228, "grad_norm": 0.7434073536875891, "learning_rate": 3.4326034063260342e-06, "loss": 0.674, "step": 13089 }, { "epoch": 0.38217862252197016, "grad_norm": 0.736885492479351, "learning_rate": 3.4324412003244123e-06, "loss": 0.6482, "step": 13090 }, { "epoch": 0.3822078187498175, "grad_norm": 0.7305585569722695, "learning_rate": 3.4322789943227903e-06, "loss": 0.6458, "step": 13091 }, { "epoch": 0.3822370149776649, "grad_norm": 0.7485895377182064, "learning_rate": 3.4321167883211683e-06, "loss": 0.7108, "step": 13092 }, { "epoch": 0.38226621120551224, "grad_norm": 0.7361834231695072, "learning_rate": 3.431954582319546e-06, "loss": 0.6937, "step": 13093 }, { "epoch": 0.3822954074333596, "grad_norm": 0.9210134920345072, "learning_rate": 3.431792376317924e-06, "loss": 0.7672, "step": 13094 }, { "epoch": 0.38232460366120696, "grad_norm": 0.7344792091408737, "learning_rate": 3.431630170316302e-06, "loss": 0.6742, "step": 13095 }, { "epoch": 0.3823537998890543, "grad_norm": 0.7048489893158264, "learning_rate": 3.43146796431468e-06, "loss": 0.6223, "step": 13096 }, { "epoch": 0.3823829961169017, "grad_norm": 0.7311681109474063, "learning_rate": 3.431305758313058e-06, "loss": 0.6601, "step": 13097 }, { "epoch": 0.38241219234474905, "grad_norm": 0.681864137724934, "learning_rate": 3.4311435523114355e-06, "loss": 0.5845, "step": 13098 }, { "epoch": 0.3824413885725964, "grad_norm": 0.7358294838175137, "learning_rate": 3.4309813463098135e-06, "loss": 0.702, "step": 13099 }, { "epoch": 0.38247058480044377, "grad_norm": 0.6790696405143766, "learning_rate": 3.4308191403081915e-06, "loss": 0.5786, "step": 13100 }, { "epoch": 0.38249978102829113, "grad_norm": 0.6894173132198397, "learning_rate": 3.43065693430657e-06, "loss": 0.5177, "step": 13101 }, { "epoch": 0.3825289772561385, "grad_norm": 0.696545922181942, "learning_rate": 3.430494728304948e-06, "loss": 0.6071, "step": 13102 }, { "epoch": 0.38255817348398585, "grad_norm": 0.7172722735640998, "learning_rate": 3.4303325223033255e-06, "loss": 0.5885, "step": 13103 }, { "epoch": 0.3825873697118332, "grad_norm": 0.8032567952203568, "learning_rate": 3.4301703163017035e-06, "loss": 0.7376, "step": 13104 }, { "epoch": 0.3826165659396806, "grad_norm": 0.7270071151484693, "learning_rate": 3.4300081103000815e-06, "loss": 0.6899, "step": 13105 }, { "epoch": 0.38264576216752794, "grad_norm": 0.692860572914981, "learning_rate": 3.4298459042984595e-06, "loss": 0.5791, "step": 13106 }, { "epoch": 0.3826749583953753, "grad_norm": 0.737673899276648, "learning_rate": 3.4296836982968375e-06, "loss": 0.732, "step": 13107 }, { "epoch": 0.38270415462322266, "grad_norm": 0.7667485420454428, "learning_rate": 3.429521492295215e-06, "loss": 0.673, "step": 13108 }, { "epoch": 0.38273335085107, "grad_norm": 0.7528156397945401, "learning_rate": 3.429359286293593e-06, "loss": 0.6754, "step": 13109 }, { "epoch": 0.3827625470789174, "grad_norm": 0.6779069183468509, "learning_rate": 3.429197080291971e-06, "loss": 0.5832, "step": 13110 }, { "epoch": 0.38279174330676474, "grad_norm": 0.6150864122579116, "learning_rate": 3.429034874290349e-06, "loss": 0.4691, "step": 13111 }, { "epoch": 0.3828209395346121, "grad_norm": 0.7674223753334356, "learning_rate": 3.4288726682887267e-06, "loss": 0.6749, "step": 13112 }, { "epoch": 0.38285013576245946, "grad_norm": 0.7483483974161181, "learning_rate": 3.4287104622871047e-06, "loss": 0.6519, "step": 13113 }, { "epoch": 0.3828793319903068, "grad_norm": 0.7333680475963231, "learning_rate": 3.4285482562854827e-06, "loss": 0.6733, "step": 13114 }, { "epoch": 0.3829085282181542, "grad_norm": 0.8015151278191107, "learning_rate": 3.4283860502838607e-06, "loss": 0.8132, "step": 13115 }, { "epoch": 0.3829377244460016, "grad_norm": 0.8004999262278173, "learning_rate": 3.4282238442822387e-06, "loss": 0.6903, "step": 13116 }, { "epoch": 0.38296692067384897, "grad_norm": 0.7691739386449138, "learning_rate": 3.4280616382806163e-06, "loss": 0.6381, "step": 13117 }, { "epoch": 0.3829961169016963, "grad_norm": 0.7059387634666794, "learning_rate": 3.4278994322789943e-06, "loss": 0.6452, "step": 13118 }, { "epoch": 0.3830253131295437, "grad_norm": 0.6678268950224026, "learning_rate": 3.4277372262773728e-06, "loss": 0.5555, "step": 13119 }, { "epoch": 0.38305450935739105, "grad_norm": 0.7069971073396744, "learning_rate": 3.4275750202757508e-06, "loss": 0.5974, "step": 13120 }, { "epoch": 0.3830837055852384, "grad_norm": 0.7221712161160352, "learning_rate": 3.4274128142741288e-06, "loss": 0.6525, "step": 13121 }, { "epoch": 0.38311290181308577, "grad_norm": 0.7505588585143013, "learning_rate": 3.4272506082725064e-06, "loss": 0.6884, "step": 13122 }, { "epoch": 0.38314209804093313, "grad_norm": 0.7858574548667889, "learning_rate": 3.4270884022708844e-06, "loss": 0.715, "step": 13123 }, { "epoch": 0.3831712942687805, "grad_norm": 0.7481672612301928, "learning_rate": 3.4269261962692624e-06, "loss": 0.7049, "step": 13124 }, { "epoch": 0.38320049049662785, "grad_norm": 0.7512744524273256, "learning_rate": 3.4267639902676404e-06, "loss": 0.6891, "step": 13125 }, { "epoch": 0.3832296867244752, "grad_norm": 0.7578371756922855, "learning_rate": 3.4266017842660184e-06, "loss": 0.6981, "step": 13126 }, { "epoch": 0.3832588829523226, "grad_norm": 0.7433650928166798, "learning_rate": 3.426439578264396e-06, "loss": 0.645, "step": 13127 }, { "epoch": 0.38328807918016994, "grad_norm": 0.7103467689620084, "learning_rate": 3.426277372262774e-06, "loss": 0.5994, "step": 13128 }, { "epoch": 0.3833172754080173, "grad_norm": 0.8017484090218435, "learning_rate": 3.426115166261152e-06, "loss": 0.7551, "step": 13129 }, { "epoch": 0.38334647163586466, "grad_norm": 0.7610778218270218, "learning_rate": 3.42595296025953e-06, "loss": 0.6689, "step": 13130 }, { "epoch": 0.383375667863712, "grad_norm": 0.6986535722733493, "learning_rate": 3.4257907542579076e-06, "loss": 0.6093, "step": 13131 }, { "epoch": 0.3834048640915594, "grad_norm": 0.7418848905455302, "learning_rate": 3.4256285482562856e-06, "loss": 0.6341, "step": 13132 }, { "epoch": 0.38343406031940674, "grad_norm": 0.7177267398156844, "learning_rate": 3.4254663422546636e-06, "loss": 0.5962, "step": 13133 }, { "epoch": 0.3834632565472541, "grad_norm": 0.6907574016849096, "learning_rate": 3.4253041362530416e-06, "loss": 0.6041, "step": 13134 }, { "epoch": 0.38349245277510147, "grad_norm": 0.765534650918183, "learning_rate": 3.4251419302514196e-06, "loss": 0.7415, "step": 13135 }, { "epoch": 0.38352164900294883, "grad_norm": 0.6834323218953441, "learning_rate": 3.424979724249797e-06, "loss": 0.5817, "step": 13136 }, { "epoch": 0.3835508452307962, "grad_norm": 0.7347094736261583, "learning_rate": 3.424817518248175e-06, "loss": 0.6838, "step": 13137 }, { "epoch": 0.38358004145864355, "grad_norm": 0.7406650935863414, "learning_rate": 3.4246553122465536e-06, "loss": 0.6895, "step": 13138 }, { "epoch": 0.3836092376864909, "grad_norm": 0.7010984637461963, "learning_rate": 3.4244931062449316e-06, "loss": 0.6121, "step": 13139 }, { "epoch": 0.3836384339143383, "grad_norm": 0.7109064359159928, "learning_rate": 3.4243309002433096e-06, "loss": 0.6595, "step": 13140 }, { "epoch": 0.38366763014218563, "grad_norm": 0.6901199111544444, "learning_rate": 3.424168694241687e-06, "loss": 0.6144, "step": 13141 }, { "epoch": 0.383696826370033, "grad_norm": 0.7179880125025474, "learning_rate": 3.4240064882400652e-06, "loss": 0.634, "step": 13142 }, { "epoch": 0.38372602259788036, "grad_norm": 0.8501756840458125, "learning_rate": 3.4238442822384432e-06, "loss": 0.7639, "step": 13143 }, { "epoch": 0.3837552188257277, "grad_norm": 0.6468974467630687, "learning_rate": 3.4236820762368212e-06, "loss": 0.5045, "step": 13144 }, { "epoch": 0.3837844150535751, "grad_norm": 0.7440302612062709, "learning_rate": 3.4235198702351992e-06, "loss": 0.6922, "step": 13145 }, { "epoch": 0.38381361128142244, "grad_norm": 0.7830796337523039, "learning_rate": 3.423357664233577e-06, "loss": 0.6361, "step": 13146 }, { "epoch": 0.3838428075092698, "grad_norm": 0.8809427334421857, "learning_rate": 3.423195458231955e-06, "loss": 0.6453, "step": 13147 }, { "epoch": 0.38387200373711716, "grad_norm": 0.7168103728248154, "learning_rate": 3.423033252230333e-06, "loss": 0.6111, "step": 13148 }, { "epoch": 0.3839011999649645, "grad_norm": 0.7273189980599293, "learning_rate": 3.422871046228711e-06, "loss": 0.6691, "step": 13149 }, { "epoch": 0.3839303961928119, "grad_norm": 0.6989601562943655, "learning_rate": 3.4227088402270884e-06, "loss": 0.6227, "step": 13150 }, { "epoch": 0.38395959242065925, "grad_norm": 0.7141952166257571, "learning_rate": 3.4225466342254664e-06, "loss": 0.6438, "step": 13151 }, { "epoch": 0.3839887886485066, "grad_norm": 0.7277105351994112, "learning_rate": 3.4223844282238444e-06, "loss": 0.7259, "step": 13152 }, { "epoch": 0.38401798487635397, "grad_norm": 0.7997840617076193, "learning_rate": 3.4222222222222224e-06, "loss": 0.7573, "step": 13153 }, { "epoch": 0.38404718110420133, "grad_norm": 0.6730157420373348, "learning_rate": 3.4220600162206005e-06, "loss": 0.6095, "step": 13154 }, { "epoch": 0.3840763773320487, "grad_norm": 0.6819877979048551, "learning_rate": 3.421897810218978e-06, "loss": 0.5915, "step": 13155 }, { "epoch": 0.38410557355989605, "grad_norm": 0.7021734676374961, "learning_rate": 3.421735604217356e-06, "loss": 0.6418, "step": 13156 }, { "epoch": 0.3841347697877434, "grad_norm": 0.903445697332567, "learning_rate": 3.4215733982157345e-06, "loss": 0.6663, "step": 13157 }, { "epoch": 0.3841639660155908, "grad_norm": 0.7734776952426065, "learning_rate": 3.4214111922141125e-06, "loss": 0.6341, "step": 13158 }, { "epoch": 0.38419316224343814, "grad_norm": 0.6959875214787282, "learning_rate": 3.4212489862124905e-06, "loss": 0.6128, "step": 13159 }, { "epoch": 0.3842223584712855, "grad_norm": 0.7570147371228202, "learning_rate": 3.421086780210868e-06, "loss": 0.6609, "step": 13160 }, { "epoch": 0.38425155469913286, "grad_norm": 0.8757638612922953, "learning_rate": 3.420924574209246e-06, "loss": 0.7535, "step": 13161 }, { "epoch": 0.3842807509269802, "grad_norm": 0.7588281006029169, "learning_rate": 3.420762368207624e-06, "loss": 0.6364, "step": 13162 }, { "epoch": 0.3843099471548276, "grad_norm": 0.7427937157476387, "learning_rate": 3.420600162206002e-06, "loss": 0.6562, "step": 13163 }, { "epoch": 0.38433914338267494, "grad_norm": 0.7684179323295539, "learning_rate": 3.42043795620438e-06, "loss": 0.6927, "step": 13164 }, { "epoch": 0.3843683396105223, "grad_norm": 0.7356810542760986, "learning_rate": 3.4202757502027577e-06, "loss": 0.6624, "step": 13165 }, { "epoch": 0.38439753583836966, "grad_norm": 0.7419132970953447, "learning_rate": 3.4201135442011357e-06, "loss": 0.6707, "step": 13166 }, { "epoch": 0.384426732066217, "grad_norm": 0.758936852385582, "learning_rate": 3.4199513381995137e-06, "loss": 0.7077, "step": 13167 }, { "epoch": 0.3844559282940644, "grad_norm": 0.72865732069166, "learning_rate": 3.4197891321978917e-06, "loss": 0.6644, "step": 13168 }, { "epoch": 0.38448512452191175, "grad_norm": 1.3256535361106188, "learning_rate": 3.4196269261962693e-06, "loss": 0.6634, "step": 13169 }, { "epoch": 0.3845143207497591, "grad_norm": 0.7341886417449512, "learning_rate": 3.4194647201946473e-06, "loss": 0.693, "step": 13170 }, { "epoch": 0.38454351697760647, "grad_norm": 0.7164124933614592, "learning_rate": 3.4193025141930253e-06, "loss": 0.6183, "step": 13171 }, { "epoch": 0.38457271320545383, "grad_norm": 0.907424869789844, "learning_rate": 3.4191403081914033e-06, "loss": 0.6676, "step": 13172 }, { "epoch": 0.3846019094333012, "grad_norm": 0.6943047092107105, "learning_rate": 3.4189781021897813e-06, "loss": 0.6097, "step": 13173 }, { "epoch": 0.38463110566114855, "grad_norm": 0.7291847880645861, "learning_rate": 3.418815896188159e-06, "loss": 0.6295, "step": 13174 }, { "epoch": 0.3846603018889959, "grad_norm": 0.6907194512882027, "learning_rate": 3.418653690186537e-06, "loss": 0.6205, "step": 13175 }, { "epoch": 0.38468949811684333, "grad_norm": 0.6679020880952925, "learning_rate": 3.4184914841849153e-06, "loss": 0.5568, "step": 13176 }, { "epoch": 0.3847186943446907, "grad_norm": 0.7372462422988253, "learning_rate": 3.4183292781832933e-06, "loss": 0.6464, "step": 13177 }, { "epoch": 0.38474789057253805, "grad_norm": 0.7052385577392394, "learning_rate": 3.4181670721816713e-06, "loss": 0.5661, "step": 13178 }, { "epoch": 0.3847770868003854, "grad_norm": 0.7715403722964986, "learning_rate": 3.418004866180049e-06, "loss": 0.6837, "step": 13179 }, { "epoch": 0.3848062830282328, "grad_norm": 0.693604491863175, "learning_rate": 3.417842660178427e-06, "loss": 0.6252, "step": 13180 }, { "epoch": 0.38483547925608014, "grad_norm": 0.7638421726672404, "learning_rate": 3.417680454176805e-06, "loss": 0.6694, "step": 13181 }, { "epoch": 0.3848646754839275, "grad_norm": 0.6879069289281955, "learning_rate": 3.417518248175183e-06, "loss": 0.6039, "step": 13182 }, { "epoch": 0.38489387171177486, "grad_norm": 0.719533390187857, "learning_rate": 3.417356042173561e-06, "loss": 0.5735, "step": 13183 }, { "epoch": 0.3849230679396222, "grad_norm": 0.8063870569120575, "learning_rate": 3.4171938361719385e-06, "loss": 0.7339, "step": 13184 }, { "epoch": 0.3849522641674696, "grad_norm": 0.7060282129350455, "learning_rate": 3.4170316301703165e-06, "loss": 0.5779, "step": 13185 }, { "epoch": 0.38498146039531694, "grad_norm": 0.73750352500787, "learning_rate": 3.4168694241686946e-06, "loss": 0.6198, "step": 13186 }, { "epoch": 0.3850106566231643, "grad_norm": 1.0250734223938325, "learning_rate": 3.4167072181670726e-06, "loss": 0.6565, "step": 13187 }, { "epoch": 0.38503985285101167, "grad_norm": 0.7009018730835512, "learning_rate": 3.41654501216545e-06, "loss": 0.6225, "step": 13188 }, { "epoch": 0.385069049078859, "grad_norm": 0.9107124528681163, "learning_rate": 3.416382806163828e-06, "loss": 0.6828, "step": 13189 }, { "epoch": 0.3850982453067064, "grad_norm": 0.6797124117806114, "learning_rate": 3.416220600162206e-06, "loss": 0.5904, "step": 13190 }, { "epoch": 0.38512744153455375, "grad_norm": 0.7354979211192941, "learning_rate": 3.416058394160584e-06, "loss": 0.6578, "step": 13191 }, { "epoch": 0.3851566377624011, "grad_norm": 0.7372593054886051, "learning_rate": 3.415896188158962e-06, "loss": 0.7132, "step": 13192 }, { "epoch": 0.38518583399024847, "grad_norm": 0.7535135355867627, "learning_rate": 3.4157339821573398e-06, "loss": 0.714, "step": 13193 }, { "epoch": 0.38521503021809583, "grad_norm": 0.7708395196412161, "learning_rate": 3.4155717761557178e-06, "loss": 0.6747, "step": 13194 }, { "epoch": 0.3852442264459432, "grad_norm": 0.7108249835575808, "learning_rate": 3.415409570154096e-06, "loss": 0.6374, "step": 13195 }, { "epoch": 0.38527342267379056, "grad_norm": 0.839316994948814, "learning_rate": 3.415247364152474e-06, "loss": 0.7227, "step": 13196 }, { "epoch": 0.3853026189016379, "grad_norm": 0.7062361279639526, "learning_rate": 3.415085158150852e-06, "loss": 0.5626, "step": 13197 }, { "epoch": 0.3853318151294853, "grad_norm": 0.7299986527702058, "learning_rate": 3.4149229521492298e-06, "loss": 0.635, "step": 13198 }, { "epoch": 0.38536101135733264, "grad_norm": 0.8172744609451804, "learning_rate": 3.414760746147608e-06, "loss": 0.7574, "step": 13199 }, { "epoch": 0.38539020758518, "grad_norm": 0.7056994557260644, "learning_rate": 3.414598540145986e-06, "loss": 0.6317, "step": 13200 }, { "epoch": 0.38541940381302736, "grad_norm": 0.7458149953539718, "learning_rate": 3.414436334144364e-06, "loss": 0.6834, "step": 13201 }, { "epoch": 0.3854486000408747, "grad_norm": 0.7540377107264332, "learning_rate": 3.414274128142742e-06, "loss": 0.6743, "step": 13202 }, { "epoch": 0.3854777962687221, "grad_norm": 0.739679385226417, "learning_rate": 3.4141119221411194e-06, "loss": 0.622, "step": 13203 }, { "epoch": 0.38550699249656945, "grad_norm": 0.7266436739199911, "learning_rate": 3.4139497161394974e-06, "loss": 0.662, "step": 13204 }, { "epoch": 0.3855361887244168, "grad_norm": 0.7485168247237024, "learning_rate": 3.4137875101378754e-06, "loss": 0.6484, "step": 13205 }, { "epoch": 0.38556538495226417, "grad_norm": 0.6872651604545085, "learning_rate": 3.4136253041362534e-06, "loss": 0.5833, "step": 13206 }, { "epoch": 0.38559458118011153, "grad_norm": 0.7094448580261002, "learning_rate": 3.413463098134631e-06, "loss": 0.6371, "step": 13207 }, { "epoch": 0.3856237774079589, "grad_norm": 0.7057253916810323, "learning_rate": 3.413300892133009e-06, "loss": 0.5852, "step": 13208 }, { "epoch": 0.38565297363580625, "grad_norm": 0.7559863123496622, "learning_rate": 3.413138686131387e-06, "loss": 0.6132, "step": 13209 }, { "epoch": 0.3856821698636536, "grad_norm": 0.72455381734956, "learning_rate": 3.412976480129765e-06, "loss": 0.646, "step": 13210 }, { "epoch": 0.385711366091501, "grad_norm": 0.7251865628599987, "learning_rate": 3.4128142741281426e-06, "loss": 0.6574, "step": 13211 }, { "epoch": 0.38574056231934833, "grad_norm": 0.7583906178540203, "learning_rate": 3.4126520681265206e-06, "loss": 0.7041, "step": 13212 }, { "epoch": 0.3857697585471957, "grad_norm": 0.7601782605792758, "learning_rate": 3.4124898621248986e-06, "loss": 0.7284, "step": 13213 }, { "epoch": 0.38579895477504306, "grad_norm": 0.8489839652238, "learning_rate": 3.412327656123277e-06, "loss": 0.7457, "step": 13214 }, { "epoch": 0.3858281510028904, "grad_norm": 0.7516632496486166, "learning_rate": 3.412165450121655e-06, "loss": 0.6677, "step": 13215 }, { "epoch": 0.3858573472307378, "grad_norm": 0.7561426300004077, "learning_rate": 3.412003244120033e-06, "loss": 0.6755, "step": 13216 }, { "epoch": 0.38588654345858514, "grad_norm": 0.6904122002904388, "learning_rate": 3.4118410381184106e-06, "loss": 0.5717, "step": 13217 }, { "epoch": 0.3859157396864325, "grad_norm": 0.710273627444057, "learning_rate": 3.4116788321167887e-06, "loss": 0.6196, "step": 13218 }, { "epoch": 0.38594493591427986, "grad_norm": 0.8171899266587214, "learning_rate": 3.4115166261151667e-06, "loss": 0.7624, "step": 13219 }, { "epoch": 0.3859741321421272, "grad_norm": 0.7399983360940324, "learning_rate": 3.4113544201135447e-06, "loss": 0.6637, "step": 13220 }, { "epoch": 0.3860033283699746, "grad_norm": 0.6591884429048389, "learning_rate": 3.4111922141119227e-06, "loss": 0.5421, "step": 13221 }, { "epoch": 0.38603252459782195, "grad_norm": 0.7685949474723767, "learning_rate": 3.4110300081103003e-06, "loss": 0.7438, "step": 13222 }, { "epoch": 0.3860617208256693, "grad_norm": 0.7408599968960282, "learning_rate": 3.4108678021086783e-06, "loss": 0.6941, "step": 13223 }, { "epoch": 0.38609091705351667, "grad_norm": 0.7937099093610323, "learning_rate": 3.4107055961070563e-06, "loss": 0.7108, "step": 13224 }, { "epoch": 0.38612011328136403, "grad_norm": 0.6780617804115565, "learning_rate": 3.4105433901054343e-06, "loss": 0.5967, "step": 13225 }, { "epoch": 0.3861493095092114, "grad_norm": 0.7604176390501609, "learning_rate": 3.410381184103812e-06, "loss": 0.7118, "step": 13226 }, { "epoch": 0.38617850573705875, "grad_norm": 0.8751586456785213, "learning_rate": 3.41021897810219e-06, "loss": 0.6552, "step": 13227 }, { "epoch": 0.3862077019649061, "grad_norm": 0.7173159292691571, "learning_rate": 3.410056772100568e-06, "loss": 0.598, "step": 13228 }, { "epoch": 0.3862368981927535, "grad_norm": 0.7256572679018705, "learning_rate": 3.409894566098946e-06, "loss": 0.6975, "step": 13229 }, { "epoch": 0.38626609442060084, "grad_norm": 0.7112612320367309, "learning_rate": 3.4097323600973235e-06, "loss": 0.5901, "step": 13230 }, { "epoch": 0.3862952906484482, "grad_norm": 0.6819978956778491, "learning_rate": 3.4095701540957015e-06, "loss": 0.6131, "step": 13231 }, { "epoch": 0.38632448687629556, "grad_norm": 0.7800404957349009, "learning_rate": 3.4094079480940795e-06, "loss": 0.6377, "step": 13232 }, { "epoch": 0.3863536831041429, "grad_norm": 0.7281902574088828, "learning_rate": 3.409245742092458e-06, "loss": 0.6682, "step": 13233 }, { "epoch": 0.3863828793319903, "grad_norm": 0.7867365398247544, "learning_rate": 3.409083536090836e-06, "loss": 0.6943, "step": 13234 }, { "epoch": 0.38641207555983764, "grad_norm": 0.7090641875039752, "learning_rate": 3.408921330089214e-06, "loss": 0.6606, "step": 13235 }, { "epoch": 0.386441271787685, "grad_norm": 0.6878822063778822, "learning_rate": 3.4087591240875915e-06, "loss": 0.5907, "step": 13236 }, { "epoch": 0.3864704680155324, "grad_norm": 0.7131153220163394, "learning_rate": 3.4085969180859695e-06, "loss": 0.6024, "step": 13237 }, { "epoch": 0.3864996642433798, "grad_norm": 0.7287184017361625, "learning_rate": 3.4084347120843475e-06, "loss": 0.6623, "step": 13238 }, { "epoch": 0.38652886047122714, "grad_norm": 0.6870066069027384, "learning_rate": 3.4082725060827255e-06, "loss": 0.5732, "step": 13239 }, { "epoch": 0.3865580566990745, "grad_norm": 0.7481218835420691, "learning_rate": 3.4081103000811035e-06, "loss": 0.6863, "step": 13240 }, { "epoch": 0.38658725292692186, "grad_norm": 0.7299649185130767, "learning_rate": 3.407948094079481e-06, "loss": 0.6541, "step": 13241 }, { "epoch": 0.3866164491547692, "grad_norm": 0.8875209452342632, "learning_rate": 3.407785888077859e-06, "loss": 0.7317, "step": 13242 }, { "epoch": 0.3866456453826166, "grad_norm": 0.8058127700690226, "learning_rate": 3.407623682076237e-06, "loss": 0.7081, "step": 13243 }, { "epoch": 0.38667484161046395, "grad_norm": 0.7521539863397674, "learning_rate": 3.407461476074615e-06, "loss": 0.6426, "step": 13244 }, { "epoch": 0.3867040378383113, "grad_norm": 0.777992645092158, "learning_rate": 3.4072992700729927e-06, "loss": 0.6457, "step": 13245 }, { "epoch": 0.38673323406615867, "grad_norm": 0.7174795743037232, "learning_rate": 3.4071370640713707e-06, "loss": 0.6245, "step": 13246 }, { "epoch": 0.38676243029400603, "grad_norm": 0.82759817566386, "learning_rate": 3.4069748580697487e-06, "loss": 0.8063, "step": 13247 }, { "epoch": 0.3867916265218534, "grad_norm": 0.7845548558904105, "learning_rate": 3.4068126520681267e-06, "loss": 0.6934, "step": 13248 }, { "epoch": 0.38682082274970075, "grad_norm": 0.7462001698297394, "learning_rate": 3.4066504460665043e-06, "loss": 0.598, "step": 13249 }, { "epoch": 0.3868500189775481, "grad_norm": 0.7133248522738005, "learning_rate": 3.4064882400648823e-06, "loss": 0.6034, "step": 13250 }, { "epoch": 0.3868792152053955, "grad_norm": 0.7259882477870744, "learning_rate": 3.4063260340632603e-06, "loss": 0.6838, "step": 13251 }, { "epoch": 0.38690841143324284, "grad_norm": 0.7825308831054639, "learning_rate": 3.4061638280616388e-06, "loss": 0.6867, "step": 13252 }, { "epoch": 0.3869376076610902, "grad_norm": 0.8085324005757974, "learning_rate": 3.4060016220600168e-06, "loss": 0.6521, "step": 13253 }, { "epoch": 0.38696680388893756, "grad_norm": 0.6962998351405083, "learning_rate": 3.4058394160583948e-06, "loss": 0.634, "step": 13254 }, { "epoch": 0.3869960001167849, "grad_norm": 0.710205488191152, "learning_rate": 3.4056772100567724e-06, "loss": 0.6176, "step": 13255 }, { "epoch": 0.3870251963446323, "grad_norm": 0.7210974641951097, "learning_rate": 3.4055150040551504e-06, "loss": 0.6247, "step": 13256 }, { "epoch": 0.38705439257247964, "grad_norm": 0.8017652030740776, "learning_rate": 3.4053527980535284e-06, "loss": 0.7547, "step": 13257 }, { "epoch": 0.387083588800327, "grad_norm": 0.7439553318102249, "learning_rate": 3.4051905920519064e-06, "loss": 0.6659, "step": 13258 }, { "epoch": 0.38711278502817437, "grad_norm": 0.7351723234153015, "learning_rate": 3.4050283860502844e-06, "loss": 0.6419, "step": 13259 }, { "epoch": 0.3871419812560217, "grad_norm": 0.7437593385386991, "learning_rate": 3.404866180048662e-06, "loss": 0.7025, "step": 13260 }, { "epoch": 0.3871711774838691, "grad_norm": 0.7299270917700901, "learning_rate": 3.40470397404704e-06, "loss": 0.6424, "step": 13261 }, { "epoch": 0.38720037371171645, "grad_norm": 0.8479210223881999, "learning_rate": 3.404541768045418e-06, "loss": 0.6584, "step": 13262 }, { "epoch": 0.3872295699395638, "grad_norm": 1.0515627164943568, "learning_rate": 3.404379562043796e-06, "loss": 0.746, "step": 13263 }, { "epoch": 0.3872587661674112, "grad_norm": 0.7304729737562258, "learning_rate": 3.4042173560421736e-06, "loss": 0.608, "step": 13264 }, { "epoch": 0.38728796239525853, "grad_norm": 0.7174341880438321, "learning_rate": 3.4040551500405516e-06, "loss": 0.6897, "step": 13265 }, { "epoch": 0.3873171586231059, "grad_norm": 0.6831182663082485, "learning_rate": 3.4038929440389296e-06, "loss": 0.6349, "step": 13266 }, { "epoch": 0.38734635485095326, "grad_norm": 0.6538513197278565, "learning_rate": 3.4037307380373076e-06, "loss": 0.5514, "step": 13267 }, { "epoch": 0.3873755510788006, "grad_norm": 0.7619230242446392, "learning_rate": 3.403568532035685e-06, "loss": 0.7095, "step": 13268 }, { "epoch": 0.387404747306648, "grad_norm": 0.7500257810463443, "learning_rate": 3.403406326034063e-06, "loss": 0.6232, "step": 13269 }, { "epoch": 0.38743394353449534, "grad_norm": 0.6981139164797655, "learning_rate": 3.4032441200324416e-06, "loss": 0.6201, "step": 13270 }, { "epoch": 0.3874631397623427, "grad_norm": 0.8077991364524789, "learning_rate": 3.4030819140308196e-06, "loss": 0.7444, "step": 13271 }, { "epoch": 0.38749233599019006, "grad_norm": 0.7333110908859503, "learning_rate": 3.4029197080291976e-06, "loss": 0.5886, "step": 13272 }, { "epoch": 0.3875215322180374, "grad_norm": 0.748528682248754, "learning_rate": 3.4027575020275756e-06, "loss": 0.5822, "step": 13273 }, { "epoch": 0.3875507284458848, "grad_norm": 0.7463784101716328, "learning_rate": 3.4025952960259532e-06, "loss": 0.6704, "step": 13274 }, { "epoch": 0.38757992467373215, "grad_norm": 0.7635947915168834, "learning_rate": 3.4024330900243312e-06, "loss": 0.7277, "step": 13275 }, { "epoch": 0.3876091209015795, "grad_norm": 0.6799073802054599, "learning_rate": 3.4022708840227092e-06, "loss": 0.5851, "step": 13276 }, { "epoch": 0.38763831712942687, "grad_norm": 0.699830556788319, "learning_rate": 3.4021086780210872e-06, "loss": 0.603, "step": 13277 }, { "epoch": 0.38766751335727423, "grad_norm": 0.728422121037246, "learning_rate": 3.4019464720194652e-06, "loss": 0.6682, "step": 13278 }, { "epoch": 0.3876967095851216, "grad_norm": 0.8255299027461571, "learning_rate": 3.401784266017843e-06, "loss": 0.7679, "step": 13279 }, { "epoch": 0.38772590581296895, "grad_norm": 0.9220627468522493, "learning_rate": 3.401622060016221e-06, "loss": 0.7673, "step": 13280 }, { "epoch": 0.3877551020408163, "grad_norm": 0.7525839022983671, "learning_rate": 3.401459854014599e-06, "loss": 0.7414, "step": 13281 }, { "epoch": 0.3877842982686637, "grad_norm": 0.6717010550193407, "learning_rate": 3.401297648012977e-06, "loss": 0.5864, "step": 13282 }, { "epoch": 0.38781349449651104, "grad_norm": 0.7353418058720854, "learning_rate": 3.4011354420113544e-06, "loss": 0.7044, "step": 13283 }, { "epoch": 0.3878426907243584, "grad_norm": 0.7699311231439863, "learning_rate": 3.4009732360097324e-06, "loss": 0.7313, "step": 13284 }, { "epoch": 0.38787188695220576, "grad_norm": 0.7074634662478232, "learning_rate": 3.4008110300081104e-06, "loss": 0.602, "step": 13285 }, { "epoch": 0.3879010831800531, "grad_norm": 0.6763823695209544, "learning_rate": 3.4006488240064885e-06, "loss": 0.5839, "step": 13286 }, { "epoch": 0.3879302794079005, "grad_norm": 0.7066264044146133, "learning_rate": 3.400486618004866e-06, "loss": 0.6613, "step": 13287 }, { "epoch": 0.38795947563574784, "grad_norm": 0.7331640171976503, "learning_rate": 3.400324412003244e-06, "loss": 0.6204, "step": 13288 }, { "epoch": 0.3879886718635952, "grad_norm": 0.740752988014124, "learning_rate": 3.4001622060016225e-06, "loss": 0.6563, "step": 13289 }, { "epoch": 0.38801786809144256, "grad_norm": 0.6514523208595068, "learning_rate": 3.4000000000000005e-06, "loss": 0.5396, "step": 13290 }, { "epoch": 0.3880470643192899, "grad_norm": 0.7540796330508486, "learning_rate": 3.3998377939983785e-06, "loss": 0.7231, "step": 13291 }, { "epoch": 0.3880762605471373, "grad_norm": 0.759363027701404, "learning_rate": 3.3996755879967565e-06, "loss": 0.6502, "step": 13292 }, { "epoch": 0.38810545677498465, "grad_norm": 0.7328227469035014, "learning_rate": 3.399513381995134e-06, "loss": 0.6791, "step": 13293 }, { "epoch": 0.388134653002832, "grad_norm": 0.7255884993362083, "learning_rate": 3.399351175993512e-06, "loss": 0.6654, "step": 13294 }, { "epoch": 0.38816384923067937, "grad_norm": 0.781663671600138, "learning_rate": 3.39918896999189e-06, "loss": 0.7409, "step": 13295 }, { "epoch": 0.38819304545852673, "grad_norm": 0.7359667865806935, "learning_rate": 3.399026763990268e-06, "loss": 0.6688, "step": 13296 }, { "epoch": 0.38822224168637415, "grad_norm": 0.7150046632351151, "learning_rate": 3.398864557988646e-06, "loss": 0.6511, "step": 13297 }, { "epoch": 0.3882514379142215, "grad_norm": 0.7264592486592718, "learning_rate": 3.3987023519870237e-06, "loss": 0.633, "step": 13298 }, { "epoch": 0.38828063414206887, "grad_norm": 0.6863577796220602, "learning_rate": 3.3985401459854017e-06, "loss": 0.5902, "step": 13299 }, { "epoch": 0.38830983036991623, "grad_norm": 0.8038232950314601, "learning_rate": 3.3983779399837797e-06, "loss": 0.6418, "step": 13300 }, { "epoch": 0.3883390265977636, "grad_norm": 0.7155913614266985, "learning_rate": 3.3982157339821577e-06, "loss": 0.6275, "step": 13301 }, { "epoch": 0.38836822282561095, "grad_norm": 0.6786359027830927, "learning_rate": 3.3980535279805353e-06, "loss": 0.5777, "step": 13302 }, { "epoch": 0.3883974190534583, "grad_norm": 0.7044841522310513, "learning_rate": 3.3978913219789133e-06, "loss": 0.5633, "step": 13303 }, { "epoch": 0.3884266152813057, "grad_norm": 0.7199220141393924, "learning_rate": 3.3977291159772913e-06, "loss": 0.6575, "step": 13304 }, { "epoch": 0.38845581150915304, "grad_norm": 0.6736836475238409, "learning_rate": 3.3975669099756693e-06, "loss": 0.541, "step": 13305 }, { "epoch": 0.3884850077370004, "grad_norm": 0.7589568988011889, "learning_rate": 3.397404703974047e-06, "loss": 0.6784, "step": 13306 }, { "epoch": 0.38851420396484776, "grad_norm": 0.9431234947260427, "learning_rate": 3.397242497972425e-06, "loss": 0.7311, "step": 13307 }, { "epoch": 0.3885434001926951, "grad_norm": 0.7247403158963225, "learning_rate": 3.3970802919708033e-06, "loss": 0.6236, "step": 13308 }, { "epoch": 0.3885725964205425, "grad_norm": 0.7136535834533649, "learning_rate": 3.3969180859691813e-06, "loss": 0.6264, "step": 13309 }, { "epoch": 0.38860179264838984, "grad_norm": 0.7193793344710384, "learning_rate": 3.3967558799675593e-06, "loss": 0.6891, "step": 13310 }, { "epoch": 0.3886309888762372, "grad_norm": 0.6626861309827955, "learning_rate": 3.3965936739659374e-06, "loss": 0.5613, "step": 13311 }, { "epoch": 0.38866018510408457, "grad_norm": 0.7653425252145881, "learning_rate": 3.396431467964315e-06, "loss": 0.73, "step": 13312 }, { "epoch": 0.3886893813319319, "grad_norm": 0.774414036159711, "learning_rate": 3.396269261962693e-06, "loss": 0.7311, "step": 13313 }, { "epoch": 0.3887185775597793, "grad_norm": 0.6827326155766645, "learning_rate": 3.396107055961071e-06, "loss": 0.5799, "step": 13314 }, { "epoch": 0.38874777378762665, "grad_norm": 0.7476417824798098, "learning_rate": 3.395944849959449e-06, "loss": 0.7152, "step": 13315 }, { "epoch": 0.388776970015474, "grad_norm": 0.7472956511778768, "learning_rate": 3.3957826439578265e-06, "loss": 0.6683, "step": 13316 }, { "epoch": 0.38880616624332137, "grad_norm": 0.727971150829442, "learning_rate": 3.3956204379562045e-06, "loss": 0.7007, "step": 13317 }, { "epoch": 0.38883536247116873, "grad_norm": 0.7417773912177752, "learning_rate": 3.3954582319545826e-06, "loss": 0.6857, "step": 13318 }, { "epoch": 0.3888645586990161, "grad_norm": 0.7755890819179371, "learning_rate": 3.3952960259529606e-06, "loss": 0.683, "step": 13319 }, { "epoch": 0.38889375492686346, "grad_norm": 0.7227718280540073, "learning_rate": 3.3951338199513386e-06, "loss": 0.6582, "step": 13320 }, { "epoch": 0.3889229511547108, "grad_norm": 0.7323931827762906, "learning_rate": 3.394971613949716e-06, "loss": 0.6464, "step": 13321 }, { "epoch": 0.3889521473825582, "grad_norm": 0.7297868011443313, "learning_rate": 3.394809407948094e-06, "loss": 0.6334, "step": 13322 }, { "epoch": 0.38898134361040554, "grad_norm": 0.7476091384819459, "learning_rate": 3.394647201946472e-06, "loss": 0.6748, "step": 13323 }, { "epoch": 0.3890105398382529, "grad_norm": 0.7736303696709169, "learning_rate": 3.39448499594485e-06, "loss": 0.7015, "step": 13324 }, { "epoch": 0.38903973606610026, "grad_norm": 0.7914617137761687, "learning_rate": 3.3943227899432278e-06, "loss": 0.7165, "step": 13325 }, { "epoch": 0.3890689322939476, "grad_norm": 0.6812763230796116, "learning_rate": 3.3941605839416058e-06, "loss": 0.6156, "step": 13326 }, { "epoch": 0.389098128521795, "grad_norm": 0.6754289191616121, "learning_rate": 3.393998377939984e-06, "loss": 0.5731, "step": 13327 }, { "epoch": 0.38912732474964234, "grad_norm": 0.7553868561510911, "learning_rate": 3.393836171938362e-06, "loss": 0.6174, "step": 13328 }, { "epoch": 0.3891565209774897, "grad_norm": 0.7482026834493024, "learning_rate": 3.39367396593674e-06, "loss": 0.691, "step": 13329 }, { "epoch": 0.38918571720533707, "grad_norm": 0.8147920493415618, "learning_rate": 3.3935117599351182e-06, "loss": 0.769, "step": 13330 }, { "epoch": 0.38921491343318443, "grad_norm": 0.7432318489210902, "learning_rate": 3.393349553933496e-06, "loss": 0.6533, "step": 13331 }, { "epoch": 0.3892441096610318, "grad_norm": 0.7083354170628369, "learning_rate": 3.393187347931874e-06, "loss": 0.6965, "step": 13332 }, { "epoch": 0.38927330588887915, "grad_norm": 0.7169059223029892, "learning_rate": 3.393025141930252e-06, "loss": 0.6797, "step": 13333 }, { "epoch": 0.3893025021167265, "grad_norm": 0.7394273035368976, "learning_rate": 3.39286293592863e-06, "loss": 0.6298, "step": 13334 }, { "epoch": 0.3893316983445739, "grad_norm": 0.7187069374212122, "learning_rate": 3.3927007299270074e-06, "loss": 0.6581, "step": 13335 }, { "epoch": 0.38936089457242123, "grad_norm": 0.8074868308205372, "learning_rate": 3.3925385239253854e-06, "loss": 0.7701, "step": 13336 }, { "epoch": 0.3893900908002686, "grad_norm": 0.7495911592355738, "learning_rate": 3.3923763179237634e-06, "loss": 0.6838, "step": 13337 }, { "epoch": 0.38941928702811596, "grad_norm": 0.7910015670273663, "learning_rate": 3.3922141119221414e-06, "loss": 0.68, "step": 13338 }, { "epoch": 0.3894484832559633, "grad_norm": 0.7759053211393494, "learning_rate": 3.3920519059205194e-06, "loss": 0.7467, "step": 13339 }, { "epoch": 0.3894776794838107, "grad_norm": 0.7486683427266869, "learning_rate": 3.391889699918897e-06, "loss": 0.7049, "step": 13340 }, { "epoch": 0.38950687571165804, "grad_norm": 0.7135846254712865, "learning_rate": 3.391727493917275e-06, "loss": 0.6622, "step": 13341 }, { "epoch": 0.3895360719395054, "grad_norm": 0.672316272732415, "learning_rate": 3.391565287915653e-06, "loss": 0.5801, "step": 13342 }, { "epoch": 0.38956526816735276, "grad_norm": 0.7243446025180396, "learning_rate": 3.391403081914031e-06, "loss": 0.6698, "step": 13343 }, { "epoch": 0.3895944643952001, "grad_norm": 0.7271526447511923, "learning_rate": 3.3912408759124086e-06, "loss": 0.6802, "step": 13344 }, { "epoch": 0.3896236606230475, "grad_norm": 0.7186193479375754, "learning_rate": 3.3910786699107866e-06, "loss": 0.6282, "step": 13345 }, { "epoch": 0.38965285685089485, "grad_norm": 0.7160405333291913, "learning_rate": 3.390916463909165e-06, "loss": 0.6513, "step": 13346 }, { "epoch": 0.3896820530787422, "grad_norm": 0.7099652884911171, "learning_rate": 3.390754257907543e-06, "loss": 0.6305, "step": 13347 }, { "epoch": 0.38971124930658957, "grad_norm": 0.7095971859036898, "learning_rate": 3.390592051905921e-06, "loss": 0.6587, "step": 13348 }, { "epoch": 0.38974044553443693, "grad_norm": 0.698509459651704, "learning_rate": 3.390429845904299e-06, "loss": 0.6335, "step": 13349 }, { "epoch": 0.3897696417622843, "grad_norm": 0.7194336856277063, "learning_rate": 3.3902676399026767e-06, "loss": 0.6389, "step": 13350 }, { "epoch": 0.38979883799013165, "grad_norm": 0.7230575689626704, "learning_rate": 3.3901054339010547e-06, "loss": 0.6567, "step": 13351 }, { "epoch": 0.389828034217979, "grad_norm": 0.7001953529799505, "learning_rate": 3.3899432278994327e-06, "loss": 0.6387, "step": 13352 }, { "epoch": 0.3898572304458264, "grad_norm": 0.7140205931537964, "learning_rate": 3.3897810218978107e-06, "loss": 0.6112, "step": 13353 }, { "epoch": 0.38988642667367374, "grad_norm": 0.7250392462159554, "learning_rate": 3.3896188158961883e-06, "loss": 0.5864, "step": 13354 }, { "epoch": 0.3899156229015211, "grad_norm": 0.7869245422006941, "learning_rate": 3.3894566098945663e-06, "loss": 0.6541, "step": 13355 }, { "epoch": 0.38994481912936846, "grad_norm": 0.7070484150888523, "learning_rate": 3.3892944038929443e-06, "loss": 0.6773, "step": 13356 }, { "epoch": 0.3899740153572159, "grad_norm": 0.6856698303088197, "learning_rate": 3.3891321978913223e-06, "loss": 0.6028, "step": 13357 }, { "epoch": 0.39000321158506324, "grad_norm": 0.7267624508750111, "learning_rate": 3.3889699918897003e-06, "loss": 0.6046, "step": 13358 }, { "epoch": 0.3900324078129106, "grad_norm": 0.8402319187563612, "learning_rate": 3.388807785888078e-06, "loss": 0.6614, "step": 13359 }, { "epoch": 0.39006160404075796, "grad_norm": 0.826427048756643, "learning_rate": 3.388645579886456e-06, "loss": 0.6265, "step": 13360 }, { "epoch": 0.3900908002686053, "grad_norm": 0.7413527312596718, "learning_rate": 3.388483373884834e-06, "loss": 0.6819, "step": 13361 }, { "epoch": 0.3901199964964527, "grad_norm": 0.7376211656996767, "learning_rate": 3.388321167883212e-06, "loss": 0.6325, "step": 13362 }, { "epoch": 0.39014919272430004, "grad_norm": 0.7448013482108553, "learning_rate": 3.3881589618815895e-06, "loss": 0.659, "step": 13363 }, { "epoch": 0.3901783889521474, "grad_norm": 0.7397252772659884, "learning_rate": 3.3879967558799675e-06, "loss": 0.6349, "step": 13364 }, { "epoch": 0.39020758517999476, "grad_norm": 0.7987726950762243, "learning_rate": 3.387834549878346e-06, "loss": 0.7315, "step": 13365 }, { "epoch": 0.3902367814078421, "grad_norm": 0.7622854409725802, "learning_rate": 3.387672343876724e-06, "loss": 0.6354, "step": 13366 }, { "epoch": 0.3902659776356895, "grad_norm": 0.7248775394038702, "learning_rate": 3.387510137875102e-06, "loss": 0.6462, "step": 13367 }, { "epoch": 0.39029517386353685, "grad_norm": 0.7224770115789539, "learning_rate": 3.38734793187348e-06, "loss": 0.6449, "step": 13368 }, { "epoch": 0.3903243700913842, "grad_norm": 0.7123899008363288, "learning_rate": 3.3871857258718575e-06, "loss": 0.6053, "step": 13369 }, { "epoch": 0.39035356631923157, "grad_norm": 0.9640582079300287, "learning_rate": 3.3870235198702355e-06, "loss": 0.7139, "step": 13370 }, { "epoch": 0.39038276254707893, "grad_norm": 0.6877889891029266, "learning_rate": 3.3868613138686135e-06, "loss": 0.6067, "step": 13371 }, { "epoch": 0.3904119587749263, "grad_norm": 0.7647240860092044, "learning_rate": 3.3866991078669915e-06, "loss": 0.6878, "step": 13372 }, { "epoch": 0.39044115500277365, "grad_norm": 0.7838043647518157, "learning_rate": 3.386536901865369e-06, "loss": 0.753, "step": 13373 }, { "epoch": 0.390470351230621, "grad_norm": 0.7290069596848989, "learning_rate": 3.386374695863747e-06, "loss": 0.6312, "step": 13374 }, { "epoch": 0.3904995474584684, "grad_norm": 0.7736045151514414, "learning_rate": 3.386212489862125e-06, "loss": 0.6751, "step": 13375 }, { "epoch": 0.39052874368631574, "grad_norm": 0.7158839340243559, "learning_rate": 3.386050283860503e-06, "loss": 0.6404, "step": 13376 }, { "epoch": 0.3905579399141631, "grad_norm": 0.7612728645118109, "learning_rate": 3.385888077858881e-06, "loss": 0.7187, "step": 13377 }, { "epoch": 0.39058713614201046, "grad_norm": 0.7149144177884642, "learning_rate": 3.3857258718572587e-06, "loss": 0.6344, "step": 13378 }, { "epoch": 0.3906163323698578, "grad_norm": 0.748774284637062, "learning_rate": 3.3855636658556367e-06, "loss": 0.5505, "step": 13379 }, { "epoch": 0.3906455285977052, "grad_norm": 0.7376459670512816, "learning_rate": 3.3854014598540147e-06, "loss": 0.6978, "step": 13380 }, { "epoch": 0.39067472482555254, "grad_norm": 0.7801649840894194, "learning_rate": 3.3852392538523927e-06, "loss": 0.6629, "step": 13381 }, { "epoch": 0.3907039210533999, "grad_norm": 0.7503470942335985, "learning_rate": 3.3850770478507703e-06, "loss": 0.6644, "step": 13382 }, { "epoch": 0.39073311728124727, "grad_norm": 0.8114015847122672, "learning_rate": 3.3849148418491483e-06, "loss": 0.7518, "step": 13383 }, { "epoch": 0.3907623135090946, "grad_norm": 0.7477816860207759, "learning_rate": 3.3847526358475268e-06, "loss": 0.7087, "step": 13384 }, { "epoch": 0.390791509736942, "grad_norm": 0.7478189938065127, "learning_rate": 3.3845904298459048e-06, "loss": 0.627, "step": 13385 }, { "epoch": 0.39082070596478935, "grad_norm": 0.7549552119890873, "learning_rate": 3.3844282238442828e-06, "loss": 0.7027, "step": 13386 }, { "epoch": 0.3908499021926367, "grad_norm": 0.745921085175651, "learning_rate": 3.3842660178426608e-06, "loss": 0.6885, "step": 13387 }, { "epoch": 0.3908790984204841, "grad_norm": 0.757628048829829, "learning_rate": 3.3841038118410384e-06, "loss": 0.7058, "step": 13388 }, { "epoch": 0.39090829464833143, "grad_norm": 0.767882148545368, "learning_rate": 3.3839416058394164e-06, "loss": 0.7265, "step": 13389 }, { "epoch": 0.3909374908761788, "grad_norm": 0.726131253847007, "learning_rate": 3.3837793998377944e-06, "loss": 0.6435, "step": 13390 }, { "epoch": 0.39096668710402616, "grad_norm": 0.7457154398130235, "learning_rate": 3.3836171938361724e-06, "loss": 0.6502, "step": 13391 }, { "epoch": 0.3909958833318735, "grad_norm": 0.7821907480426082, "learning_rate": 3.38345498783455e-06, "loss": 0.7802, "step": 13392 }, { "epoch": 0.3910250795597209, "grad_norm": 0.724300602744313, "learning_rate": 3.383292781832928e-06, "loss": 0.5741, "step": 13393 }, { "epoch": 0.39105427578756824, "grad_norm": 0.7135143535168317, "learning_rate": 3.383130575831306e-06, "loss": 0.6403, "step": 13394 }, { "epoch": 0.3910834720154156, "grad_norm": 0.7106182675421697, "learning_rate": 3.382968369829684e-06, "loss": 0.6252, "step": 13395 }, { "epoch": 0.39111266824326296, "grad_norm": 0.7735293070856181, "learning_rate": 3.382806163828062e-06, "loss": 0.566, "step": 13396 }, { "epoch": 0.3911418644711103, "grad_norm": 0.6883543395173373, "learning_rate": 3.3826439578264396e-06, "loss": 0.6026, "step": 13397 }, { "epoch": 0.3911710606989577, "grad_norm": 0.6528241818532614, "learning_rate": 3.3824817518248176e-06, "loss": 0.558, "step": 13398 }, { "epoch": 0.39120025692680505, "grad_norm": 0.728349969867654, "learning_rate": 3.3823195458231956e-06, "loss": 0.6202, "step": 13399 }, { "epoch": 0.3912294531546524, "grad_norm": 0.7279866383039766, "learning_rate": 3.3821573398215736e-06, "loss": 0.7011, "step": 13400 }, { "epoch": 0.39125864938249977, "grad_norm": 0.7375639491447191, "learning_rate": 3.381995133819951e-06, "loss": 0.6712, "step": 13401 }, { "epoch": 0.39128784561034713, "grad_norm": 0.7972639600349963, "learning_rate": 3.381832927818329e-06, "loss": 0.7131, "step": 13402 }, { "epoch": 0.3913170418381945, "grad_norm": 0.7403830384918046, "learning_rate": 3.3816707218167076e-06, "loss": 0.6875, "step": 13403 }, { "epoch": 0.39134623806604185, "grad_norm": 0.7084063452615375, "learning_rate": 3.3815085158150856e-06, "loss": 0.5943, "step": 13404 }, { "epoch": 0.3913754342938892, "grad_norm": 0.8112324932730347, "learning_rate": 3.3813463098134636e-06, "loss": 0.6876, "step": 13405 }, { "epoch": 0.3914046305217366, "grad_norm": 0.7295359080246674, "learning_rate": 3.3811841038118416e-06, "loss": 0.6116, "step": 13406 }, { "epoch": 0.39143382674958394, "grad_norm": 0.8403583883729248, "learning_rate": 3.3810218978102192e-06, "loss": 0.7595, "step": 13407 }, { "epoch": 0.3914630229774313, "grad_norm": 0.7088421062109851, "learning_rate": 3.3808596918085972e-06, "loss": 0.5645, "step": 13408 }, { "epoch": 0.39149221920527866, "grad_norm": 0.713699062804271, "learning_rate": 3.3806974858069752e-06, "loss": 0.6259, "step": 13409 }, { "epoch": 0.391521415433126, "grad_norm": 0.7222490277588461, "learning_rate": 3.3805352798053533e-06, "loss": 0.6411, "step": 13410 }, { "epoch": 0.3915506116609734, "grad_norm": 0.7328997662947575, "learning_rate": 3.380373073803731e-06, "loss": 0.6898, "step": 13411 }, { "epoch": 0.39157980788882074, "grad_norm": 0.7858961116896324, "learning_rate": 3.380210867802109e-06, "loss": 0.7674, "step": 13412 }, { "epoch": 0.3916090041166681, "grad_norm": 0.7029076920990999, "learning_rate": 3.380048661800487e-06, "loss": 0.5988, "step": 13413 }, { "epoch": 0.39163820034451546, "grad_norm": 0.7821811360590591, "learning_rate": 3.379886455798865e-06, "loss": 0.7133, "step": 13414 }, { "epoch": 0.3916673965723628, "grad_norm": 0.6732368081325932, "learning_rate": 3.379724249797243e-06, "loss": 0.5388, "step": 13415 }, { "epoch": 0.3916965928002102, "grad_norm": 0.89652984853213, "learning_rate": 3.3795620437956204e-06, "loss": 0.6622, "step": 13416 }, { "epoch": 0.3917257890280576, "grad_norm": 0.7777399700723734, "learning_rate": 3.3793998377939985e-06, "loss": 0.7594, "step": 13417 }, { "epoch": 0.39175498525590496, "grad_norm": 0.6926115520224736, "learning_rate": 3.3792376317923765e-06, "loss": 0.6379, "step": 13418 }, { "epoch": 0.3917841814837523, "grad_norm": 0.7785211764034107, "learning_rate": 3.3790754257907545e-06, "loss": 0.6478, "step": 13419 }, { "epoch": 0.3918133777115997, "grad_norm": 1.1598973389299942, "learning_rate": 3.378913219789132e-06, "loss": 0.6992, "step": 13420 }, { "epoch": 0.39184257393944705, "grad_norm": 0.7451612466335447, "learning_rate": 3.37875101378751e-06, "loss": 0.7292, "step": 13421 }, { "epoch": 0.3918717701672944, "grad_norm": 0.7120223282036083, "learning_rate": 3.3785888077858885e-06, "loss": 0.629, "step": 13422 }, { "epoch": 0.39190096639514177, "grad_norm": 0.7107244506639484, "learning_rate": 3.3784266017842665e-06, "loss": 0.6131, "step": 13423 }, { "epoch": 0.39193016262298913, "grad_norm": 0.7811947989254798, "learning_rate": 3.3782643957826445e-06, "loss": 0.5909, "step": 13424 }, { "epoch": 0.3919593588508365, "grad_norm": 0.7355264175311926, "learning_rate": 3.3781021897810225e-06, "loss": 0.6858, "step": 13425 }, { "epoch": 0.39198855507868385, "grad_norm": 0.7420235936567487, "learning_rate": 3.3779399837794e-06, "loss": 0.6627, "step": 13426 }, { "epoch": 0.3920177513065312, "grad_norm": 0.7288006894497202, "learning_rate": 3.377777777777778e-06, "loss": 0.6745, "step": 13427 }, { "epoch": 0.3920469475343786, "grad_norm": 0.7341626847559579, "learning_rate": 3.377615571776156e-06, "loss": 0.6363, "step": 13428 }, { "epoch": 0.39207614376222594, "grad_norm": 0.7113263449039373, "learning_rate": 3.377453365774534e-06, "loss": 0.5587, "step": 13429 }, { "epoch": 0.3921053399900733, "grad_norm": 0.7194871828671023, "learning_rate": 3.3772911597729117e-06, "loss": 0.6066, "step": 13430 }, { "epoch": 0.39213453621792066, "grad_norm": 0.7307354648207053, "learning_rate": 3.3771289537712897e-06, "loss": 0.7, "step": 13431 }, { "epoch": 0.392163732445768, "grad_norm": 0.736546631763881, "learning_rate": 3.3769667477696677e-06, "loss": 0.6597, "step": 13432 }, { "epoch": 0.3921929286736154, "grad_norm": 0.7494636075387041, "learning_rate": 3.3768045417680457e-06, "loss": 0.6642, "step": 13433 }, { "epoch": 0.39222212490146274, "grad_norm": 0.793945863036729, "learning_rate": 3.3766423357664237e-06, "loss": 0.7665, "step": 13434 }, { "epoch": 0.3922513211293101, "grad_norm": 0.6525395380271174, "learning_rate": 3.3764801297648013e-06, "loss": 0.5692, "step": 13435 }, { "epoch": 0.39228051735715747, "grad_norm": 0.7521144484264402, "learning_rate": 3.3763179237631793e-06, "loss": 0.7105, "step": 13436 }, { "epoch": 0.3923097135850048, "grad_norm": 0.7613841439165049, "learning_rate": 3.3761557177615573e-06, "loss": 0.7217, "step": 13437 }, { "epoch": 0.3923389098128522, "grad_norm": 0.7790256781636677, "learning_rate": 3.3759935117599353e-06, "loss": 0.7062, "step": 13438 }, { "epoch": 0.39236810604069955, "grad_norm": 0.70498891500993, "learning_rate": 3.375831305758313e-06, "loss": 0.6265, "step": 13439 }, { "epoch": 0.3923973022685469, "grad_norm": 0.7417369228279633, "learning_rate": 3.3756690997566913e-06, "loss": 0.6852, "step": 13440 }, { "epoch": 0.39242649849639427, "grad_norm": 0.7788328874129061, "learning_rate": 3.3755068937550693e-06, "loss": 0.6349, "step": 13441 }, { "epoch": 0.39245569472424163, "grad_norm": 0.7787064383747385, "learning_rate": 3.3753446877534474e-06, "loss": 0.648, "step": 13442 }, { "epoch": 0.392484890952089, "grad_norm": 0.744755470600729, "learning_rate": 3.3751824817518254e-06, "loss": 0.7017, "step": 13443 }, { "epoch": 0.39251408717993636, "grad_norm": 0.757185737568228, "learning_rate": 3.3750202757502034e-06, "loss": 0.6779, "step": 13444 }, { "epoch": 0.3925432834077837, "grad_norm": 0.7103853701170786, "learning_rate": 3.374858069748581e-06, "loss": 0.5699, "step": 13445 }, { "epoch": 0.3925724796356311, "grad_norm": 0.714664030869562, "learning_rate": 3.374695863746959e-06, "loss": 0.6565, "step": 13446 }, { "epoch": 0.39260167586347844, "grad_norm": 0.7204340761324111, "learning_rate": 3.374533657745337e-06, "loss": 0.6382, "step": 13447 }, { "epoch": 0.3926308720913258, "grad_norm": 0.7429654068833951, "learning_rate": 3.374371451743715e-06, "loss": 0.7099, "step": 13448 }, { "epoch": 0.39266006831917316, "grad_norm": 0.7786970097479639, "learning_rate": 3.3742092457420926e-06, "loss": 0.7174, "step": 13449 }, { "epoch": 0.3926892645470205, "grad_norm": 0.6533830908357453, "learning_rate": 3.3740470397404706e-06, "loss": 0.5435, "step": 13450 }, { "epoch": 0.3927184607748679, "grad_norm": 0.7573854721523889, "learning_rate": 3.3738848337388486e-06, "loss": 0.6599, "step": 13451 }, { "epoch": 0.39274765700271524, "grad_norm": 0.7744743702614005, "learning_rate": 3.3737226277372266e-06, "loss": 0.7123, "step": 13452 }, { "epoch": 0.3927768532305626, "grad_norm": 0.7401050868022798, "learning_rate": 3.3735604217356046e-06, "loss": 0.6252, "step": 13453 }, { "epoch": 0.39280604945840997, "grad_norm": 0.7017242624721225, "learning_rate": 3.373398215733982e-06, "loss": 0.6059, "step": 13454 }, { "epoch": 0.39283524568625733, "grad_norm": 1.188129937456483, "learning_rate": 3.37323600973236e-06, "loss": 0.7526, "step": 13455 }, { "epoch": 0.3928644419141047, "grad_norm": 1.0336836324850895, "learning_rate": 3.373073803730738e-06, "loss": 0.6424, "step": 13456 }, { "epoch": 0.39289363814195205, "grad_norm": 0.6313429651228816, "learning_rate": 3.372911597729116e-06, "loss": 0.5014, "step": 13457 }, { "epoch": 0.3929228343697994, "grad_norm": 0.7305128274172594, "learning_rate": 3.3727493917274938e-06, "loss": 0.6127, "step": 13458 }, { "epoch": 0.3929520305976468, "grad_norm": 0.7210794543966309, "learning_rate": 3.372587185725872e-06, "loss": 0.6394, "step": 13459 }, { "epoch": 0.39298122682549413, "grad_norm": 0.7648681770233154, "learning_rate": 3.37242497972425e-06, "loss": 0.696, "step": 13460 }, { "epoch": 0.3930104230533415, "grad_norm": 0.6673654939532564, "learning_rate": 3.372262773722628e-06, "loss": 0.5936, "step": 13461 }, { "epoch": 0.39303961928118886, "grad_norm": 0.7650840564226549, "learning_rate": 3.3721005677210062e-06, "loss": 0.6317, "step": 13462 }, { "epoch": 0.3930688155090362, "grad_norm": 0.8628478406600094, "learning_rate": 3.3719383617193842e-06, "loss": 0.7785, "step": 13463 }, { "epoch": 0.3930980117368836, "grad_norm": 0.6998516581892291, "learning_rate": 3.371776155717762e-06, "loss": 0.6207, "step": 13464 }, { "epoch": 0.39312720796473094, "grad_norm": 0.7192501056648651, "learning_rate": 3.37161394971614e-06, "loss": 0.642, "step": 13465 }, { "epoch": 0.3931564041925783, "grad_norm": 0.7369737125632733, "learning_rate": 3.371451743714518e-06, "loss": 0.7318, "step": 13466 }, { "epoch": 0.39318560042042566, "grad_norm": 0.7744607413111606, "learning_rate": 3.371289537712896e-06, "loss": 0.7011, "step": 13467 }, { "epoch": 0.393214796648273, "grad_norm": 0.7815257095050246, "learning_rate": 3.3711273317112734e-06, "loss": 0.6504, "step": 13468 }, { "epoch": 0.3932439928761204, "grad_norm": 0.6920800236576861, "learning_rate": 3.3709651257096514e-06, "loss": 0.6395, "step": 13469 }, { "epoch": 0.39327318910396775, "grad_norm": 0.6874750670563243, "learning_rate": 3.3708029197080294e-06, "loss": 0.5539, "step": 13470 }, { "epoch": 0.3933023853318151, "grad_norm": 0.7439250337209328, "learning_rate": 3.3706407137064074e-06, "loss": 0.6897, "step": 13471 }, { "epoch": 0.39333158155966247, "grad_norm": 0.7720668438822557, "learning_rate": 3.3704785077047854e-06, "loss": 0.738, "step": 13472 }, { "epoch": 0.39336077778750983, "grad_norm": 0.7730534567544423, "learning_rate": 3.370316301703163e-06, "loss": 0.6991, "step": 13473 }, { "epoch": 0.3933899740153572, "grad_norm": 0.694584886999101, "learning_rate": 3.370154095701541e-06, "loss": 0.5822, "step": 13474 }, { "epoch": 0.39341917024320455, "grad_norm": 0.8055593309855386, "learning_rate": 3.369991889699919e-06, "loss": 0.6873, "step": 13475 }, { "epoch": 0.3934483664710519, "grad_norm": 0.7537692931537231, "learning_rate": 3.369829683698297e-06, "loss": 0.6602, "step": 13476 }, { "epoch": 0.3934775626988993, "grad_norm": 0.7494946742719438, "learning_rate": 3.3696674776966746e-06, "loss": 0.6323, "step": 13477 }, { "epoch": 0.3935067589267467, "grad_norm": 0.7112268961339334, "learning_rate": 3.369505271695053e-06, "loss": 0.6644, "step": 13478 }, { "epoch": 0.39353595515459405, "grad_norm": 0.6897689390694077, "learning_rate": 3.369343065693431e-06, "loss": 0.6029, "step": 13479 }, { "epoch": 0.3935651513824414, "grad_norm": 0.7518079749803134, "learning_rate": 3.369180859691809e-06, "loss": 0.7621, "step": 13480 }, { "epoch": 0.3935943476102888, "grad_norm": 0.7463368646524908, "learning_rate": 3.369018653690187e-06, "loss": 0.6637, "step": 13481 }, { "epoch": 0.39362354383813614, "grad_norm": 0.689349902330268, "learning_rate": 3.368856447688565e-06, "loss": 0.5917, "step": 13482 }, { "epoch": 0.3936527400659835, "grad_norm": 0.770520574451496, "learning_rate": 3.3686942416869427e-06, "loss": 0.5929, "step": 13483 }, { "epoch": 0.39368193629383086, "grad_norm": 0.7766118811288538, "learning_rate": 3.3685320356853207e-06, "loss": 0.7082, "step": 13484 }, { "epoch": 0.3937111325216782, "grad_norm": 0.6969189584588641, "learning_rate": 3.3683698296836987e-06, "loss": 0.6143, "step": 13485 }, { "epoch": 0.3937403287495256, "grad_norm": 0.8105738524218299, "learning_rate": 3.3682076236820767e-06, "loss": 0.7028, "step": 13486 }, { "epoch": 0.39376952497737294, "grad_norm": 0.7694058543129758, "learning_rate": 3.3680454176804543e-06, "loss": 0.6449, "step": 13487 }, { "epoch": 0.3937987212052203, "grad_norm": 0.8048800802045017, "learning_rate": 3.3678832116788323e-06, "loss": 0.7342, "step": 13488 }, { "epoch": 0.39382791743306766, "grad_norm": 0.7090775713146305, "learning_rate": 3.3677210056772103e-06, "loss": 0.599, "step": 13489 }, { "epoch": 0.393857113660915, "grad_norm": 0.7460534834180695, "learning_rate": 3.3675587996755883e-06, "loss": 0.708, "step": 13490 }, { "epoch": 0.3938863098887624, "grad_norm": 0.7425409671468021, "learning_rate": 3.3673965936739663e-06, "loss": 0.6113, "step": 13491 }, { "epoch": 0.39391550611660975, "grad_norm": 0.7385722698369322, "learning_rate": 3.367234387672344e-06, "loss": 0.6608, "step": 13492 }, { "epoch": 0.3939447023444571, "grad_norm": 0.7327889268190503, "learning_rate": 3.367072181670722e-06, "loss": 0.6157, "step": 13493 }, { "epoch": 0.39397389857230447, "grad_norm": 0.7296488223637135, "learning_rate": 3.3669099756691e-06, "loss": 0.6303, "step": 13494 }, { "epoch": 0.39400309480015183, "grad_norm": 0.6852094209864394, "learning_rate": 3.366747769667478e-06, "loss": 0.6032, "step": 13495 }, { "epoch": 0.3940322910279992, "grad_norm": 0.7466070856415857, "learning_rate": 3.3665855636658555e-06, "loss": 0.6556, "step": 13496 }, { "epoch": 0.39406148725584655, "grad_norm": 0.7663072092021764, "learning_rate": 3.366423357664234e-06, "loss": 0.7474, "step": 13497 }, { "epoch": 0.3940906834836939, "grad_norm": 0.7132097475633287, "learning_rate": 3.366261151662612e-06, "loss": 0.6053, "step": 13498 }, { "epoch": 0.3941198797115413, "grad_norm": 0.6677615249279769, "learning_rate": 3.36609894566099e-06, "loss": 0.5849, "step": 13499 }, { "epoch": 0.39414907593938864, "grad_norm": 1.2145180213804556, "learning_rate": 3.365936739659368e-06, "loss": 0.6625, "step": 13500 }, { "epoch": 0.394178272167236, "grad_norm": 0.6640439091804248, "learning_rate": 3.365774533657746e-06, "loss": 0.5426, "step": 13501 }, { "epoch": 0.39420746839508336, "grad_norm": 0.7772024148003253, "learning_rate": 3.3656123276561235e-06, "loss": 0.7359, "step": 13502 }, { "epoch": 0.3942366646229307, "grad_norm": 0.7464112693911189, "learning_rate": 3.3654501216545015e-06, "loss": 0.6303, "step": 13503 }, { "epoch": 0.3942658608507781, "grad_norm": 0.8850849667404465, "learning_rate": 3.3652879156528795e-06, "loss": 0.6736, "step": 13504 }, { "epoch": 0.39429505707862544, "grad_norm": 0.7893051296498613, "learning_rate": 3.3651257096512575e-06, "loss": 0.7476, "step": 13505 }, { "epoch": 0.3943242533064728, "grad_norm": 0.7477506472158983, "learning_rate": 3.364963503649635e-06, "loss": 0.6694, "step": 13506 }, { "epoch": 0.39435344953432017, "grad_norm": 0.6879198007560715, "learning_rate": 3.364801297648013e-06, "loss": 0.587, "step": 13507 }, { "epoch": 0.3943826457621675, "grad_norm": 0.6346417312788977, "learning_rate": 3.364639091646391e-06, "loss": 0.5134, "step": 13508 }, { "epoch": 0.3944118419900149, "grad_norm": 0.6988283848357228, "learning_rate": 3.364476885644769e-06, "loss": 0.6408, "step": 13509 }, { "epoch": 0.39444103821786225, "grad_norm": 0.7361527915411371, "learning_rate": 3.364314679643147e-06, "loss": 0.6861, "step": 13510 }, { "epoch": 0.3944702344457096, "grad_norm": 0.6844941343391417, "learning_rate": 3.3641524736415247e-06, "loss": 0.5836, "step": 13511 }, { "epoch": 0.39449943067355697, "grad_norm": 0.8069369377323825, "learning_rate": 3.3639902676399027e-06, "loss": 0.5902, "step": 13512 }, { "epoch": 0.39452862690140433, "grad_norm": 0.7165915353196105, "learning_rate": 3.3638280616382808e-06, "loss": 0.6722, "step": 13513 }, { "epoch": 0.3945578231292517, "grad_norm": 0.7704287965641092, "learning_rate": 3.3636658556366588e-06, "loss": 0.7581, "step": 13514 }, { "epoch": 0.39458701935709906, "grad_norm": 0.7193560651746991, "learning_rate": 3.3635036496350363e-06, "loss": 0.636, "step": 13515 }, { "epoch": 0.3946162155849464, "grad_norm": 0.7089172952428748, "learning_rate": 3.3633414436334148e-06, "loss": 0.6257, "step": 13516 }, { "epoch": 0.3946454118127938, "grad_norm": 0.7836856404361271, "learning_rate": 3.3631792376317928e-06, "loss": 0.7361, "step": 13517 }, { "epoch": 0.39467460804064114, "grad_norm": 0.8010940113870649, "learning_rate": 3.3630170316301708e-06, "loss": 0.7553, "step": 13518 }, { "epoch": 0.3947038042684885, "grad_norm": 0.8100388059707951, "learning_rate": 3.362854825628549e-06, "loss": 0.7464, "step": 13519 }, { "epoch": 0.39473300049633586, "grad_norm": 0.7502849390989715, "learning_rate": 3.362692619626927e-06, "loss": 0.6212, "step": 13520 }, { "epoch": 0.3947621967241832, "grad_norm": 0.7056669244827432, "learning_rate": 3.3625304136253044e-06, "loss": 0.6423, "step": 13521 }, { "epoch": 0.3947913929520306, "grad_norm": 0.7954156493000721, "learning_rate": 3.3623682076236824e-06, "loss": 0.7243, "step": 13522 }, { "epoch": 0.39482058917987795, "grad_norm": 0.7074267800599925, "learning_rate": 3.3622060016220604e-06, "loss": 0.6129, "step": 13523 }, { "epoch": 0.3948497854077253, "grad_norm": 0.6716631293600157, "learning_rate": 3.3620437956204384e-06, "loss": 0.5768, "step": 13524 }, { "epoch": 0.39487898163557267, "grad_norm": 0.7188619358674435, "learning_rate": 3.361881589618816e-06, "loss": 0.6181, "step": 13525 }, { "epoch": 0.39490817786342003, "grad_norm": 0.6744026554299944, "learning_rate": 3.361719383617194e-06, "loss": 0.5745, "step": 13526 }, { "epoch": 0.3949373740912674, "grad_norm": 0.9449601692305563, "learning_rate": 3.361557177615572e-06, "loss": 0.7214, "step": 13527 }, { "epoch": 0.39496657031911475, "grad_norm": 0.7216502040579497, "learning_rate": 3.36139497161395e-06, "loss": 0.64, "step": 13528 }, { "epoch": 0.3949957665469621, "grad_norm": 0.6799012684344987, "learning_rate": 3.361232765612328e-06, "loss": 0.578, "step": 13529 }, { "epoch": 0.3950249627748095, "grad_norm": 0.7402147599000399, "learning_rate": 3.3610705596107056e-06, "loss": 0.7203, "step": 13530 }, { "epoch": 0.39505415900265684, "grad_norm": 0.7109565856767168, "learning_rate": 3.3609083536090836e-06, "loss": 0.6541, "step": 13531 }, { "epoch": 0.3950833552305042, "grad_norm": 0.7284779458562495, "learning_rate": 3.3607461476074616e-06, "loss": 0.6154, "step": 13532 }, { "epoch": 0.39511255145835156, "grad_norm": 0.750774026908034, "learning_rate": 3.3605839416058396e-06, "loss": 0.6039, "step": 13533 }, { "epoch": 0.3951417476861989, "grad_norm": 0.785967088876224, "learning_rate": 3.360421735604217e-06, "loss": 0.6971, "step": 13534 }, { "epoch": 0.3951709439140463, "grad_norm": 0.6964358504493456, "learning_rate": 3.3602595296025956e-06, "loss": 0.5729, "step": 13535 }, { "epoch": 0.39520014014189364, "grad_norm": 0.7274790976763409, "learning_rate": 3.3600973236009736e-06, "loss": 0.6709, "step": 13536 }, { "epoch": 0.395229336369741, "grad_norm": 0.7208369264215327, "learning_rate": 3.3599351175993516e-06, "loss": 0.6253, "step": 13537 }, { "epoch": 0.3952585325975884, "grad_norm": 0.791028241852178, "learning_rate": 3.3597729115977297e-06, "loss": 0.6967, "step": 13538 }, { "epoch": 0.3952877288254358, "grad_norm": 0.7493362269678386, "learning_rate": 3.3596107055961077e-06, "loss": 0.6747, "step": 13539 }, { "epoch": 0.39531692505328314, "grad_norm": 0.770802084663571, "learning_rate": 3.3594484995944852e-06, "loss": 0.6536, "step": 13540 }, { "epoch": 0.3953461212811305, "grad_norm": 0.7311962827179189, "learning_rate": 3.3592862935928632e-06, "loss": 0.643, "step": 13541 }, { "epoch": 0.39537531750897786, "grad_norm": 0.7748173984605271, "learning_rate": 3.3591240875912413e-06, "loss": 0.7546, "step": 13542 }, { "epoch": 0.3954045137368252, "grad_norm": 0.6926796901226271, "learning_rate": 3.3589618815896193e-06, "loss": 0.5603, "step": 13543 }, { "epoch": 0.3954337099646726, "grad_norm": 0.7413106647189439, "learning_rate": 3.358799675587997e-06, "loss": 0.7017, "step": 13544 }, { "epoch": 0.39546290619251995, "grad_norm": 0.7168095865825472, "learning_rate": 3.358637469586375e-06, "loss": 0.6415, "step": 13545 }, { "epoch": 0.3954921024203673, "grad_norm": 0.7065604255203017, "learning_rate": 3.358475263584753e-06, "loss": 0.6248, "step": 13546 }, { "epoch": 0.39552129864821467, "grad_norm": 0.7071895990094323, "learning_rate": 3.358313057583131e-06, "loss": 0.6255, "step": 13547 }, { "epoch": 0.39555049487606203, "grad_norm": 0.8217826871162909, "learning_rate": 3.358150851581509e-06, "loss": 0.7653, "step": 13548 }, { "epoch": 0.3955796911039094, "grad_norm": 0.6691553994111888, "learning_rate": 3.3579886455798865e-06, "loss": 0.5767, "step": 13549 }, { "epoch": 0.39560888733175675, "grad_norm": 0.6840787071595853, "learning_rate": 3.3578264395782645e-06, "loss": 0.5828, "step": 13550 }, { "epoch": 0.3956380835596041, "grad_norm": 0.6674122378983675, "learning_rate": 3.3576642335766425e-06, "loss": 0.5746, "step": 13551 }, { "epoch": 0.3956672797874515, "grad_norm": 0.7012878116831767, "learning_rate": 3.3575020275750205e-06, "loss": 0.5879, "step": 13552 }, { "epoch": 0.39569647601529884, "grad_norm": 0.8252122020362757, "learning_rate": 3.357339821573398e-06, "loss": 0.7784, "step": 13553 }, { "epoch": 0.3957256722431462, "grad_norm": 0.7414861911808756, "learning_rate": 3.3571776155717765e-06, "loss": 0.5799, "step": 13554 }, { "epoch": 0.39575486847099356, "grad_norm": 0.7475213842054999, "learning_rate": 3.3570154095701545e-06, "loss": 0.6906, "step": 13555 }, { "epoch": 0.3957840646988409, "grad_norm": 0.66964019571597, "learning_rate": 3.3568532035685325e-06, "loss": 0.5685, "step": 13556 }, { "epoch": 0.3958132609266883, "grad_norm": 0.7435590199526361, "learning_rate": 3.3566909975669105e-06, "loss": 0.671, "step": 13557 }, { "epoch": 0.39584245715453564, "grad_norm": 0.7243875184652464, "learning_rate": 3.3565287915652885e-06, "loss": 0.6252, "step": 13558 }, { "epoch": 0.395871653382383, "grad_norm": 0.7627005383388864, "learning_rate": 3.356366585563666e-06, "loss": 0.5914, "step": 13559 }, { "epoch": 0.39590084961023037, "grad_norm": 0.6955577620118324, "learning_rate": 3.356204379562044e-06, "loss": 0.6089, "step": 13560 }, { "epoch": 0.3959300458380777, "grad_norm": 0.7020355511636238, "learning_rate": 3.356042173560422e-06, "loss": 0.6138, "step": 13561 }, { "epoch": 0.3959592420659251, "grad_norm": 0.6948153305002676, "learning_rate": 3.3558799675588e-06, "loss": 0.5581, "step": 13562 }, { "epoch": 0.39598843829377245, "grad_norm": 0.753551970696385, "learning_rate": 3.3557177615571777e-06, "loss": 0.6835, "step": 13563 }, { "epoch": 0.3960176345216198, "grad_norm": 0.7769354631243773, "learning_rate": 3.3555555555555557e-06, "loss": 0.7439, "step": 13564 }, { "epoch": 0.39604683074946717, "grad_norm": 0.8556179010828927, "learning_rate": 3.3553933495539337e-06, "loss": 0.6017, "step": 13565 }, { "epoch": 0.39607602697731453, "grad_norm": 0.7405226077457876, "learning_rate": 3.3552311435523117e-06, "loss": 0.6718, "step": 13566 }, { "epoch": 0.3961052232051619, "grad_norm": 0.7123256737839754, "learning_rate": 3.3550689375506897e-06, "loss": 0.6246, "step": 13567 }, { "epoch": 0.39613441943300925, "grad_norm": 0.7263316796316167, "learning_rate": 3.3549067315490673e-06, "loss": 0.6852, "step": 13568 }, { "epoch": 0.3961636156608566, "grad_norm": 0.7655026153559926, "learning_rate": 3.3547445255474453e-06, "loss": 0.6701, "step": 13569 }, { "epoch": 0.396192811888704, "grad_norm": 0.7582538099667221, "learning_rate": 3.3545823195458233e-06, "loss": 0.6552, "step": 13570 }, { "epoch": 0.39622200811655134, "grad_norm": 0.7042818039615694, "learning_rate": 3.3544201135442013e-06, "loss": 0.6332, "step": 13571 }, { "epoch": 0.3962512043443987, "grad_norm": 0.799769750373886, "learning_rate": 3.354257907542579e-06, "loss": 0.7711, "step": 13572 }, { "epoch": 0.39628040057224606, "grad_norm": 0.68691725398488, "learning_rate": 3.3540957015409573e-06, "loss": 0.5902, "step": 13573 }, { "epoch": 0.3963095968000934, "grad_norm": 1.1591063711901826, "learning_rate": 3.3539334955393354e-06, "loss": 0.7343, "step": 13574 }, { "epoch": 0.3963387930279408, "grad_norm": 0.7218025744809565, "learning_rate": 3.3537712895377134e-06, "loss": 0.658, "step": 13575 }, { "epoch": 0.39636798925578814, "grad_norm": 0.7738345122032407, "learning_rate": 3.3536090835360914e-06, "loss": 0.695, "step": 13576 }, { "epoch": 0.3963971854836355, "grad_norm": 0.6971474387881758, "learning_rate": 3.3534468775344694e-06, "loss": 0.5994, "step": 13577 }, { "epoch": 0.39642638171148287, "grad_norm": 0.7673541630978644, "learning_rate": 3.353284671532847e-06, "loss": 0.7355, "step": 13578 }, { "epoch": 0.39645557793933023, "grad_norm": 0.6997868947707163, "learning_rate": 3.353122465531225e-06, "loss": 0.5753, "step": 13579 }, { "epoch": 0.3964847741671776, "grad_norm": 0.8134135348481442, "learning_rate": 3.352960259529603e-06, "loss": 0.6907, "step": 13580 }, { "epoch": 0.39651397039502495, "grad_norm": 0.7481415927247878, "learning_rate": 3.352798053527981e-06, "loss": 0.653, "step": 13581 }, { "epoch": 0.3965431666228723, "grad_norm": 0.7247960695852151, "learning_rate": 3.3526358475263586e-06, "loss": 0.5907, "step": 13582 }, { "epoch": 0.3965723628507197, "grad_norm": 0.7057571998896887, "learning_rate": 3.3524736415247366e-06, "loss": 0.6087, "step": 13583 }, { "epoch": 0.39660155907856703, "grad_norm": 0.7688036644583333, "learning_rate": 3.3523114355231146e-06, "loss": 0.6883, "step": 13584 }, { "epoch": 0.3966307553064144, "grad_norm": 0.7862059573701852, "learning_rate": 3.3521492295214926e-06, "loss": 0.7282, "step": 13585 }, { "epoch": 0.39665995153426176, "grad_norm": 1.1498560735530923, "learning_rate": 3.35198702351987e-06, "loss": 0.6747, "step": 13586 }, { "epoch": 0.3966891477621091, "grad_norm": 0.7472444090310608, "learning_rate": 3.351824817518248e-06, "loss": 0.6201, "step": 13587 }, { "epoch": 0.3967183439899565, "grad_norm": 0.7311128438863519, "learning_rate": 3.351662611516626e-06, "loss": 0.6763, "step": 13588 }, { "epoch": 0.39674754021780384, "grad_norm": 0.750035523085201, "learning_rate": 3.351500405515004e-06, "loss": 0.6612, "step": 13589 }, { "epoch": 0.3967767364456512, "grad_norm": 0.6818409161097878, "learning_rate": 3.351338199513382e-06, "loss": 0.5878, "step": 13590 }, { "epoch": 0.39680593267349856, "grad_norm": 0.6816086506109548, "learning_rate": 3.3511759935117606e-06, "loss": 0.6052, "step": 13591 }, { "epoch": 0.3968351289013459, "grad_norm": 0.7598141232021407, "learning_rate": 3.351013787510138e-06, "loss": 0.7756, "step": 13592 }, { "epoch": 0.3968643251291933, "grad_norm": 0.8193423042196084, "learning_rate": 3.3508515815085162e-06, "loss": 0.6777, "step": 13593 }, { "epoch": 0.39689352135704065, "grad_norm": 0.6998828674933516, "learning_rate": 3.3506893755068942e-06, "loss": 0.619, "step": 13594 }, { "epoch": 0.396922717584888, "grad_norm": 0.8360413908807107, "learning_rate": 3.3505271695052722e-06, "loss": 0.7154, "step": 13595 }, { "epoch": 0.39695191381273537, "grad_norm": 0.6872447333389324, "learning_rate": 3.3503649635036502e-06, "loss": 0.5987, "step": 13596 }, { "epoch": 0.39698111004058273, "grad_norm": 0.727052835582698, "learning_rate": 3.350202757502028e-06, "loss": 0.6529, "step": 13597 }, { "epoch": 0.39701030626843015, "grad_norm": 0.7380976193206142, "learning_rate": 3.350040551500406e-06, "loss": 0.6759, "step": 13598 }, { "epoch": 0.3970395024962775, "grad_norm": 0.6655995734107254, "learning_rate": 3.349878345498784e-06, "loss": 0.5938, "step": 13599 }, { "epoch": 0.39706869872412487, "grad_norm": 0.7596266277002565, "learning_rate": 3.349716139497162e-06, "loss": 0.6772, "step": 13600 }, { "epoch": 0.39709789495197223, "grad_norm": 0.7761497448623887, "learning_rate": 3.3495539334955394e-06, "loss": 0.7499, "step": 13601 }, { "epoch": 0.3971270911798196, "grad_norm": 0.6613168451768094, "learning_rate": 3.3493917274939174e-06, "loss": 0.6098, "step": 13602 }, { "epoch": 0.39715628740766695, "grad_norm": 0.716385909676133, "learning_rate": 3.3492295214922954e-06, "loss": 0.6064, "step": 13603 }, { "epoch": 0.3971854836355143, "grad_norm": 0.7417848921368019, "learning_rate": 3.3490673154906734e-06, "loss": 0.6824, "step": 13604 }, { "epoch": 0.3972146798633617, "grad_norm": 0.6844068499249076, "learning_rate": 3.348905109489051e-06, "loss": 0.596, "step": 13605 }, { "epoch": 0.39724387609120904, "grad_norm": 0.7636102516201668, "learning_rate": 3.348742903487429e-06, "loss": 0.6987, "step": 13606 }, { "epoch": 0.3972730723190564, "grad_norm": 0.7703160680908455, "learning_rate": 3.348580697485807e-06, "loss": 0.7476, "step": 13607 }, { "epoch": 0.39730226854690376, "grad_norm": 0.7486422909327074, "learning_rate": 3.348418491484185e-06, "loss": 0.7099, "step": 13608 }, { "epoch": 0.3973314647747511, "grad_norm": 0.7024510466823796, "learning_rate": 3.348256285482563e-06, "loss": 0.6206, "step": 13609 }, { "epoch": 0.3973606610025985, "grad_norm": 0.7264046642093599, "learning_rate": 3.3480940794809415e-06, "loss": 0.6237, "step": 13610 }, { "epoch": 0.39738985723044584, "grad_norm": 0.7750348050396594, "learning_rate": 3.347931873479319e-06, "loss": 0.7646, "step": 13611 }, { "epoch": 0.3974190534582932, "grad_norm": 0.7610031427324402, "learning_rate": 3.347769667477697e-06, "loss": 0.6427, "step": 13612 }, { "epoch": 0.39744824968614056, "grad_norm": 0.6798119408991135, "learning_rate": 3.347607461476075e-06, "loss": 0.585, "step": 13613 }, { "epoch": 0.3974774459139879, "grad_norm": 0.6980237626930204, "learning_rate": 3.347445255474453e-06, "loss": 0.5874, "step": 13614 }, { "epoch": 0.3975066421418353, "grad_norm": 0.7750198906933742, "learning_rate": 3.347283049472831e-06, "loss": 0.7348, "step": 13615 }, { "epoch": 0.39753583836968265, "grad_norm": 0.7234385069694498, "learning_rate": 3.3471208434712087e-06, "loss": 0.6377, "step": 13616 }, { "epoch": 0.39756503459753, "grad_norm": 0.7190233513229453, "learning_rate": 3.3469586374695867e-06, "loss": 0.5785, "step": 13617 }, { "epoch": 0.39759423082537737, "grad_norm": 0.7518548814632562, "learning_rate": 3.3467964314679647e-06, "loss": 0.7416, "step": 13618 }, { "epoch": 0.39762342705322473, "grad_norm": 0.776995531887849, "learning_rate": 3.3466342254663427e-06, "loss": 0.6773, "step": 13619 }, { "epoch": 0.3976526232810721, "grad_norm": 0.7293046861973719, "learning_rate": 3.3464720194647203e-06, "loss": 0.5721, "step": 13620 }, { "epoch": 0.39768181950891945, "grad_norm": 0.6760380862446883, "learning_rate": 3.3463098134630983e-06, "loss": 0.5555, "step": 13621 }, { "epoch": 0.3977110157367668, "grad_norm": 0.7145064425192212, "learning_rate": 3.3461476074614763e-06, "loss": 0.619, "step": 13622 }, { "epoch": 0.3977402119646142, "grad_norm": 0.7140907072246095, "learning_rate": 3.3459854014598543e-06, "loss": 0.6001, "step": 13623 }, { "epoch": 0.39776940819246154, "grad_norm": 0.7300416714201864, "learning_rate": 3.345823195458232e-06, "loss": 0.6777, "step": 13624 }, { "epoch": 0.3977986044203089, "grad_norm": 0.7454316703418942, "learning_rate": 3.34566098945661e-06, "loss": 0.6459, "step": 13625 }, { "epoch": 0.39782780064815626, "grad_norm": 0.7135068092883183, "learning_rate": 3.345498783454988e-06, "loss": 0.6587, "step": 13626 }, { "epoch": 0.3978569968760036, "grad_norm": 0.7121546312036461, "learning_rate": 3.345336577453366e-06, "loss": 0.6512, "step": 13627 }, { "epoch": 0.397886193103851, "grad_norm": 0.7080680649535708, "learning_rate": 3.345174371451744e-06, "loss": 0.6072, "step": 13628 }, { "epoch": 0.39791538933169834, "grad_norm": 0.7769640554045889, "learning_rate": 3.3450121654501223e-06, "loss": 0.7127, "step": 13629 }, { "epoch": 0.3979445855595457, "grad_norm": 0.8465541294369506, "learning_rate": 3.3448499594485e-06, "loss": 0.6523, "step": 13630 }, { "epoch": 0.39797378178739307, "grad_norm": 0.7269716349195131, "learning_rate": 3.344687753446878e-06, "loss": 0.6211, "step": 13631 }, { "epoch": 0.3980029780152404, "grad_norm": 0.7113631902841009, "learning_rate": 3.344525547445256e-06, "loss": 0.6415, "step": 13632 }, { "epoch": 0.3980321742430878, "grad_norm": 1.0593554823752003, "learning_rate": 3.344363341443634e-06, "loss": 0.7264, "step": 13633 }, { "epoch": 0.39806137047093515, "grad_norm": 0.6913739932040379, "learning_rate": 3.344201135442012e-06, "loss": 0.632, "step": 13634 }, { "epoch": 0.3980905666987825, "grad_norm": 0.723354055047149, "learning_rate": 3.3440389294403895e-06, "loss": 0.6257, "step": 13635 }, { "epoch": 0.39811976292662987, "grad_norm": 0.790825726767498, "learning_rate": 3.3438767234387675e-06, "loss": 0.7208, "step": 13636 }, { "epoch": 0.39814895915447723, "grad_norm": 0.6992566517507749, "learning_rate": 3.3437145174371455e-06, "loss": 0.5933, "step": 13637 }, { "epoch": 0.3981781553823246, "grad_norm": 0.7231313266794197, "learning_rate": 3.3435523114355236e-06, "loss": 0.677, "step": 13638 }, { "epoch": 0.39820735161017196, "grad_norm": 0.7684694955603428, "learning_rate": 3.343390105433901e-06, "loss": 0.6723, "step": 13639 }, { "epoch": 0.3982365478380193, "grad_norm": 0.7374582204706056, "learning_rate": 3.343227899432279e-06, "loss": 0.6862, "step": 13640 }, { "epoch": 0.3982657440658667, "grad_norm": 0.7948583137229832, "learning_rate": 3.343065693430657e-06, "loss": 0.7474, "step": 13641 }, { "epoch": 0.39829494029371404, "grad_norm": 0.7437720584908349, "learning_rate": 3.342903487429035e-06, "loss": 0.6103, "step": 13642 }, { "epoch": 0.3983241365215614, "grad_norm": 0.794151058842894, "learning_rate": 3.3427412814274127e-06, "loss": 0.7496, "step": 13643 }, { "epoch": 0.39835333274940876, "grad_norm": 0.7419759271697816, "learning_rate": 3.3425790754257907e-06, "loss": 0.6678, "step": 13644 }, { "epoch": 0.3983825289772561, "grad_norm": 0.7179938233742997, "learning_rate": 3.3424168694241688e-06, "loss": 0.6921, "step": 13645 }, { "epoch": 0.3984117252051035, "grad_norm": 0.6983283696233543, "learning_rate": 3.3422546634225468e-06, "loss": 0.5952, "step": 13646 }, { "epoch": 0.39844092143295085, "grad_norm": 0.9819038183817855, "learning_rate": 3.3420924574209248e-06, "loss": 0.6807, "step": 13647 }, { "epoch": 0.3984701176607982, "grad_norm": 0.7835392574617642, "learning_rate": 3.341930251419303e-06, "loss": 0.7748, "step": 13648 }, { "epoch": 0.39849931388864557, "grad_norm": 0.7293278892139802, "learning_rate": 3.3417680454176808e-06, "loss": 0.6666, "step": 13649 }, { "epoch": 0.39852851011649293, "grad_norm": 0.7348586473425702, "learning_rate": 3.341605839416059e-06, "loss": 0.6532, "step": 13650 }, { "epoch": 0.3985577063443403, "grad_norm": 0.7404465280508087, "learning_rate": 3.341443633414437e-06, "loss": 0.6149, "step": 13651 }, { "epoch": 0.39858690257218765, "grad_norm": 0.7731241471158079, "learning_rate": 3.341281427412815e-06, "loss": 0.6341, "step": 13652 }, { "epoch": 0.398616098800035, "grad_norm": 0.7543346970748074, "learning_rate": 3.341119221411193e-06, "loss": 0.698, "step": 13653 }, { "epoch": 0.3986452950278824, "grad_norm": 0.8430288685421793, "learning_rate": 3.3409570154095704e-06, "loss": 0.7615, "step": 13654 }, { "epoch": 0.39867449125572973, "grad_norm": 0.6694387640389543, "learning_rate": 3.3407948094079484e-06, "loss": 0.5766, "step": 13655 }, { "epoch": 0.3987036874835771, "grad_norm": 0.7672783823599576, "learning_rate": 3.3406326034063264e-06, "loss": 0.7134, "step": 13656 }, { "epoch": 0.39873288371142446, "grad_norm": 0.7738214486566706, "learning_rate": 3.3404703974047044e-06, "loss": 0.7315, "step": 13657 }, { "epoch": 0.3987620799392718, "grad_norm": 0.7543852793132572, "learning_rate": 3.340308191403082e-06, "loss": 0.6283, "step": 13658 }, { "epoch": 0.39879127616711924, "grad_norm": 0.6775446926833342, "learning_rate": 3.34014598540146e-06, "loss": 0.5555, "step": 13659 }, { "epoch": 0.3988204723949666, "grad_norm": 0.7044858677079856, "learning_rate": 3.339983779399838e-06, "loss": 0.6024, "step": 13660 }, { "epoch": 0.39884966862281396, "grad_norm": 0.7612975576642754, "learning_rate": 3.339821573398216e-06, "loss": 0.6965, "step": 13661 }, { "epoch": 0.3988788648506613, "grad_norm": 0.7355889638384392, "learning_rate": 3.3396593673965936e-06, "loss": 0.7145, "step": 13662 }, { "epoch": 0.3989080610785087, "grad_norm": 0.7446852778586068, "learning_rate": 3.3394971613949716e-06, "loss": 0.6393, "step": 13663 }, { "epoch": 0.39893725730635604, "grad_norm": 0.8723585580040398, "learning_rate": 3.3393349553933496e-06, "loss": 0.7405, "step": 13664 }, { "epoch": 0.3989664535342034, "grad_norm": 0.7633163189165261, "learning_rate": 3.3391727493917276e-06, "loss": 0.6891, "step": 13665 }, { "epoch": 0.39899564976205076, "grad_norm": 0.6939095250685182, "learning_rate": 3.3390105433901056e-06, "loss": 0.6222, "step": 13666 }, { "epoch": 0.3990248459898981, "grad_norm": 0.7477650031138959, "learning_rate": 3.338848337388484e-06, "loss": 0.6591, "step": 13667 }, { "epoch": 0.3990540422177455, "grad_norm": 0.719068709346442, "learning_rate": 3.3386861313868616e-06, "loss": 0.5871, "step": 13668 }, { "epoch": 0.39908323844559285, "grad_norm": 0.7604257314459859, "learning_rate": 3.3385239253852396e-06, "loss": 0.6327, "step": 13669 }, { "epoch": 0.3991124346734402, "grad_norm": 0.7423158616343901, "learning_rate": 3.3383617193836177e-06, "loss": 0.6881, "step": 13670 }, { "epoch": 0.39914163090128757, "grad_norm": 0.7174244361007923, "learning_rate": 3.3381995133819957e-06, "loss": 0.6633, "step": 13671 }, { "epoch": 0.39917082712913493, "grad_norm": 0.7716195908280639, "learning_rate": 3.3380373073803737e-06, "loss": 0.6976, "step": 13672 }, { "epoch": 0.3992000233569823, "grad_norm": 0.6862602185534051, "learning_rate": 3.3378751013787513e-06, "loss": 0.5673, "step": 13673 }, { "epoch": 0.39922921958482965, "grad_norm": 0.7752290493996274, "learning_rate": 3.3377128953771293e-06, "loss": 0.7529, "step": 13674 }, { "epoch": 0.399258415812677, "grad_norm": 0.7266863174772753, "learning_rate": 3.3375506893755073e-06, "loss": 0.6261, "step": 13675 }, { "epoch": 0.3992876120405244, "grad_norm": 0.7708086239076969, "learning_rate": 3.3373884833738853e-06, "loss": 0.6738, "step": 13676 }, { "epoch": 0.39931680826837174, "grad_norm": 0.7710223262456037, "learning_rate": 3.337226277372263e-06, "loss": 0.6992, "step": 13677 }, { "epoch": 0.3993460044962191, "grad_norm": 0.7045728331247578, "learning_rate": 3.337064071370641e-06, "loss": 0.5719, "step": 13678 }, { "epoch": 0.39937520072406646, "grad_norm": 0.7975005065803664, "learning_rate": 3.336901865369019e-06, "loss": 0.6845, "step": 13679 }, { "epoch": 0.3994043969519138, "grad_norm": 0.7818613289531294, "learning_rate": 3.336739659367397e-06, "loss": 0.6838, "step": 13680 }, { "epoch": 0.3994335931797612, "grad_norm": 0.7515560648032322, "learning_rate": 3.3365774533657745e-06, "loss": 0.6805, "step": 13681 }, { "epoch": 0.39946278940760854, "grad_norm": 0.7441194620161339, "learning_rate": 3.3364152473641525e-06, "loss": 0.7312, "step": 13682 }, { "epoch": 0.3994919856354559, "grad_norm": 0.746713278764671, "learning_rate": 3.3362530413625305e-06, "loss": 0.6532, "step": 13683 }, { "epoch": 0.39952118186330327, "grad_norm": 0.7893517592970273, "learning_rate": 3.3360908353609085e-06, "loss": 0.7785, "step": 13684 }, { "epoch": 0.3995503780911506, "grad_norm": 0.7625401091207382, "learning_rate": 3.3359286293592865e-06, "loss": 0.6551, "step": 13685 }, { "epoch": 0.399579574318998, "grad_norm": 0.7410688527190684, "learning_rate": 3.335766423357665e-06, "loss": 0.6636, "step": 13686 }, { "epoch": 0.39960877054684535, "grad_norm": 0.730764886329028, "learning_rate": 3.3356042173560425e-06, "loss": 0.653, "step": 13687 }, { "epoch": 0.3996379667746927, "grad_norm": 0.7553812563042318, "learning_rate": 3.3354420113544205e-06, "loss": 0.6733, "step": 13688 }, { "epoch": 0.39966716300254007, "grad_norm": 0.72081773951882, "learning_rate": 3.3352798053527985e-06, "loss": 0.6334, "step": 13689 }, { "epoch": 0.39969635923038743, "grad_norm": 0.7154316116270667, "learning_rate": 3.3351175993511765e-06, "loss": 0.6318, "step": 13690 }, { "epoch": 0.3997255554582348, "grad_norm": 0.72631086811124, "learning_rate": 3.3349553933495545e-06, "loss": 0.683, "step": 13691 }, { "epoch": 0.39975475168608215, "grad_norm": 0.713438857849251, "learning_rate": 3.334793187347932e-06, "loss": 0.6341, "step": 13692 }, { "epoch": 0.3997839479139295, "grad_norm": 0.7139712929593399, "learning_rate": 3.33463098134631e-06, "loss": 0.6181, "step": 13693 }, { "epoch": 0.3998131441417769, "grad_norm": 0.7159258761312938, "learning_rate": 3.334468775344688e-06, "loss": 0.5984, "step": 13694 }, { "epoch": 0.39984234036962424, "grad_norm": 0.7886207275549868, "learning_rate": 3.334306569343066e-06, "loss": 0.6776, "step": 13695 }, { "epoch": 0.3998715365974716, "grad_norm": 0.7738634938100422, "learning_rate": 3.3341443633414437e-06, "loss": 0.6558, "step": 13696 }, { "epoch": 0.39990073282531896, "grad_norm": 0.6907432585555745, "learning_rate": 3.3339821573398217e-06, "loss": 0.5974, "step": 13697 }, { "epoch": 0.3999299290531663, "grad_norm": 0.6945673410660577, "learning_rate": 3.3338199513381997e-06, "loss": 0.6102, "step": 13698 }, { "epoch": 0.3999591252810137, "grad_norm": 0.6962208396660025, "learning_rate": 3.3336577453365777e-06, "loss": 0.6393, "step": 13699 }, { "epoch": 0.39998832150886104, "grad_norm": 0.7236880679485067, "learning_rate": 3.3334955393349553e-06, "loss": 0.6731, "step": 13700 }, { "epoch": 0.4000175177367084, "grad_norm": 0.7262042386526699, "learning_rate": 3.3333333333333333e-06, "loss": 0.6969, "step": 13701 }, { "epoch": 0.40004671396455577, "grad_norm": 0.7944073330031519, "learning_rate": 3.3331711273317113e-06, "loss": 0.6655, "step": 13702 }, { "epoch": 0.40007591019240313, "grad_norm": 0.7409133894341843, "learning_rate": 3.3330089213300893e-06, "loss": 0.6304, "step": 13703 }, { "epoch": 0.4001051064202505, "grad_norm": 0.7454722242318105, "learning_rate": 3.3328467153284673e-06, "loss": 0.6024, "step": 13704 }, { "epoch": 0.40013430264809785, "grad_norm": 0.6975812587639983, "learning_rate": 3.3326845093268458e-06, "loss": 0.5944, "step": 13705 }, { "epoch": 0.4001634988759452, "grad_norm": 0.7406780154638208, "learning_rate": 3.3325223033252234e-06, "loss": 0.6589, "step": 13706 }, { "epoch": 0.4001926951037926, "grad_norm": 0.7320882264698885, "learning_rate": 3.3323600973236014e-06, "loss": 0.663, "step": 13707 }, { "epoch": 0.40022189133163993, "grad_norm": 0.7303299034401011, "learning_rate": 3.3321978913219794e-06, "loss": 0.6649, "step": 13708 }, { "epoch": 0.4002510875594873, "grad_norm": 0.7242387082712628, "learning_rate": 3.3320356853203574e-06, "loss": 0.6683, "step": 13709 }, { "epoch": 0.40028028378733466, "grad_norm": 0.7771598431563961, "learning_rate": 3.331873479318735e-06, "loss": 0.7549, "step": 13710 }, { "epoch": 0.400309480015182, "grad_norm": 0.7032864716335377, "learning_rate": 3.331711273317113e-06, "loss": 0.608, "step": 13711 }, { "epoch": 0.4003386762430294, "grad_norm": 0.7245432751645176, "learning_rate": 3.331549067315491e-06, "loss": 0.6129, "step": 13712 }, { "epoch": 0.40036787247087674, "grad_norm": 0.7468114241668283, "learning_rate": 3.331386861313869e-06, "loss": 0.6942, "step": 13713 }, { "epoch": 0.4003970686987241, "grad_norm": 0.7209026723848696, "learning_rate": 3.331224655312247e-06, "loss": 0.6375, "step": 13714 }, { "epoch": 0.40042626492657146, "grad_norm": 0.7339252094208358, "learning_rate": 3.3310624493106246e-06, "loss": 0.626, "step": 13715 }, { "epoch": 0.4004554611544188, "grad_norm": 0.7600787269424115, "learning_rate": 3.3309002433090026e-06, "loss": 0.6495, "step": 13716 }, { "epoch": 0.4004846573822662, "grad_norm": 0.735526948190635, "learning_rate": 3.3307380373073806e-06, "loss": 0.5945, "step": 13717 }, { "epoch": 0.40051385361011355, "grad_norm": 0.648611395740044, "learning_rate": 3.3305758313057586e-06, "loss": 0.5106, "step": 13718 }, { "epoch": 0.40054304983796096, "grad_norm": 0.7637922516981415, "learning_rate": 3.330413625304136e-06, "loss": 0.7069, "step": 13719 }, { "epoch": 0.4005722460658083, "grad_norm": 0.6583403307366719, "learning_rate": 3.330251419302514e-06, "loss": 0.5298, "step": 13720 }, { "epoch": 0.4006014422936557, "grad_norm": 0.7760406016528015, "learning_rate": 3.330089213300892e-06, "loss": 0.7899, "step": 13721 }, { "epoch": 0.40063063852150305, "grad_norm": 0.7219995833236496, "learning_rate": 3.32992700729927e-06, "loss": 0.6419, "step": 13722 }, { "epoch": 0.4006598347493504, "grad_norm": 0.7251729933242345, "learning_rate": 3.329764801297648e-06, "loss": 0.643, "step": 13723 }, { "epoch": 0.40068903097719777, "grad_norm": 0.7153754972968452, "learning_rate": 3.3296025952960266e-06, "loss": 0.6661, "step": 13724 }, { "epoch": 0.40071822720504513, "grad_norm": 0.7736335560606838, "learning_rate": 3.3294403892944042e-06, "loss": 0.7158, "step": 13725 }, { "epoch": 0.4007474234328925, "grad_norm": 0.7119863461237556, "learning_rate": 3.3292781832927822e-06, "loss": 0.5836, "step": 13726 }, { "epoch": 0.40077661966073985, "grad_norm": 0.7204716865356495, "learning_rate": 3.3291159772911602e-06, "loss": 0.6498, "step": 13727 }, { "epoch": 0.4008058158885872, "grad_norm": 0.741899305477506, "learning_rate": 3.3289537712895382e-06, "loss": 0.6915, "step": 13728 }, { "epoch": 0.4008350121164346, "grad_norm": 0.8272357477276311, "learning_rate": 3.328791565287916e-06, "loss": 0.7366, "step": 13729 }, { "epoch": 0.40086420834428194, "grad_norm": 0.7113303811838407, "learning_rate": 3.328629359286294e-06, "loss": 0.6665, "step": 13730 }, { "epoch": 0.4008934045721293, "grad_norm": 0.7407201577925737, "learning_rate": 3.328467153284672e-06, "loss": 0.677, "step": 13731 }, { "epoch": 0.40092260079997666, "grad_norm": 0.7205336309825202, "learning_rate": 3.32830494728305e-06, "loss": 0.6403, "step": 13732 }, { "epoch": 0.400951797027824, "grad_norm": 0.6468521977860626, "learning_rate": 3.328142741281428e-06, "loss": 0.5409, "step": 13733 }, { "epoch": 0.4009809932556714, "grad_norm": 0.7640235959234977, "learning_rate": 3.3279805352798054e-06, "loss": 0.6727, "step": 13734 }, { "epoch": 0.40101018948351874, "grad_norm": 0.7274100439067215, "learning_rate": 3.3278183292781834e-06, "loss": 0.6304, "step": 13735 }, { "epoch": 0.4010393857113661, "grad_norm": 0.7428649783143881, "learning_rate": 3.3276561232765614e-06, "loss": 0.6905, "step": 13736 }, { "epoch": 0.40106858193921346, "grad_norm": 0.7132209851611889, "learning_rate": 3.3274939172749395e-06, "loss": 0.6394, "step": 13737 }, { "epoch": 0.4010977781670608, "grad_norm": 0.7706931770187172, "learning_rate": 3.327331711273317e-06, "loss": 0.6633, "step": 13738 }, { "epoch": 0.4011269743949082, "grad_norm": 0.7146062022129634, "learning_rate": 3.327169505271695e-06, "loss": 0.5759, "step": 13739 }, { "epoch": 0.40115617062275555, "grad_norm": 0.7208463459532292, "learning_rate": 3.327007299270073e-06, "loss": 0.5977, "step": 13740 }, { "epoch": 0.4011853668506029, "grad_norm": 0.7041500753285433, "learning_rate": 3.326845093268451e-06, "loss": 0.5714, "step": 13741 }, { "epoch": 0.40121456307845027, "grad_norm": 0.7483349914123103, "learning_rate": 3.3266828872668295e-06, "loss": 0.6805, "step": 13742 }, { "epoch": 0.40124375930629763, "grad_norm": 0.653721680473003, "learning_rate": 3.3265206812652075e-06, "loss": 0.5627, "step": 13743 }, { "epoch": 0.401272955534145, "grad_norm": 0.7044704404220826, "learning_rate": 3.326358475263585e-06, "loss": 0.6068, "step": 13744 }, { "epoch": 0.40130215176199235, "grad_norm": 0.8674177281741056, "learning_rate": 3.326196269261963e-06, "loss": 0.6979, "step": 13745 }, { "epoch": 0.4013313479898397, "grad_norm": 0.7677823585735635, "learning_rate": 3.326034063260341e-06, "loss": 0.7282, "step": 13746 }, { "epoch": 0.4013605442176871, "grad_norm": 0.752611783794472, "learning_rate": 3.325871857258719e-06, "loss": 0.733, "step": 13747 }, { "epoch": 0.40138974044553444, "grad_norm": 0.6990464760033643, "learning_rate": 3.3257096512570967e-06, "loss": 0.6292, "step": 13748 }, { "epoch": 0.4014189366733818, "grad_norm": 0.8115428807960718, "learning_rate": 3.3255474452554747e-06, "loss": 0.8074, "step": 13749 }, { "epoch": 0.40144813290122916, "grad_norm": 0.7843404734478692, "learning_rate": 3.3253852392538527e-06, "loss": 0.7305, "step": 13750 }, { "epoch": 0.4014773291290765, "grad_norm": 0.7483210601336971, "learning_rate": 3.3252230332522307e-06, "loss": 0.697, "step": 13751 }, { "epoch": 0.4015065253569239, "grad_norm": 0.6625585689198019, "learning_rate": 3.3250608272506087e-06, "loss": 0.5997, "step": 13752 }, { "epoch": 0.40153572158477124, "grad_norm": 0.7877981552696253, "learning_rate": 3.3248986212489863e-06, "loss": 0.7019, "step": 13753 }, { "epoch": 0.4015649178126186, "grad_norm": 0.7016133234311149, "learning_rate": 3.3247364152473643e-06, "loss": 0.6191, "step": 13754 }, { "epoch": 0.40159411404046597, "grad_norm": 0.7253883709963554, "learning_rate": 3.3245742092457423e-06, "loss": 0.6962, "step": 13755 }, { "epoch": 0.4016233102683133, "grad_norm": 0.6669844268304539, "learning_rate": 3.3244120032441203e-06, "loss": 0.571, "step": 13756 }, { "epoch": 0.4016525064961607, "grad_norm": 0.6417761853459969, "learning_rate": 3.324249797242498e-06, "loss": 0.5136, "step": 13757 }, { "epoch": 0.40168170272400805, "grad_norm": 0.725436577122709, "learning_rate": 3.324087591240876e-06, "loss": 0.6084, "step": 13758 }, { "epoch": 0.4017108989518554, "grad_norm": 0.7273554076610556, "learning_rate": 3.323925385239254e-06, "loss": 0.6493, "step": 13759 }, { "epoch": 0.40174009517970277, "grad_norm": 0.6935902980804861, "learning_rate": 3.323763179237632e-06, "loss": 0.6258, "step": 13760 }, { "epoch": 0.40176929140755013, "grad_norm": 0.6771092512381913, "learning_rate": 3.3236009732360103e-06, "loss": 0.5704, "step": 13761 }, { "epoch": 0.4017984876353975, "grad_norm": 0.7354456646589285, "learning_rate": 3.3234387672343884e-06, "loss": 0.6276, "step": 13762 }, { "epoch": 0.40182768386324486, "grad_norm": 0.6829052106300211, "learning_rate": 3.323276561232766e-06, "loss": 0.5698, "step": 13763 }, { "epoch": 0.4018568800910922, "grad_norm": 0.8824188906964823, "learning_rate": 3.323114355231144e-06, "loss": 0.7213, "step": 13764 }, { "epoch": 0.4018860763189396, "grad_norm": 0.7330134649917978, "learning_rate": 3.322952149229522e-06, "loss": 0.6585, "step": 13765 }, { "epoch": 0.40191527254678694, "grad_norm": 0.7327905084761605, "learning_rate": 3.3227899432279e-06, "loss": 0.6619, "step": 13766 }, { "epoch": 0.4019444687746343, "grad_norm": 0.8073985359159539, "learning_rate": 3.3226277372262775e-06, "loss": 0.7574, "step": 13767 }, { "epoch": 0.40197366500248166, "grad_norm": 0.7224715905695472, "learning_rate": 3.3224655312246555e-06, "loss": 0.6374, "step": 13768 }, { "epoch": 0.402002861230329, "grad_norm": 0.7338569095462301, "learning_rate": 3.3223033252230336e-06, "loss": 0.5971, "step": 13769 }, { "epoch": 0.4020320574581764, "grad_norm": 0.6496584641725469, "learning_rate": 3.3221411192214116e-06, "loss": 0.5578, "step": 13770 }, { "epoch": 0.40206125368602375, "grad_norm": 0.7305737705988502, "learning_rate": 3.3219789132197896e-06, "loss": 0.6783, "step": 13771 }, { "epoch": 0.4020904499138711, "grad_norm": 0.749938076367508, "learning_rate": 3.321816707218167e-06, "loss": 0.6438, "step": 13772 }, { "epoch": 0.40211964614171847, "grad_norm": 0.7542624190834608, "learning_rate": 3.321654501216545e-06, "loss": 0.6725, "step": 13773 }, { "epoch": 0.40214884236956583, "grad_norm": 0.7222977572173346, "learning_rate": 3.321492295214923e-06, "loss": 0.6393, "step": 13774 }, { "epoch": 0.4021780385974132, "grad_norm": 0.7165558194352039, "learning_rate": 3.321330089213301e-06, "loss": 0.6971, "step": 13775 }, { "epoch": 0.40220723482526055, "grad_norm": 0.7730419902924499, "learning_rate": 3.3211678832116788e-06, "loss": 0.755, "step": 13776 }, { "epoch": 0.4022364310531079, "grad_norm": 0.7231534560272191, "learning_rate": 3.3210056772100568e-06, "loss": 0.6323, "step": 13777 }, { "epoch": 0.4022656272809553, "grad_norm": 0.7534904890257901, "learning_rate": 3.3208434712084348e-06, "loss": 0.6834, "step": 13778 }, { "epoch": 0.4022948235088027, "grad_norm": 0.781151659940108, "learning_rate": 3.3206812652068128e-06, "loss": 0.704, "step": 13779 }, { "epoch": 0.40232401973665005, "grad_norm": 0.7190809960338239, "learning_rate": 3.320519059205191e-06, "loss": 0.586, "step": 13780 }, { "epoch": 0.4023532159644974, "grad_norm": 0.7538031370879446, "learning_rate": 3.320356853203569e-06, "loss": 0.6895, "step": 13781 }, { "epoch": 0.4023824121923448, "grad_norm": 0.7387180857832728, "learning_rate": 3.320194647201947e-06, "loss": 0.6809, "step": 13782 }, { "epoch": 0.40241160842019214, "grad_norm": 0.7042860941215375, "learning_rate": 3.320032441200325e-06, "loss": 0.6237, "step": 13783 }, { "epoch": 0.4024408046480395, "grad_norm": 0.7681646051775932, "learning_rate": 3.319870235198703e-06, "loss": 0.6835, "step": 13784 }, { "epoch": 0.40247000087588686, "grad_norm": 0.6980884302886304, "learning_rate": 3.319708029197081e-06, "loss": 0.6298, "step": 13785 }, { "epoch": 0.4024991971037342, "grad_norm": 0.7052134442637679, "learning_rate": 3.3195458231954584e-06, "loss": 0.5934, "step": 13786 }, { "epoch": 0.4025283933315816, "grad_norm": 0.6968566799935046, "learning_rate": 3.3193836171938364e-06, "loss": 0.5768, "step": 13787 }, { "epoch": 0.40255758955942894, "grad_norm": 0.7018529234055757, "learning_rate": 3.3192214111922144e-06, "loss": 0.6477, "step": 13788 }, { "epoch": 0.4025867857872763, "grad_norm": 0.7039660166102539, "learning_rate": 3.3190592051905924e-06, "loss": 0.6268, "step": 13789 }, { "epoch": 0.40261598201512366, "grad_norm": 0.7837153142901262, "learning_rate": 3.3188969991889704e-06, "loss": 0.7107, "step": 13790 }, { "epoch": 0.402645178242971, "grad_norm": 0.7200192505135885, "learning_rate": 3.318734793187348e-06, "loss": 0.634, "step": 13791 }, { "epoch": 0.4026743744708184, "grad_norm": 0.6684107280327867, "learning_rate": 3.318572587185726e-06, "loss": 0.5397, "step": 13792 }, { "epoch": 0.40270357069866575, "grad_norm": 0.6824062253013974, "learning_rate": 3.318410381184104e-06, "loss": 0.6101, "step": 13793 }, { "epoch": 0.4027327669265131, "grad_norm": 0.7307882673969894, "learning_rate": 3.318248175182482e-06, "loss": 0.6075, "step": 13794 }, { "epoch": 0.40276196315436047, "grad_norm": 0.8490430392271844, "learning_rate": 3.3180859691808596e-06, "loss": 0.6301, "step": 13795 }, { "epoch": 0.40279115938220783, "grad_norm": 0.7193475261139258, "learning_rate": 3.3179237631792376e-06, "loss": 0.5985, "step": 13796 }, { "epoch": 0.4028203556100552, "grad_norm": 0.7956431655259725, "learning_rate": 3.3177615571776156e-06, "loss": 0.6709, "step": 13797 }, { "epoch": 0.40284955183790255, "grad_norm": 0.745922856368844, "learning_rate": 3.3175993511759936e-06, "loss": 0.7501, "step": 13798 }, { "epoch": 0.4028787480657499, "grad_norm": 0.7888601236094831, "learning_rate": 3.317437145174372e-06, "loss": 0.7188, "step": 13799 }, { "epoch": 0.4029079442935973, "grad_norm": 0.6832463122592725, "learning_rate": 3.31727493917275e-06, "loss": 0.6145, "step": 13800 }, { "epoch": 0.40293714052144464, "grad_norm": 0.6967141640777587, "learning_rate": 3.3171127331711277e-06, "loss": 0.5846, "step": 13801 }, { "epoch": 0.402966336749292, "grad_norm": 0.7254661765109425, "learning_rate": 3.3169505271695057e-06, "loss": 0.674, "step": 13802 }, { "epoch": 0.40299553297713936, "grad_norm": 0.7099096343502365, "learning_rate": 3.3167883211678837e-06, "loss": 0.6987, "step": 13803 }, { "epoch": 0.4030247292049867, "grad_norm": 0.7094816370202074, "learning_rate": 3.3166261151662617e-06, "loss": 0.6452, "step": 13804 }, { "epoch": 0.4030539254328341, "grad_norm": 0.7743698750720934, "learning_rate": 3.3164639091646393e-06, "loss": 0.7191, "step": 13805 }, { "epoch": 0.40308312166068144, "grad_norm": 0.7241475869187084, "learning_rate": 3.3163017031630173e-06, "loss": 0.6436, "step": 13806 }, { "epoch": 0.4031123178885288, "grad_norm": 0.9916004821283091, "learning_rate": 3.3161394971613953e-06, "loss": 0.7029, "step": 13807 }, { "epoch": 0.40314151411637617, "grad_norm": 0.7484878433813327, "learning_rate": 3.3159772911597733e-06, "loss": 0.6919, "step": 13808 }, { "epoch": 0.4031707103442235, "grad_norm": 0.7105718790301261, "learning_rate": 3.3158150851581513e-06, "loss": 0.6376, "step": 13809 }, { "epoch": 0.4031999065720709, "grad_norm": 0.7844357876369729, "learning_rate": 3.315652879156529e-06, "loss": 0.7155, "step": 13810 }, { "epoch": 0.40322910279991825, "grad_norm": 0.7035354838934808, "learning_rate": 3.315490673154907e-06, "loss": 0.6944, "step": 13811 }, { "epoch": 0.4032582990277656, "grad_norm": 0.6784918434367935, "learning_rate": 3.315328467153285e-06, "loss": 0.6116, "step": 13812 }, { "epoch": 0.40328749525561297, "grad_norm": 0.716778312496687, "learning_rate": 3.315166261151663e-06, "loss": 0.6435, "step": 13813 }, { "epoch": 0.40331669148346033, "grad_norm": 0.7636965136077756, "learning_rate": 3.3150040551500405e-06, "loss": 0.7101, "step": 13814 }, { "epoch": 0.4033458877113077, "grad_norm": 0.7358951477758189, "learning_rate": 3.3148418491484185e-06, "loss": 0.644, "step": 13815 }, { "epoch": 0.40337508393915505, "grad_norm": 0.697822836965131, "learning_rate": 3.3146796431467965e-06, "loss": 0.5993, "step": 13816 }, { "epoch": 0.4034042801670024, "grad_norm": 0.7242349859169326, "learning_rate": 3.3145174371451745e-06, "loss": 0.6093, "step": 13817 }, { "epoch": 0.4034334763948498, "grad_norm": 0.7933558879030358, "learning_rate": 3.314355231143553e-06, "loss": 0.7192, "step": 13818 }, { "epoch": 0.40346267262269714, "grad_norm": 0.7791804714758161, "learning_rate": 3.314193025141931e-06, "loss": 0.7305, "step": 13819 }, { "epoch": 0.4034918688505445, "grad_norm": 0.7116289140038871, "learning_rate": 3.3140308191403085e-06, "loss": 0.6397, "step": 13820 }, { "epoch": 0.40352106507839186, "grad_norm": 0.7766435665486606, "learning_rate": 3.3138686131386865e-06, "loss": 0.7114, "step": 13821 }, { "epoch": 0.4035502613062392, "grad_norm": 0.7769040118092317, "learning_rate": 3.3137064071370645e-06, "loss": 0.6588, "step": 13822 }, { "epoch": 0.4035794575340866, "grad_norm": 0.7189243672550971, "learning_rate": 3.3135442011354425e-06, "loss": 0.6065, "step": 13823 }, { "epoch": 0.40360865376193394, "grad_norm": 0.6930676278096424, "learning_rate": 3.31338199513382e-06, "loss": 0.5781, "step": 13824 }, { "epoch": 0.4036378499897813, "grad_norm": 0.7303095158706803, "learning_rate": 3.313219789132198e-06, "loss": 0.6703, "step": 13825 }, { "epoch": 0.40366704621762867, "grad_norm": 0.6763437223329419, "learning_rate": 3.313057583130576e-06, "loss": 0.6171, "step": 13826 }, { "epoch": 0.40369624244547603, "grad_norm": 0.7894939507243579, "learning_rate": 3.312895377128954e-06, "loss": 0.6808, "step": 13827 }, { "epoch": 0.4037254386733234, "grad_norm": 0.7330138131976061, "learning_rate": 3.312733171127332e-06, "loss": 0.6596, "step": 13828 }, { "epoch": 0.40375463490117075, "grad_norm": 0.7733011295053241, "learning_rate": 3.3125709651257097e-06, "loss": 0.7508, "step": 13829 }, { "epoch": 0.4037838311290181, "grad_norm": 0.7520185442511431, "learning_rate": 3.3124087591240877e-06, "loss": 0.6963, "step": 13830 }, { "epoch": 0.4038130273568655, "grad_norm": 0.6903139405286219, "learning_rate": 3.3122465531224657e-06, "loss": 0.6268, "step": 13831 }, { "epoch": 0.40384222358471283, "grad_norm": 0.7385965325757381, "learning_rate": 3.3120843471208437e-06, "loss": 0.6205, "step": 13832 }, { "epoch": 0.4038714198125602, "grad_norm": 0.7485220697174978, "learning_rate": 3.3119221411192213e-06, "loss": 0.6716, "step": 13833 }, { "epoch": 0.40390061604040756, "grad_norm": 0.6828951133215793, "learning_rate": 3.3117599351175993e-06, "loss": 0.5669, "step": 13834 }, { "epoch": 0.4039298122682549, "grad_norm": 0.7590817423419469, "learning_rate": 3.3115977291159773e-06, "loss": 0.7184, "step": 13835 }, { "epoch": 0.4039590084961023, "grad_norm": 0.7098580100052747, "learning_rate": 3.3114355231143553e-06, "loss": 0.6198, "step": 13836 }, { "epoch": 0.40398820472394964, "grad_norm": 0.8275041530132318, "learning_rate": 3.3112733171127338e-06, "loss": 0.6773, "step": 13837 }, { "epoch": 0.404017400951797, "grad_norm": 0.722151733800785, "learning_rate": 3.3111111111111118e-06, "loss": 0.6819, "step": 13838 }, { "epoch": 0.40404659717964436, "grad_norm": 0.981410331001848, "learning_rate": 3.3109489051094894e-06, "loss": 0.6218, "step": 13839 }, { "epoch": 0.4040757934074918, "grad_norm": 0.7983251057705071, "learning_rate": 3.3107866991078674e-06, "loss": 0.7133, "step": 13840 }, { "epoch": 0.40410498963533914, "grad_norm": 0.6995727110291919, "learning_rate": 3.3106244931062454e-06, "loss": 0.5956, "step": 13841 }, { "epoch": 0.4041341858631865, "grad_norm": 0.6886185874888806, "learning_rate": 3.3104622871046234e-06, "loss": 0.6072, "step": 13842 }, { "epoch": 0.40416338209103386, "grad_norm": 0.7780498944802661, "learning_rate": 3.310300081103001e-06, "loss": 0.6535, "step": 13843 }, { "epoch": 0.4041925783188812, "grad_norm": 0.7523398365546518, "learning_rate": 3.310137875101379e-06, "loss": 0.6657, "step": 13844 }, { "epoch": 0.4042217745467286, "grad_norm": 0.7282674563687352, "learning_rate": 3.309975669099757e-06, "loss": 0.7006, "step": 13845 }, { "epoch": 0.40425097077457595, "grad_norm": 0.830054493035928, "learning_rate": 3.309813463098135e-06, "loss": 0.6755, "step": 13846 }, { "epoch": 0.4042801670024233, "grad_norm": 0.7357637660801656, "learning_rate": 3.309651257096513e-06, "loss": 0.6721, "step": 13847 }, { "epoch": 0.40430936323027067, "grad_norm": 0.7031563665335233, "learning_rate": 3.3094890510948906e-06, "loss": 0.6467, "step": 13848 }, { "epoch": 0.40433855945811803, "grad_norm": 0.7371172866809249, "learning_rate": 3.3093268450932686e-06, "loss": 0.5958, "step": 13849 }, { "epoch": 0.4043677556859654, "grad_norm": 0.7488506141622082, "learning_rate": 3.3091646390916466e-06, "loss": 0.6936, "step": 13850 }, { "epoch": 0.40439695191381275, "grad_norm": 0.6972042349812412, "learning_rate": 3.3090024330900246e-06, "loss": 0.5762, "step": 13851 }, { "epoch": 0.4044261481416601, "grad_norm": 1.2886048401607642, "learning_rate": 3.308840227088402e-06, "loss": 0.7187, "step": 13852 }, { "epoch": 0.4044553443695075, "grad_norm": 0.7477316784774107, "learning_rate": 3.30867802108678e-06, "loss": 0.6103, "step": 13853 }, { "epoch": 0.40448454059735484, "grad_norm": 0.7433807161708567, "learning_rate": 3.308515815085158e-06, "loss": 0.5437, "step": 13854 }, { "epoch": 0.4045137368252022, "grad_norm": 0.7453356136881396, "learning_rate": 3.308353609083536e-06, "loss": 0.6677, "step": 13855 }, { "epoch": 0.40454293305304956, "grad_norm": 0.6855244734911476, "learning_rate": 3.3081914030819146e-06, "loss": 0.5899, "step": 13856 }, { "epoch": 0.4045721292808969, "grad_norm": 0.779376107434519, "learning_rate": 3.3080291970802926e-06, "loss": 0.7141, "step": 13857 }, { "epoch": 0.4046013255087443, "grad_norm": 0.7452698988011225, "learning_rate": 3.3078669910786702e-06, "loss": 0.6815, "step": 13858 }, { "epoch": 0.40463052173659164, "grad_norm": 0.7213980881652431, "learning_rate": 3.3077047850770482e-06, "loss": 0.6242, "step": 13859 }, { "epoch": 0.404659717964439, "grad_norm": 0.770718097262382, "learning_rate": 3.3075425790754262e-06, "loss": 0.7292, "step": 13860 }, { "epoch": 0.40468891419228636, "grad_norm": 0.7577454788749669, "learning_rate": 3.3073803730738042e-06, "loss": 0.6555, "step": 13861 }, { "epoch": 0.4047181104201337, "grad_norm": 0.733050173267739, "learning_rate": 3.307218167072182e-06, "loss": 0.6285, "step": 13862 }, { "epoch": 0.4047473066479811, "grad_norm": 0.6969693846816668, "learning_rate": 3.30705596107056e-06, "loss": 0.6172, "step": 13863 }, { "epoch": 0.40477650287582845, "grad_norm": 0.7367471148823607, "learning_rate": 3.306893755068938e-06, "loss": 0.6302, "step": 13864 }, { "epoch": 0.4048056991036758, "grad_norm": 0.7309024654473475, "learning_rate": 3.306731549067316e-06, "loss": 0.7025, "step": 13865 }, { "epoch": 0.40483489533152317, "grad_norm": 0.6678639546399971, "learning_rate": 3.306569343065694e-06, "loss": 0.584, "step": 13866 }, { "epoch": 0.40486409155937053, "grad_norm": 0.8034235801070428, "learning_rate": 3.3064071370640714e-06, "loss": 0.6703, "step": 13867 }, { "epoch": 0.4048932877872179, "grad_norm": 0.776910684885265, "learning_rate": 3.3062449310624494e-06, "loss": 0.728, "step": 13868 }, { "epoch": 0.40492248401506525, "grad_norm": 0.6922266003300143, "learning_rate": 3.3060827250608275e-06, "loss": 0.6129, "step": 13869 }, { "epoch": 0.4049516802429126, "grad_norm": 0.7654114783292755, "learning_rate": 3.3059205190592055e-06, "loss": 0.6524, "step": 13870 }, { "epoch": 0.40498087647076, "grad_norm": 0.7689124246524676, "learning_rate": 3.305758313057583e-06, "loss": 0.6723, "step": 13871 }, { "epoch": 0.40501007269860734, "grad_norm": 0.6950858653723713, "learning_rate": 3.305596107055961e-06, "loss": 0.6285, "step": 13872 }, { "epoch": 0.4050392689264547, "grad_norm": 0.6869951293930612, "learning_rate": 3.305433901054339e-06, "loss": 0.6278, "step": 13873 }, { "epoch": 0.40506846515430206, "grad_norm": 0.7264104866239259, "learning_rate": 3.305271695052717e-06, "loss": 0.681, "step": 13874 }, { "epoch": 0.4050976613821494, "grad_norm": 0.7946696278742112, "learning_rate": 3.3051094890510955e-06, "loss": 0.6903, "step": 13875 }, { "epoch": 0.4051268576099968, "grad_norm": 0.7256113841806224, "learning_rate": 3.3049472830494735e-06, "loss": 0.6626, "step": 13876 }, { "epoch": 0.40515605383784414, "grad_norm": 0.8131075308232791, "learning_rate": 3.304785077047851e-06, "loss": 0.6339, "step": 13877 }, { "epoch": 0.4051852500656915, "grad_norm": 0.7810869694713763, "learning_rate": 3.304622871046229e-06, "loss": 0.7019, "step": 13878 }, { "epoch": 0.40521444629353887, "grad_norm": 0.7488823086215395, "learning_rate": 3.304460665044607e-06, "loss": 0.6403, "step": 13879 }, { "epoch": 0.4052436425213862, "grad_norm": 0.7749185483471016, "learning_rate": 3.304298459042985e-06, "loss": 0.6977, "step": 13880 }, { "epoch": 0.4052728387492336, "grad_norm": 0.720480117474727, "learning_rate": 3.3041362530413627e-06, "loss": 0.5622, "step": 13881 }, { "epoch": 0.40530203497708095, "grad_norm": 0.7645095034266306, "learning_rate": 3.3039740470397407e-06, "loss": 0.6925, "step": 13882 }, { "epoch": 0.4053312312049283, "grad_norm": 0.797357023519456, "learning_rate": 3.3038118410381187e-06, "loss": 0.7098, "step": 13883 }, { "epoch": 0.40536042743277567, "grad_norm": 0.7029859764556957, "learning_rate": 3.3036496350364967e-06, "loss": 0.6131, "step": 13884 }, { "epoch": 0.40538962366062303, "grad_norm": 0.7350961413263501, "learning_rate": 3.3034874290348747e-06, "loss": 0.7202, "step": 13885 }, { "epoch": 0.4054188198884704, "grad_norm": 0.7278915829691931, "learning_rate": 3.3033252230332523e-06, "loss": 0.6319, "step": 13886 }, { "epoch": 0.40544801611631776, "grad_norm": 0.7412500782259379, "learning_rate": 3.3031630170316303e-06, "loss": 0.6818, "step": 13887 }, { "epoch": 0.4054772123441651, "grad_norm": 0.7320700425421776, "learning_rate": 3.3030008110300083e-06, "loss": 0.6571, "step": 13888 }, { "epoch": 0.4055064085720125, "grad_norm": 0.6635825793639505, "learning_rate": 3.3028386050283863e-06, "loss": 0.5463, "step": 13889 }, { "epoch": 0.40553560479985984, "grad_norm": 0.7369148691759168, "learning_rate": 3.302676399026764e-06, "loss": 0.6409, "step": 13890 }, { "epoch": 0.4055648010277072, "grad_norm": 0.7516671295183255, "learning_rate": 3.302514193025142e-06, "loss": 0.6972, "step": 13891 }, { "epoch": 0.40559399725555456, "grad_norm": 0.738375983268048, "learning_rate": 3.30235198702352e-06, "loss": 0.6518, "step": 13892 }, { "epoch": 0.4056231934834019, "grad_norm": 0.7606965535594792, "learning_rate": 3.3021897810218983e-06, "loss": 0.6772, "step": 13893 }, { "epoch": 0.4056523897112493, "grad_norm": 0.8269969241242487, "learning_rate": 3.3020275750202764e-06, "loss": 0.8557, "step": 13894 }, { "epoch": 0.40568158593909664, "grad_norm": 0.6496253295523481, "learning_rate": 3.3018653690186544e-06, "loss": 0.5271, "step": 13895 }, { "epoch": 0.405710782166944, "grad_norm": 0.7278219202855497, "learning_rate": 3.301703163017032e-06, "loss": 0.6473, "step": 13896 }, { "epoch": 0.40573997839479137, "grad_norm": 0.7054104417706595, "learning_rate": 3.30154095701541e-06, "loss": 0.6033, "step": 13897 }, { "epoch": 0.40576917462263873, "grad_norm": 0.750411530970016, "learning_rate": 3.301378751013788e-06, "loss": 0.6818, "step": 13898 }, { "epoch": 0.4057983708504861, "grad_norm": 0.7295943709102866, "learning_rate": 3.301216545012166e-06, "loss": 0.6605, "step": 13899 }, { "epoch": 0.4058275670783335, "grad_norm": 0.7249851356007235, "learning_rate": 3.3010543390105435e-06, "loss": 0.6463, "step": 13900 }, { "epoch": 0.40585676330618087, "grad_norm": 0.6864584269678023, "learning_rate": 3.3008921330089216e-06, "loss": 0.5758, "step": 13901 }, { "epoch": 0.40588595953402823, "grad_norm": 0.7221829689389605, "learning_rate": 3.3007299270072996e-06, "loss": 0.6707, "step": 13902 }, { "epoch": 0.4059151557618756, "grad_norm": 0.7357334997899136, "learning_rate": 3.3005677210056776e-06, "loss": 0.6916, "step": 13903 }, { "epoch": 0.40594435198972295, "grad_norm": 0.7217759573211489, "learning_rate": 3.3004055150040556e-06, "loss": 0.6639, "step": 13904 }, { "epoch": 0.4059735482175703, "grad_norm": 0.7616923420773543, "learning_rate": 3.300243309002433e-06, "loss": 0.6542, "step": 13905 }, { "epoch": 0.4060027444454177, "grad_norm": 0.7779021198488665, "learning_rate": 3.300081103000811e-06, "loss": 0.5872, "step": 13906 }, { "epoch": 0.40603194067326503, "grad_norm": 0.7921922365577817, "learning_rate": 3.299918896999189e-06, "loss": 0.7323, "step": 13907 }, { "epoch": 0.4060611369011124, "grad_norm": 0.7377152821927531, "learning_rate": 3.299756690997567e-06, "loss": 0.6353, "step": 13908 }, { "epoch": 0.40609033312895976, "grad_norm": 0.7494161077024686, "learning_rate": 3.2995944849959448e-06, "loss": 0.6772, "step": 13909 }, { "epoch": 0.4061195293568071, "grad_norm": 0.7027910174965816, "learning_rate": 3.2994322789943228e-06, "loss": 0.6047, "step": 13910 }, { "epoch": 0.4061487255846545, "grad_norm": 0.7705889210031044, "learning_rate": 3.2992700729927008e-06, "loss": 0.6474, "step": 13911 }, { "epoch": 0.40617792181250184, "grad_norm": 0.7136374428326583, "learning_rate": 3.299107866991079e-06, "loss": 0.6367, "step": 13912 }, { "epoch": 0.4062071180403492, "grad_norm": 0.7019393557764377, "learning_rate": 3.2989456609894572e-06, "loss": 0.5758, "step": 13913 }, { "epoch": 0.40623631426819656, "grad_norm": 0.7439308182726299, "learning_rate": 3.2987834549878352e-06, "loss": 0.7142, "step": 13914 }, { "epoch": 0.4062655104960439, "grad_norm": 0.745886571933815, "learning_rate": 3.298621248986213e-06, "loss": 0.7186, "step": 13915 }, { "epoch": 0.4062947067238913, "grad_norm": 0.7772567762394172, "learning_rate": 3.298459042984591e-06, "loss": 0.6933, "step": 13916 }, { "epoch": 0.40632390295173865, "grad_norm": 0.7933162383521916, "learning_rate": 3.298296836982969e-06, "loss": 0.6907, "step": 13917 }, { "epoch": 0.406353099179586, "grad_norm": 0.7867254645637086, "learning_rate": 3.298134630981347e-06, "loss": 0.7166, "step": 13918 }, { "epoch": 0.40638229540743337, "grad_norm": 0.7057165091334264, "learning_rate": 3.2979724249797244e-06, "loss": 0.6323, "step": 13919 }, { "epoch": 0.40641149163528073, "grad_norm": 0.829704068785994, "learning_rate": 3.2978102189781024e-06, "loss": 0.6868, "step": 13920 }, { "epoch": 0.4064406878631281, "grad_norm": 0.7617705504798153, "learning_rate": 3.2976480129764804e-06, "loss": 0.7093, "step": 13921 }, { "epoch": 0.40646988409097545, "grad_norm": 0.7788675177824195, "learning_rate": 3.2974858069748584e-06, "loss": 0.6993, "step": 13922 }, { "epoch": 0.4064990803188228, "grad_norm": 0.680691697679666, "learning_rate": 3.2973236009732364e-06, "loss": 0.5249, "step": 13923 }, { "epoch": 0.4065282765466702, "grad_norm": 0.7261244747265856, "learning_rate": 3.297161394971614e-06, "loss": 0.6758, "step": 13924 }, { "epoch": 0.40655747277451754, "grad_norm": 0.7722302200918122, "learning_rate": 3.296999188969992e-06, "loss": 0.769, "step": 13925 }, { "epoch": 0.4065866690023649, "grad_norm": 0.7514836476017035, "learning_rate": 3.29683698296837e-06, "loss": 0.7299, "step": 13926 }, { "epoch": 0.40661586523021226, "grad_norm": 0.6816583427500672, "learning_rate": 3.296674776966748e-06, "loss": 0.5805, "step": 13927 }, { "epoch": 0.4066450614580596, "grad_norm": 0.6619892278170278, "learning_rate": 3.2965125709651256e-06, "loss": 0.55, "step": 13928 }, { "epoch": 0.406674257685907, "grad_norm": 0.9055415122256953, "learning_rate": 3.2963503649635036e-06, "loss": 0.7277, "step": 13929 }, { "epoch": 0.40670345391375434, "grad_norm": 0.7099886432773308, "learning_rate": 3.2961881589618816e-06, "loss": 0.6223, "step": 13930 }, { "epoch": 0.4067326501416017, "grad_norm": 0.7581692794253422, "learning_rate": 3.29602595296026e-06, "loss": 0.6726, "step": 13931 }, { "epoch": 0.40676184636944906, "grad_norm": 0.7048291610643258, "learning_rate": 3.295863746958638e-06, "loss": 0.6203, "step": 13932 }, { "epoch": 0.4067910425972964, "grad_norm": 0.7303614155080335, "learning_rate": 3.295701540957016e-06, "loss": 0.6641, "step": 13933 }, { "epoch": 0.4068202388251438, "grad_norm": 0.7804148072207755, "learning_rate": 3.2955393349553937e-06, "loss": 0.7686, "step": 13934 }, { "epoch": 0.40684943505299115, "grad_norm": 0.6932098831369495, "learning_rate": 3.2953771289537717e-06, "loss": 0.5965, "step": 13935 }, { "epoch": 0.4068786312808385, "grad_norm": 0.7489431133546424, "learning_rate": 3.2952149229521497e-06, "loss": 0.6154, "step": 13936 }, { "epoch": 0.40690782750868587, "grad_norm": 0.7260372080472461, "learning_rate": 3.2950527169505277e-06, "loss": 0.6636, "step": 13937 }, { "epoch": 0.40693702373653323, "grad_norm": 0.7267741910489719, "learning_rate": 3.2948905109489053e-06, "loss": 0.6536, "step": 13938 }, { "epoch": 0.4069662199643806, "grad_norm": 0.738671319855456, "learning_rate": 3.2947283049472833e-06, "loss": 0.6646, "step": 13939 }, { "epoch": 0.40699541619222795, "grad_norm": 0.8360950113153603, "learning_rate": 3.2945660989456613e-06, "loss": 0.6898, "step": 13940 }, { "epoch": 0.4070246124200753, "grad_norm": 0.7288313911249399, "learning_rate": 3.2944038929440393e-06, "loss": 0.6935, "step": 13941 }, { "epoch": 0.4070538086479227, "grad_norm": 0.7601006302681903, "learning_rate": 3.2942416869424173e-06, "loss": 0.6892, "step": 13942 }, { "epoch": 0.40708300487577004, "grad_norm": 0.8079474244529763, "learning_rate": 3.294079480940795e-06, "loss": 0.6958, "step": 13943 }, { "epoch": 0.4071122011036174, "grad_norm": 0.7695096826491007, "learning_rate": 3.293917274939173e-06, "loss": 0.7324, "step": 13944 }, { "epoch": 0.40714139733146476, "grad_norm": 0.7478600069981038, "learning_rate": 3.293755068937551e-06, "loss": 0.6485, "step": 13945 }, { "epoch": 0.4071705935593121, "grad_norm": 0.7004202526006744, "learning_rate": 3.293592862935929e-06, "loss": 0.6188, "step": 13946 }, { "epoch": 0.4071997897871595, "grad_norm": 0.7454279667755648, "learning_rate": 3.2934306569343065e-06, "loss": 0.657, "step": 13947 }, { "epoch": 0.40722898601500684, "grad_norm": 0.6722742166677367, "learning_rate": 3.2932684509326845e-06, "loss": 0.581, "step": 13948 }, { "epoch": 0.4072581822428542, "grad_norm": 0.6966117912066823, "learning_rate": 3.2931062449310625e-06, "loss": 0.6506, "step": 13949 }, { "epoch": 0.40728737847070157, "grad_norm": 0.7296665162794141, "learning_rate": 3.292944038929441e-06, "loss": 0.6238, "step": 13950 }, { "epoch": 0.4073165746985489, "grad_norm": 0.7575479656385045, "learning_rate": 3.292781832927819e-06, "loss": 0.6759, "step": 13951 }, { "epoch": 0.4073457709263963, "grad_norm": 0.747466789745888, "learning_rate": 3.292619626926197e-06, "loss": 0.6989, "step": 13952 }, { "epoch": 0.40737496715424365, "grad_norm": 0.795055029849286, "learning_rate": 3.2924574209245745e-06, "loss": 0.6564, "step": 13953 }, { "epoch": 0.407404163382091, "grad_norm": 0.702622934875827, "learning_rate": 3.2922952149229525e-06, "loss": 0.6113, "step": 13954 }, { "epoch": 0.4074333596099384, "grad_norm": 0.7097112918319487, "learning_rate": 3.2921330089213305e-06, "loss": 0.5896, "step": 13955 }, { "epoch": 0.40746255583778573, "grad_norm": 0.6660477152167593, "learning_rate": 3.2919708029197085e-06, "loss": 0.5446, "step": 13956 }, { "epoch": 0.4074917520656331, "grad_norm": 0.7557417817659361, "learning_rate": 3.291808596918086e-06, "loss": 0.6952, "step": 13957 }, { "epoch": 0.40752094829348046, "grad_norm": 0.7036037441675689, "learning_rate": 3.291646390916464e-06, "loss": 0.653, "step": 13958 }, { "epoch": 0.4075501445213278, "grad_norm": 0.7507644067016115, "learning_rate": 3.291484184914842e-06, "loss": 0.6853, "step": 13959 }, { "epoch": 0.40757934074917523, "grad_norm": 0.7423691409060316, "learning_rate": 3.29132197891322e-06, "loss": 0.6413, "step": 13960 }, { "epoch": 0.4076085369770226, "grad_norm": 0.7916065543785157, "learning_rate": 3.2911597729115977e-06, "loss": 0.6088, "step": 13961 }, { "epoch": 0.40763773320486996, "grad_norm": 0.6573565938432228, "learning_rate": 3.2909975669099757e-06, "loss": 0.5626, "step": 13962 }, { "epoch": 0.4076669294327173, "grad_norm": 0.7030115204767277, "learning_rate": 3.2908353609083537e-06, "loss": 0.6182, "step": 13963 }, { "epoch": 0.4076961256605647, "grad_norm": 0.7701207856831074, "learning_rate": 3.2906731549067317e-06, "loss": 0.6811, "step": 13964 }, { "epoch": 0.40772532188841204, "grad_norm": 0.7389040784585951, "learning_rate": 3.2905109489051098e-06, "loss": 0.6491, "step": 13965 }, { "epoch": 0.4077545181162594, "grad_norm": 0.7112088508129941, "learning_rate": 3.2903487429034873e-06, "loss": 0.6481, "step": 13966 }, { "epoch": 0.40778371434410676, "grad_norm": 0.7782686914968026, "learning_rate": 3.2901865369018653e-06, "loss": 0.622, "step": 13967 }, { "epoch": 0.4078129105719541, "grad_norm": 0.7559456763693471, "learning_rate": 3.2900243309002434e-06, "loss": 0.6464, "step": 13968 }, { "epoch": 0.4078421067998015, "grad_norm": 0.6984540847518516, "learning_rate": 3.2898621248986218e-06, "loss": 0.603, "step": 13969 }, { "epoch": 0.40787130302764885, "grad_norm": 0.776290386763513, "learning_rate": 3.289699918897e-06, "loss": 0.705, "step": 13970 }, { "epoch": 0.4079004992554962, "grad_norm": 0.77023695168182, "learning_rate": 3.289537712895378e-06, "loss": 0.6642, "step": 13971 }, { "epoch": 0.40792969548334357, "grad_norm": 0.7051428253142635, "learning_rate": 3.2893755068937554e-06, "loss": 0.6037, "step": 13972 }, { "epoch": 0.40795889171119093, "grad_norm": 0.714079177161068, "learning_rate": 3.2892133008921334e-06, "loss": 0.6677, "step": 13973 }, { "epoch": 0.4079880879390383, "grad_norm": 0.7063996798060793, "learning_rate": 3.2890510948905114e-06, "loss": 0.6307, "step": 13974 }, { "epoch": 0.40801728416688565, "grad_norm": 0.806143372739286, "learning_rate": 3.2888888888888894e-06, "loss": 0.6054, "step": 13975 }, { "epoch": 0.408046480394733, "grad_norm": 0.7159645569725003, "learning_rate": 3.288726682887267e-06, "loss": 0.6495, "step": 13976 }, { "epoch": 0.4080756766225804, "grad_norm": 0.7276356234353112, "learning_rate": 3.288564476885645e-06, "loss": 0.6752, "step": 13977 }, { "epoch": 0.40810487285042774, "grad_norm": 0.7901706074439758, "learning_rate": 3.288402270884023e-06, "loss": 0.72, "step": 13978 }, { "epoch": 0.4081340690782751, "grad_norm": 0.7248636123686506, "learning_rate": 3.288240064882401e-06, "loss": 0.6332, "step": 13979 }, { "epoch": 0.40816326530612246, "grad_norm": 0.7743525837462952, "learning_rate": 3.2880778588807786e-06, "loss": 0.737, "step": 13980 }, { "epoch": 0.4081924615339698, "grad_norm": 0.7182333108654814, "learning_rate": 3.2879156528791566e-06, "loss": 0.6522, "step": 13981 }, { "epoch": 0.4082216577618172, "grad_norm": 0.728025386899119, "learning_rate": 3.2877534468775346e-06, "loss": 0.6392, "step": 13982 }, { "epoch": 0.40825085398966454, "grad_norm": 0.7921623941800112, "learning_rate": 3.2875912408759126e-06, "loss": 0.7303, "step": 13983 }, { "epoch": 0.4082800502175119, "grad_norm": 0.792645503271881, "learning_rate": 3.2874290348742906e-06, "loss": 0.7234, "step": 13984 }, { "epoch": 0.40830924644535926, "grad_norm": 0.7224359701540952, "learning_rate": 3.287266828872668e-06, "loss": 0.6864, "step": 13985 }, { "epoch": 0.4083384426732066, "grad_norm": 0.7172350224319297, "learning_rate": 3.287104622871046e-06, "loss": 0.6857, "step": 13986 }, { "epoch": 0.408367638901054, "grad_norm": 0.8041105025619831, "learning_rate": 3.286942416869424e-06, "loss": 0.6869, "step": 13987 }, { "epoch": 0.40839683512890135, "grad_norm": 0.8117497930776287, "learning_rate": 3.2867802108678026e-06, "loss": 0.7973, "step": 13988 }, { "epoch": 0.4084260313567487, "grad_norm": 0.784167539611369, "learning_rate": 3.2866180048661806e-06, "loss": 0.6784, "step": 13989 }, { "epoch": 0.40845522758459607, "grad_norm": 0.6947870941930266, "learning_rate": 3.2864557988645587e-06, "loss": 0.6387, "step": 13990 }, { "epoch": 0.40848442381244343, "grad_norm": 0.7749994124782004, "learning_rate": 3.2862935928629362e-06, "loss": 0.7182, "step": 13991 }, { "epoch": 0.4085136200402908, "grad_norm": 0.8192377215751617, "learning_rate": 3.2861313868613142e-06, "loss": 0.7572, "step": 13992 }, { "epoch": 0.40854281626813815, "grad_norm": 0.7182740156201232, "learning_rate": 3.2859691808596923e-06, "loss": 0.6384, "step": 13993 }, { "epoch": 0.4085720124959855, "grad_norm": 0.7788073035855263, "learning_rate": 3.2858069748580703e-06, "loss": 0.6794, "step": 13994 }, { "epoch": 0.4086012087238329, "grad_norm": 0.7417428903302338, "learning_rate": 3.285644768856448e-06, "loss": 0.6608, "step": 13995 }, { "epoch": 0.40863040495168024, "grad_norm": 0.6842849569692119, "learning_rate": 3.285482562854826e-06, "loss": 0.5838, "step": 13996 }, { "epoch": 0.4086596011795276, "grad_norm": 0.7828292963123642, "learning_rate": 3.285320356853204e-06, "loss": 0.6878, "step": 13997 }, { "epoch": 0.40868879740737496, "grad_norm": 0.7218754724768475, "learning_rate": 3.285158150851582e-06, "loss": 0.6386, "step": 13998 }, { "epoch": 0.4087179936352223, "grad_norm": 0.7368570280818145, "learning_rate": 3.2849959448499594e-06, "loss": 0.6457, "step": 13999 }, { "epoch": 0.4087471898630697, "grad_norm": 0.6775063218525014, "learning_rate": 3.2848337388483375e-06, "loss": 0.5441, "step": 14000 }, { "epoch": 0.40877638609091704, "grad_norm": 0.8144833076035136, "learning_rate": 3.2846715328467155e-06, "loss": 0.7244, "step": 14001 }, { "epoch": 0.4088055823187644, "grad_norm": 0.7032732197327681, "learning_rate": 3.2845093268450935e-06, "loss": 0.6263, "step": 14002 }, { "epoch": 0.40883477854661177, "grad_norm": 0.7173435325209447, "learning_rate": 3.2843471208434715e-06, "loss": 0.6823, "step": 14003 }, { "epoch": 0.4088639747744591, "grad_norm": 0.7641312857679863, "learning_rate": 3.284184914841849e-06, "loss": 0.6784, "step": 14004 }, { "epoch": 0.4088931710023065, "grad_norm": 0.6977134615092879, "learning_rate": 3.284022708840227e-06, "loss": 0.6137, "step": 14005 }, { "epoch": 0.40892236723015385, "grad_norm": 0.6897628512005292, "learning_rate": 3.283860502838605e-06, "loss": 0.6017, "step": 14006 }, { "epoch": 0.4089515634580012, "grad_norm": 0.7542518997297245, "learning_rate": 3.2836982968369835e-06, "loss": 0.6479, "step": 14007 }, { "epoch": 0.40898075968584857, "grad_norm": 0.722787535348119, "learning_rate": 3.2835360908353615e-06, "loss": 0.6342, "step": 14008 }, { "epoch": 0.40900995591369593, "grad_norm": 0.6825569825971128, "learning_rate": 3.2833738848337395e-06, "loss": 0.5598, "step": 14009 }, { "epoch": 0.4090391521415433, "grad_norm": 0.731234189270981, "learning_rate": 3.283211678832117e-06, "loss": 0.6685, "step": 14010 }, { "epoch": 0.40906834836939066, "grad_norm": 0.741129073176705, "learning_rate": 3.283049472830495e-06, "loss": 0.6831, "step": 14011 }, { "epoch": 0.409097544597238, "grad_norm": 0.7780860946209911, "learning_rate": 3.282887266828873e-06, "loss": 0.6906, "step": 14012 }, { "epoch": 0.4091267408250854, "grad_norm": 0.7344837963611027, "learning_rate": 3.282725060827251e-06, "loss": 0.6705, "step": 14013 }, { "epoch": 0.40915593705293274, "grad_norm": 0.7082804188287437, "learning_rate": 3.2825628548256287e-06, "loss": 0.6276, "step": 14014 }, { "epoch": 0.4091851332807801, "grad_norm": 0.7065026209201994, "learning_rate": 3.2824006488240067e-06, "loss": 0.6048, "step": 14015 }, { "epoch": 0.40921432950862746, "grad_norm": 0.7265696033999409, "learning_rate": 3.2822384428223847e-06, "loss": 0.6297, "step": 14016 }, { "epoch": 0.4092435257364748, "grad_norm": 0.7603025700731686, "learning_rate": 3.2820762368207627e-06, "loss": 0.7217, "step": 14017 }, { "epoch": 0.4092727219643222, "grad_norm": 0.7696856938312062, "learning_rate": 3.2819140308191403e-06, "loss": 0.7681, "step": 14018 }, { "epoch": 0.40930191819216954, "grad_norm": 0.8021486206510948, "learning_rate": 3.2817518248175183e-06, "loss": 0.7154, "step": 14019 }, { "epoch": 0.40933111442001696, "grad_norm": 0.6566329596091792, "learning_rate": 3.2815896188158963e-06, "loss": 0.5394, "step": 14020 }, { "epoch": 0.4093603106478643, "grad_norm": 0.7922319816863879, "learning_rate": 3.2814274128142743e-06, "loss": 0.6859, "step": 14021 }, { "epoch": 0.4093895068757117, "grad_norm": 0.780679640604786, "learning_rate": 3.2812652068126523e-06, "loss": 0.7133, "step": 14022 }, { "epoch": 0.40941870310355905, "grad_norm": 0.7345673389455019, "learning_rate": 3.28110300081103e-06, "loss": 0.6575, "step": 14023 }, { "epoch": 0.4094478993314064, "grad_norm": 0.7864443120611196, "learning_rate": 3.280940794809408e-06, "loss": 0.6733, "step": 14024 }, { "epoch": 0.40947709555925377, "grad_norm": 0.6910034777016593, "learning_rate": 3.280778588807786e-06, "loss": 0.5701, "step": 14025 }, { "epoch": 0.40950629178710113, "grad_norm": 0.7598041267717413, "learning_rate": 3.2806163828061644e-06, "loss": 0.717, "step": 14026 }, { "epoch": 0.4095354880149485, "grad_norm": 0.7096110770255353, "learning_rate": 3.2804541768045424e-06, "loss": 0.5981, "step": 14027 }, { "epoch": 0.40956468424279585, "grad_norm": 0.6340162986656886, "learning_rate": 3.2802919708029204e-06, "loss": 0.5206, "step": 14028 }, { "epoch": 0.4095938804706432, "grad_norm": 0.7344711227231597, "learning_rate": 3.280129764801298e-06, "loss": 0.7005, "step": 14029 }, { "epoch": 0.4096230766984906, "grad_norm": 0.6685371378993575, "learning_rate": 3.279967558799676e-06, "loss": 0.5809, "step": 14030 }, { "epoch": 0.40965227292633793, "grad_norm": 0.8462772857420461, "learning_rate": 3.279805352798054e-06, "loss": 0.6632, "step": 14031 }, { "epoch": 0.4096814691541853, "grad_norm": 0.737653574751032, "learning_rate": 3.279643146796432e-06, "loss": 0.6645, "step": 14032 }, { "epoch": 0.40971066538203266, "grad_norm": 0.7259518987961042, "learning_rate": 3.2794809407948096e-06, "loss": 0.634, "step": 14033 }, { "epoch": 0.40973986160988, "grad_norm": 0.6749876737164823, "learning_rate": 3.2793187347931876e-06, "loss": 0.6279, "step": 14034 }, { "epoch": 0.4097690578377274, "grad_norm": 0.7441338619825802, "learning_rate": 3.2791565287915656e-06, "loss": 0.6469, "step": 14035 }, { "epoch": 0.40979825406557474, "grad_norm": 0.7349399761662312, "learning_rate": 3.2789943227899436e-06, "loss": 0.6898, "step": 14036 }, { "epoch": 0.4098274502934221, "grad_norm": 0.7591109136924549, "learning_rate": 3.278832116788321e-06, "loss": 0.6956, "step": 14037 }, { "epoch": 0.40985664652126946, "grad_norm": 0.7137187215476135, "learning_rate": 3.278669910786699e-06, "loss": 0.6584, "step": 14038 }, { "epoch": 0.4098858427491168, "grad_norm": 0.7239057692676143, "learning_rate": 3.278507704785077e-06, "loss": 0.6475, "step": 14039 }, { "epoch": 0.4099150389769642, "grad_norm": 0.7022454546407048, "learning_rate": 3.278345498783455e-06, "loss": 0.5587, "step": 14040 }, { "epoch": 0.40994423520481155, "grad_norm": 0.6355089547419756, "learning_rate": 3.278183292781833e-06, "loss": 0.5003, "step": 14041 }, { "epoch": 0.4099734314326589, "grad_norm": 0.6881169739837242, "learning_rate": 3.2780210867802108e-06, "loss": 0.5937, "step": 14042 }, { "epoch": 0.41000262766050627, "grad_norm": 0.7777037810288315, "learning_rate": 3.2778588807785888e-06, "loss": 0.6887, "step": 14043 }, { "epoch": 0.41003182388835363, "grad_norm": 0.7374921700700102, "learning_rate": 3.277696674776967e-06, "loss": 0.6852, "step": 14044 }, { "epoch": 0.410061020116201, "grad_norm": 0.7896349063651344, "learning_rate": 3.2775344687753452e-06, "loss": 0.7281, "step": 14045 }, { "epoch": 0.41009021634404835, "grad_norm": 0.7170924064061758, "learning_rate": 3.2773722627737232e-06, "loss": 0.6163, "step": 14046 }, { "epoch": 0.4101194125718957, "grad_norm": 0.7255467229604375, "learning_rate": 3.2772100567721012e-06, "loss": 0.685, "step": 14047 }, { "epoch": 0.4101486087997431, "grad_norm": 0.7185215892004094, "learning_rate": 3.277047850770479e-06, "loss": 0.588, "step": 14048 }, { "epoch": 0.41017780502759044, "grad_norm": 0.7134258442841631, "learning_rate": 3.276885644768857e-06, "loss": 0.6907, "step": 14049 }, { "epoch": 0.4102070012554378, "grad_norm": 0.6860920809175846, "learning_rate": 3.276723438767235e-06, "loss": 0.5648, "step": 14050 }, { "epoch": 0.41023619748328516, "grad_norm": 0.7217082384433858, "learning_rate": 3.276561232765613e-06, "loss": 0.632, "step": 14051 }, { "epoch": 0.4102653937111325, "grad_norm": 0.7520498790394745, "learning_rate": 3.2763990267639904e-06, "loss": 0.6888, "step": 14052 }, { "epoch": 0.4102945899389799, "grad_norm": 0.784075882923669, "learning_rate": 3.2762368207623684e-06, "loss": 0.7056, "step": 14053 }, { "epoch": 0.41032378616682724, "grad_norm": 0.7187473260196432, "learning_rate": 3.2760746147607464e-06, "loss": 0.6663, "step": 14054 }, { "epoch": 0.4103529823946746, "grad_norm": 0.69911149840325, "learning_rate": 3.2759124087591244e-06, "loss": 0.6104, "step": 14055 }, { "epoch": 0.41038217862252196, "grad_norm": 0.6993478835392255, "learning_rate": 3.275750202757502e-06, "loss": 0.6125, "step": 14056 }, { "epoch": 0.4104113748503693, "grad_norm": 0.8104269254717754, "learning_rate": 3.27558799675588e-06, "loss": 0.7857, "step": 14057 }, { "epoch": 0.4104405710782167, "grad_norm": 0.7252658569862731, "learning_rate": 3.275425790754258e-06, "loss": 0.6122, "step": 14058 }, { "epoch": 0.41046976730606405, "grad_norm": 0.7452489590717728, "learning_rate": 3.275263584752636e-06, "loss": 0.6727, "step": 14059 }, { "epoch": 0.4104989635339114, "grad_norm": 0.7309817548709054, "learning_rate": 3.275101378751014e-06, "loss": 0.6158, "step": 14060 }, { "epoch": 0.41052815976175877, "grad_norm": 0.7493392165106244, "learning_rate": 3.2749391727493916e-06, "loss": 0.6711, "step": 14061 }, { "epoch": 0.41055735598960613, "grad_norm": 0.7188478360684334, "learning_rate": 3.2747769667477696e-06, "loss": 0.6391, "step": 14062 }, { "epoch": 0.4105865522174535, "grad_norm": 0.7518952848159153, "learning_rate": 3.274614760746148e-06, "loss": 0.6841, "step": 14063 }, { "epoch": 0.41061574844530085, "grad_norm": 0.7183870327845382, "learning_rate": 3.274452554744526e-06, "loss": 0.6966, "step": 14064 }, { "epoch": 0.4106449446731482, "grad_norm": 0.7168895746186047, "learning_rate": 3.274290348742904e-06, "loss": 0.6012, "step": 14065 }, { "epoch": 0.4106741409009956, "grad_norm": 0.7233581983234512, "learning_rate": 3.274128142741282e-06, "loss": 0.6444, "step": 14066 }, { "epoch": 0.41070333712884294, "grad_norm": 0.6565897733568545, "learning_rate": 3.2739659367396597e-06, "loss": 0.5592, "step": 14067 }, { "epoch": 0.4107325333566903, "grad_norm": 0.7677057274611446, "learning_rate": 3.2738037307380377e-06, "loss": 0.661, "step": 14068 }, { "epoch": 0.41076172958453766, "grad_norm": 0.763164871115945, "learning_rate": 3.2736415247364157e-06, "loss": 0.659, "step": 14069 }, { "epoch": 0.410790925812385, "grad_norm": 0.7349628841844029, "learning_rate": 3.2734793187347937e-06, "loss": 0.6784, "step": 14070 }, { "epoch": 0.4108201220402324, "grad_norm": 0.8616210640856281, "learning_rate": 3.2733171127331713e-06, "loss": 0.7303, "step": 14071 }, { "epoch": 0.41084931826807974, "grad_norm": 0.6998010219615448, "learning_rate": 3.2731549067315493e-06, "loss": 0.6294, "step": 14072 }, { "epoch": 0.4108785144959271, "grad_norm": 0.7184284065671441, "learning_rate": 3.2729927007299273e-06, "loss": 0.6602, "step": 14073 }, { "epoch": 0.41090771072377447, "grad_norm": 0.7892948927754168, "learning_rate": 3.2728304947283053e-06, "loss": 0.6854, "step": 14074 }, { "epoch": 0.4109369069516218, "grad_norm": 0.7129166035676423, "learning_rate": 3.272668288726683e-06, "loss": 0.6163, "step": 14075 }, { "epoch": 0.4109661031794692, "grad_norm": 0.6850400930184218, "learning_rate": 3.272506082725061e-06, "loss": 0.6195, "step": 14076 }, { "epoch": 0.41099529940731655, "grad_norm": 0.7355357239919813, "learning_rate": 3.272343876723439e-06, "loss": 0.648, "step": 14077 }, { "epoch": 0.4110244956351639, "grad_norm": 0.7533942836243493, "learning_rate": 3.272181670721817e-06, "loss": 0.7016, "step": 14078 }, { "epoch": 0.4110536918630113, "grad_norm": 0.7347736560633881, "learning_rate": 3.272019464720195e-06, "loss": 0.6309, "step": 14079 }, { "epoch": 0.41108288809085863, "grad_norm": 0.6760788482636221, "learning_rate": 3.2718572587185725e-06, "loss": 0.5828, "step": 14080 }, { "epoch": 0.41111208431870605, "grad_norm": 0.7430255799189157, "learning_rate": 3.2716950527169505e-06, "loss": 0.6736, "step": 14081 }, { "epoch": 0.4111412805465534, "grad_norm": 0.7180149849955184, "learning_rate": 3.271532846715329e-06, "loss": 0.6297, "step": 14082 }, { "epoch": 0.4111704767744008, "grad_norm": 0.7506389155163053, "learning_rate": 3.271370640713707e-06, "loss": 0.663, "step": 14083 }, { "epoch": 0.41119967300224813, "grad_norm": 0.8097610596957702, "learning_rate": 3.271208434712085e-06, "loss": 0.6996, "step": 14084 }, { "epoch": 0.4112288692300955, "grad_norm": 0.8005873063364917, "learning_rate": 3.2710462287104625e-06, "loss": 0.6037, "step": 14085 }, { "epoch": 0.41125806545794286, "grad_norm": 0.7375833493225351, "learning_rate": 3.2708840227088405e-06, "loss": 0.6462, "step": 14086 }, { "epoch": 0.4112872616857902, "grad_norm": 0.7453689004621626, "learning_rate": 3.2707218167072185e-06, "loss": 0.6979, "step": 14087 }, { "epoch": 0.4113164579136376, "grad_norm": 0.7195541687144483, "learning_rate": 3.2705596107055965e-06, "loss": 0.6423, "step": 14088 }, { "epoch": 0.41134565414148494, "grad_norm": 0.7625920418627619, "learning_rate": 3.2703974047039746e-06, "loss": 0.6814, "step": 14089 }, { "epoch": 0.4113748503693323, "grad_norm": 0.7537151881809159, "learning_rate": 3.270235198702352e-06, "loss": 0.687, "step": 14090 }, { "epoch": 0.41140404659717966, "grad_norm": 0.7744621021977194, "learning_rate": 3.27007299270073e-06, "loss": 0.7853, "step": 14091 }, { "epoch": 0.411433242825027, "grad_norm": 0.7317894928326932, "learning_rate": 3.269910786699108e-06, "loss": 0.5787, "step": 14092 }, { "epoch": 0.4114624390528744, "grad_norm": 0.7686758471785987, "learning_rate": 3.269748580697486e-06, "loss": 0.668, "step": 14093 }, { "epoch": 0.41149163528072175, "grad_norm": 0.7358862945232827, "learning_rate": 3.2695863746958637e-06, "loss": 0.6347, "step": 14094 }, { "epoch": 0.4115208315085691, "grad_norm": 0.7070269834455375, "learning_rate": 3.2694241686942417e-06, "loss": 0.6327, "step": 14095 }, { "epoch": 0.41155002773641647, "grad_norm": 0.7150308700507748, "learning_rate": 3.2692619626926198e-06, "loss": 0.5954, "step": 14096 }, { "epoch": 0.41157922396426383, "grad_norm": 0.6915689458116663, "learning_rate": 3.2690997566909978e-06, "loss": 0.657, "step": 14097 }, { "epoch": 0.4116084201921112, "grad_norm": 0.8200014077315909, "learning_rate": 3.2689375506893758e-06, "loss": 0.777, "step": 14098 }, { "epoch": 0.41163761641995855, "grad_norm": 0.7579303646708555, "learning_rate": 3.2687753446877533e-06, "loss": 0.6658, "step": 14099 }, { "epoch": 0.4116668126478059, "grad_norm": 0.7065785749410733, "learning_rate": 3.2686131386861314e-06, "loss": 0.5983, "step": 14100 }, { "epoch": 0.4116960088756533, "grad_norm": 0.722239235069941, "learning_rate": 3.2684509326845098e-06, "loss": 0.6144, "step": 14101 }, { "epoch": 0.41172520510350064, "grad_norm": 0.7072329263067623, "learning_rate": 3.268288726682888e-06, "loss": 0.6629, "step": 14102 }, { "epoch": 0.411754401331348, "grad_norm": 0.7334448333434129, "learning_rate": 3.268126520681266e-06, "loss": 0.6288, "step": 14103 }, { "epoch": 0.41178359755919536, "grad_norm": 0.7553959679030976, "learning_rate": 3.2679643146796434e-06, "loss": 0.7476, "step": 14104 }, { "epoch": 0.4118127937870427, "grad_norm": 0.7725332905437625, "learning_rate": 3.2678021086780214e-06, "loss": 0.736, "step": 14105 }, { "epoch": 0.4118419900148901, "grad_norm": 0.7333300788484552, "learning_rate": 3.2676399026763994e-06, "loss": 0.6275, "step": 14106 }, { "epoch": 0.41187118624273744, "grad_norm": 0.6868129014436893, "learning_rate": 3.2674776966747774e-06, "loss": 0.6184, "step": 14107 }, { "epoch": 0.4119003824705848, "grad_norm": 0.8105888149836626, "learning_rate": 3.2673154906731554e-06, "loss": 0.6338, "step": 14108 }, { "epoch": 0.41192957869843216, "grad_norm": 0.7660726404959315, "learning_rate": 3.267153284671533e-06, "loss": 0.6501, "step": 14109 }, { "epoch": 0.4119587749262795, "grad_norm": 0.73421361941202, "learning_rate": 3.266991078669911e-06, "loss": 0.6895, "step": 14110 }, { "epoch": 0.4119879711541269, "grad_norm": 0.7021608681371981, "learning_rate": 3.266828872668289e-06, "loss": 0.5796, "step": 14111 }, { "epoch": 0.41201716738197425, "grad_norm": 0.7786541061964434, "learning_rate": 3.266666666666667e-06, "loss": 0.5886, "step": 14112 }, { "epoch": 0.4120463636098216, "grad_norm": 0.7616632856491875, "learning_rate": 3.2665044606650446e-06, "loss": 0.6974, "step": 14113 }, { "epoch": 0.41207555983766897, "grad_norm": 0.7104145219022373, "learning_rate": 3.2663422546634226e-06, "loss": 0.6376, "step": 14114 }, { "epoch": 0.41210475606551633, "grad_norm": 0.7145981714665077, "learning_rate": 3.2661800486618006e-06, "loss": 0.6238, "step": 14115 }, { "epoch": 0.4121339522933637, "grad_norm": 0.7381322609496361, "learning_rate": 3.2660178426601786e-06, "loss": 0.6615, "step": 14116 }, { "epoch": 0.41216314852121105, "grad_norm": 0.8034274569565895, "learning_rate": 3.2658556366585566e-06, "loss": 0.6991, "step": 14117 }, { "epoch": 0.4121923447490584, "grad_norm": 0.7520844040078682, "learning_rate": 3.265693430656934e-06, "loss": 0.5752, "step": 14118 }, { "epoch": 0.4122215409769058, "grad_norm": 0.7224324816599946, "learning_rate": 3.2655312246553122e-06, "loss": 0.6858, "step": 14119 }, { "epoch": 0.41225073720475314, "grad_norm": 0.7783140358400465, "learning_rate": 3.2653690186536906e-06, "loss": 0.7368, "step": 14120 }, { "epoch": 0.4122799334326005, "grad_norm": 0.7676071275900476, "learning_rate": 3.2652068126520687e-06, "loss": 0.7447, "step": 14121 }, { "epoch": 0.41230912966044786, "grad_norm": 0.6306253983131984, "learning_rate": 3.2650446066504467e-06, "loss": 0.5688, "step": 14122 }, { "epoch": 0.4123383258882952, "grad_norm": 0.7542201162818162, "learning_rate": 3.2648824006488242e-06, "loss": 0.6723, "step": 14123 }, { "epoch": 0.4123675221161426, "grad_norm": 0.7805193011997927, "learning_rate": 3.2647201946472022e-06, "loss": 0.7146, "step": 14124 }, { "epoch": 0.41239671834398994, "grad_norm": 0.7480487591600644, "learning_rate": 3.2645579886455803e-06, "loss": 0.6541, "step": 14125 }, { "epoch": 0.4124259145718373, "grad_norm": 0.7137800600900873, "learning_rate": 3.2643957826439583e-06, "loss": 0.631, "step": 14126 }, { "epoch": 0.41245511079968467, "grad_norm": 0.708538706217593, "learning_rate": 3.2642335766423363e-06, "loss": 0.5872, "step": 14127 }, { "epoch": 0.412484307027532, "grad_norm": 0.7506583647534603, "learning_rate": 3.264071370640714e-06, "loss": 0.6572, "step": 14128 }, { "epoch": 0.4125135032553794, "grad_norm": 0.7300965289972107, "learning_rate": 3.263909164639092e-06, "loss": 0.6505, "step": 14129 }, { "epoch": 0.41254269948322675, "grad_norm": 0.7290334655186078, "learning_rate": 3.26374695863747e-06, "loss": 0.6815, "step": 14130 }, { "epoch": 0.4125718957110741, "grad_norm": 0.7305227994819012, "learning_rate": 3.263584752635848e-06, "loss": 0.6694, "step": 14131 }, { "epoch": 0.41260109193892147, "grad_norm": 0.7575274552976342, "learning_rate": 3.2634225466342255e-06, "loss": 0.6332, "step": 14132 }, { "epoch": 0.41263028816676883, "grad_norm": 0.7984064192637302, "learning_rate": 3.2632603406326035e-06, "loss": 0.7553, "step": 14133 }, { "epoch": 0.4126594843946162, "grad_norm": 0.7259205108856321, "learning_rate": 3.2630981346309815e-06, "loss": 0.6854, "step": 14134 }, { "epoch": 0.41268868062246356, "grad_norm": 0.7138595126013456, "learning_rate": 3.2629359286293595e-06, "loss": 0.5904, "step": 14135 }, { "epoch": 0.4127178768503109, "grad_norm": 0.7849967524167927, "learning_rate": 3.2627737226277375e-06, "loss": 0.705, "step": 14136 }, { "epoch": 0.4127470730781583, "grad_norm": 0.7813309185976258, "learning_rate": 3.262611516626115e-06, "loss": 0.7184, "step": 14137 }, { "epoch": 0.41277626930600564, "grad_norm": 0.7989957022069438, "learning_rate": 3.262449310624493e-06, "loss": 0.7194, "step": 14138 }, { "epoch": 0.412805465533853, "grad_norm": 0.6856375228482223, "learning_rate": 3.2622871046228715e-06, "loss": 0.5956, "step": 14139 }, { "epoch": 0.41283466176170036, "grad_norm": 0.6778766979072642, "learning_rate": 3.2621248986212495e-06, "loss": 0.6069, "step": 14140 }, { "epoch": 0.4128638579895478, "grad_norm": 0.7264709091217374, "learning_rate": 3.2619626926196275e-06, "loss": 0.6555, "step": 14141 }, { "epoch": 0.41289305421739514, "grad_norm": 0.749281559580829, "learning_rate": 3.261800486618005e-06, "loss": 0.6897, "step": 14142 }, { "epoch": 0.4129222504452425, "grad_norm": 0.7006306068267525, "learning_rate": 3.261638280616383e-06, "loss": 0.6199, "step": 14143 }, { "epoch": 0.41295144667308986, "grad_norm": 0.7220077297727613, "learning_rate": 3.261476074614761e-06, "loss": 0.6314, "step": 14144 }, { "epoch": 0.4129806429009372, "grad_norm": 0.6854809590647908, "learning_rate": 3.261313868613139e-06, "loss": 0.607, "step": 14145 }, { "epoch": 0.4130098391287846, "grad_norm": 0.7403356841629238, "learning_rate": 3.261151662611517e-06, "loss": 0.6786, "step": 14146 }, { "epoch": 0.41303903535663195, "grad_norm": 0.7623650894411689, "learning_rate": 3.2609894566098947e-06, "loss": 0.7319, "step": 14147 }, { "epoch": 0.4130682315844793, "grad_norm": 0.7692655939953162, "learning_rate": 3.2608272506082727e-06, "loss": 0.6494, "step": 14148 }, { "epoch": 0.41309742781232667, "grad_norm": 0.7547395239018235, "learning_rate": 3.2606650446066507e-06, "loss": 0.7107, "step": 14149 }, { "epoch": 0.41312662404017403, "grad_norm": 0.7082300197942591, "learning_rate": 3.2605028386050287e-06, "loss": 0.6536, "step": 14150 }, { "epoch": 0.4131558202680214, "grad_norm": 1.020969194831539, "learning_rate": 3.2603406326034063e-06, "loss": 0.5663, "step": 14151 }, { "epoch": 0.41318501649586875, "grad_norm": 0.7242090706451038, "learning_rate": 3.2601784266017843e-06, "loss": 0.6154, "step": 14152 }, { "epoch": 0.4132142127237161, "grad_norm": 0.7277462186971955, "learning_rate": 3.2600162206001623e-06, "loss": 0.6289, "step": 14153 }, { "epoch": 0.4132434089515635, "grad_norm": 0.7481264251129889, "learning_rate": 3.2598540145985403e-06, "loss": 0.6951, "step": 14154 }, { "epoch": 0.41327260517941083, "grad_norm": 0.8517538583213852, "learning_rate": 3.2596918085969183e-06, "loss": 0.6441, "step": 14155 }, { "epoch": 0.4133018014072582, "grad_norm": 0.7708411179292877, "learning_rate": 3.259529602595296e-06, "loss": 0.7233, "step": 14156 }, { "epoch": 0.41333099763510556, "grad_norm": 0.7620750532125021, "learning_rate": 3.259367396593674e-06, "loss": 0.7054, "step": 14157 }, { "epoch": 0.4133601938629529, "grad_norm": 0.7620935543231779, "learning_rate": 3.2592051905920524e-06, "loss": 0.6402, "step": 14158 }, { "epoch": 0.4133893900908003, "grad_norm": 0.6893344309962124, "learning_rate": 3.2590429845904304e-06, "loss": 0.5934, "step": 14159 }, { "epoch": 0.41341858631864764, "grad_norm": 0.7813585999330048, "learning_rate": 3.2588807785888084e-06, "loss": 0.7384, "step": 14160 }, { "epoch": 0.413447782546495, "grad_norm": 0.7222167612087237, "learning_rate": 3.258718572587186e-06, "loss": 0.5561, "step": 14161 }, { "epoch": 0.41347697877434236, "grad_norm": 0.8267811882527241, "learning_rate": 3.258556366585564e-06, "loss": 0.7827, "step": 14162 }, { "epoch": 0.4135061750021897, "grad_norm": 0.7083020342661075, "learning_rate": 3.258394160583942e-06, "loss": 0.6168, "step": 14163 }, { "epoch": 0.4135353712300371, "grad_norm": 0.7038889408922407, "learning_rate": 3.25823195458232e-06, "loss": 0.6264, "step": 14164 }, { "epoch": 0.41356456745788445, "grad_norm": 0.8649495869438694, "learning_rate": 3.258069748580698e-06, "loss": 0.8078, "step": 14165 }, { "epoch": 0.4135937636857318, "grad_norm": 0.7552308223563684, "learning_rate": 3.2579075425790756e-06, "loss": 0.6953, "step": 14166 }, { "epoch": 0.41362295991357917, "grad_norm": 0.7308776935888508, "learning_rate": 3.2577453365774536e-06, "loss": 0.6242, "step": 14167 }, { "epoch": 0.41365215614142653, "grad_norm": 0.7197680043810477, "learning_rate": 3.2575831305758316e-06, "loss": 0.6236, "step": 14168 }, { "epoch": 0.4136813523692739, "grad_norm": 0.7441862770332437, "learning_rate": 3.2574209245742096e-06, "loss": 0.7363, "step": 14169 }, { "epoch": 0.41371054859712125, "grad_norm": 0.7545746996316844, "learning_rate": 3.257258718572587e-06, "loss": 0.6941, "step": 14170 }, { "epoch": 0.4137397448249686, "grad_norm": 0.7858407070377852, "learning_rate": 3.257096512570965e-06, "loss": 0.706, "step": 14171 }, { "epoch": 0.413768941052816, "grad_norm": 0.7297363392081748, "learning_rate": 3.256934306569343e-06, "loss": 0.6528, "step": 14172 }, { "epoch": 0.41379813728066334, "grad_norm": 0.7000319744142146, "learning_rate": 3.256772100567721e-06, "loss": 0.6227, "step": 14173 }, { "epoch": 0.4138273335085107, "grad_norm": 0.6933043548501617, "learning_rate": 3.256609894566099e-06, "loss": 0.6385, "step": 14174 }, { "epoch": 0.41385652973635806, "grad_norm": 0.7062372765431368, "learning_rate": 3.2564476885644768e-06, "loss": 0.6248, "step": 14175 }, { "epoch": 0.4138857259642054, "grad_norm": 0.6921334044764429, "learning_rate": 3.256285482562855e-06, "loss": 0.6185, "step": 14176 }, { "epoch": 0.4139149221920528, "grad_norm": 0.7602514879574743, "learning_rate": 3.2561232765612332e-06, "loss": 0.617, "step": 14177 }, { "epoch": 0.41394411841990014, "grad_norm": 0.828641201529158, "learning_rate": 3.2559610705596112e-06, "loss": 0.7391, "step": 14178 }, { "epoch": 0.4139733146477475, "grad_norm": 0.8120314345382317, "learning_rate": 3.2557988645579892e-06, "loss": 0.7005, "step": 14179 }, { "epoch": 0.41400251087559486, "grad_norm": 0.7629192655979613, "learning_rate": 3.255636658556367e-06, "loss": 0.7191, "step": 14180 }, { "epoch": 0.4140317071034422, "grad_norm": 0.6890573434264564, "learning_rate": 3.255474452554745e-06, "loss": 0.6014, "step": 14181 }, { "epoch": 0.4140609033312896, "grad_norm": 0.6771026484535699, "learning_rate": 3.255312246553123e-06, "loss": 0.5962, "step": 14182 }, { "epoch": 0.41409009955913695, "grad_norm": 0.697664091375286, "learning_rate": 3.255150040551501e-06, "loss": 0.6106, "step": 14183 }, { "epoch": 0.4141192957869843, "grad_norm": 0.7445243435866756, "learning_rate": 3.254987834549879e-06, "loss": 0.5907, "step": 14184 }, { "epoch": 0.41414849201483167, "grad_norm": 0.7785260298029647, "learning_rate": 3.2548256285482564e-06, "loss": 0.7818, "step": 14185 }, { "epoch": 0.41417768824267903, "grad_norm": 0.6876219886447622, "learning_rate": 3.2546634225466344e-06, "loss": 0.5479, "step": 14186 }, { "epoch": 0.4142068844705264, "grad_norm": 0.7439315618101418, "learning_rate": 3.2545012165450124e-06, "loss": 0.6264, "step": 14187 }, { "epoch": 0.41423608069837375, "grad_norm": 0.7825448572051209, "learning_rate": 3.2543390105433904e-06, "loss": 0.6953, "step": 14188 }, { "epoch": 0.4142652769262211, "grad_norm": 0.7659349389610025, "learning_rate": 3.254176804541768e-06, "loss": 0.6467, "step": 14189 }, { "epoch": 0.4142944731540685, "grad_norm": 0.7329742797347777, "learning_rate": 3.254014598540146e-06, "loss": 0.6484, "step": 14190 }, { "epoch": 0.41432366938191584, "grad_norm": 0.6667133512997504, "learning_rate": 3.253852392538524e-06, "loss": 0.5668, "step": 14191 }, { "epoch": 0.4143528656097632, "grad_norm": 0.7891956553225624, "learning_rate": 3.253690186536902e-06, "loss": 0.7572, "step": 14192 }, { "epoch": 0.41438206183761056, "grad_norm": 0.8149951752810908, "learning_rate": 3.25352798053528e-06, "loss": 0.7111, "step": 14193 }, { "epoch": 0.4144112580654579, "grad_norm": 0.6929226099714232, "learning_rate": 3.2533657745336576e-06, "loss": 0.5578, "step": 14194 }, { "epoch": 0.4144404542933053, "grad_norm": 0.7888840275812293, "learning_rate": 3.253203568532036e-06, "loss": 0.7568, "step": 14195 }, { "epoch": 0.41446965052115264, "grad_norm": 0.6868715533500913, "learning_rate": 3.253041362530414e-06, "loss": 0.5766, "step": 14196 }, { "epoch": 0.414498846749, "grad_norm": 0.7335262842094045, "learning_rate": 3.252879156528792e-06, "loss": 0.6696, "step": 14197 }, { "epoch": 0.41452804297684737, "grad_norm": 0.7691148448405439, "learning_rate": 3.25271695052717e-06, "loss": 0.7406, "step": 14198 }, { "epoch": 0.4145572392046947, "grad_norm": 0.6879903671359011, "learning_rate": 3.2525547445255477e-06, "loss": 0.5922, "step": 14199 }, { "epoch": 0.4145864354325421, "grad_norm": 0.7268369066609632, "learning_rate": 3.2523925385239257e-06, "loss": 0.6964, "step": 14200 }, { "epoch": 0.4146156316603895, "grad_norm": 0.7466784287741864, "learning_rate": 3.2522303325223037e-06, "loss": 0.7285, "step": 14201 }, { "epoch": 0.41464482788823687, "grad_norm": 0.6949602311785763, "learning_rate": 3.2520681265206817e-06, "loss": 0.6818, "step": 14202 }, { "epoch": 0.4146740241160842, "grad_norm": 0.6934325555462796, "learning_rate": 3.2519059205190597e-06, "loss": 0.6052, "step": 14203 }, { "epoch": 0.4147032203439316, "grad_norm": 0.7456304878101078, "learning_rate": 3.2517437145174373e-06, "loss": 0.6585, "step": 14204 }, { "epoch": 0.41473241657177895, "grad_norm": 0.704572386030987, "learning_rate": 3.2515815085158153e-06, "loss": 0.6121, "step": 14205 }, { "epoch": 0.4147616127996263, "grad_norm": 0.7974706777693035, "learning_rate": 3.2514193025141933e-06, "loss": 0.7764, "step": 14206 }, { "epoch": 0.4147908090274737, "grad_norm": 0.7676146626372111, "learning_rate": 3.2512570965125713e-06, "loss": 0.6903, "step": 14207 }, { "epoch": 0.41482000525532103, "grad_norm": 0.6958728161105382, "learning_rate": 3.251094890510949e-06, "loss": 0.5549, "step": 14208 }, { "epoch": 0.4148492014831684, "grad_norm": 0.7584789093615878, "learning_rate": 3.250932684509327e-06, "loss": 0.6393, "step": 14209 }, { "epoch": 0.41487839771101576, "grad_norm": 0.7814787690657135, "learning_rate": 3.250770478507705e-06, "loss": 0.7026, "step": 14210 }, { "epoch": 0.4149075939388631, "grad_norm": 0.7477550704297474, "learning_rate": 3.250608272506083e-06, "loss": 0.6947, "step": 14211 }, { "epoch": 0.4149367901667105, "grad_norm": 0.727321276359073, "learning_rate": 3.250446066504461e-06, "loss": 0.6317, "step": 14212 }, { "epoch": 0.41496598639455784, "grad_norm": 0.7967082371293874, "learning_rate": 3.2502838605028385e-06, "loss": 0.7016, "step": 14213 }, { "epoch": 0.4149951826224052, "grad_norm": 0.7150715339327057, "learning_rate": 3.250121654501217e-06, "loss": 0.5716, "step": 14214 }, { "epoch": 0.41502437885025256, "grad_norm": 0.7514909245466833, "learning_rate": 3.249959448499595e-06, "loss": 0.7358, "step": 14215 }, { "epoch": 0.4150535750780999, "grad_norm": 0.7875919928349329, "learning_rate": 3.249797242497973e-06, "loss": 0.6489, "step": 14216 }, { "epoch": 0.4150827713059473, "grad_norm": 0.7679090657601563, "learning_rate": 3.249635036496351e-06, "loss": 0.7203, "step": 14217 }, { "epoch": 0.41511196753379465, "grad_norm": 0.7250171302529647, "learning_rate": 3.2494728304947285e-06, "loss": 0.6526, "step": 14218 }, { "epoch": 0.415141163761642, "grad_norm": 0.7022994268315893, "learning_rate": 3.2493106244931065e-06, "loss": 0.6167, "step": 14219 }, { "epoch": 0.41517035998948937, "grad_norm": 0.6605198316151863, "learning_rate": 3.2491484184914845e-06, "loss": 0.5523, "step": 14220 }, { "epoch": 0.41519955621733673, "grad_norm": 0.7319098455142087, "learning_rate": 3.2489862124898626e-06, "loss": 0.728, "step": 14221 }, { "epoch": 0.4152287524451841, "grad_norm": 0.7260742774699124, "learning_rate": 3.2488240064882406e-06, "loss": 0.6804, "step": 14222 }, { "epoch": 0.41525794867303145, "grad_norm": 0.7786211395230093, "learning_rate": 3.248661800486618e-06, "loss": 0.6996, "step": 14223 }, { "epoch": 0.4152871449008788, "grad_norm": 0.6531154137355331, "learning_rate": 3.248499594484996e-06, "loss": 0.578, "step": 14224 }, { "epoch": 0.4153163411287262, "grad_norm": 0.7034509671217867, "learning_rate": 3.248337388483374e-06, "loss": 0.6307, "step": 14225 }, { "epoch": 0.41534553735657354, "grad_norm": 0.7219690221601572, "learning_rate": 3.248175182481752e-06, "loss": 0.665, "step": 14226 }, { "epoch": 0.4153747335844209, "grad_norm": 0.6870453093993301, "learning_rate": 3.2480129764801297e-06, "loss": 0.5907, "step": 14227 }, { "epoch": 0.41540392981226826, "grad_norm": 0.779250113175147, "learning_rate": 3.2478507704785078e-06, "loss": 0.7368, "step": 14228 }, { "epoch": 0.4154331260401156, "grad_norm": 0.7462628025881767, "learning_rate": 3.2476885644768858e-06, "loss": 0.593, "step": 14229 }, { "epoch": 0.415462322267963, "grad_norm": 0.7915711197991235, "learning_rate": 3.2475263584752638e-06, "loss": 0.7858, "step": 14230 }, { "epoch": 0.41549151849581034, "grad_norm": 0.7743024691590155, "learning_rate": 3.2473641524736414e-06, "loss": 0.7027, "step": 14231 }, { "epoch": 0.4155207147236577, "grad_norm": 0.7156836560362284, "learning_rate": 3.2472019464720194e-06, "loss": 0.6588, "step": 14232 }, { "epoch": 0.41554991095150506, "grad_norm": 0.7633668381358599, "learning_rate": 3.247039740470398e-06, "loss": 0.6681, "step": 14233 }, { "epoch": 0.4155791071793524, "grad_norm": 0.7254623115875574, "learning_rate": 3.246877534468776e-06, "loss": 0.6877, "step": 14234 }, { "epoch": 0.4156083034071998, "grad_norm": 0.7494500353108867, "learning_rate": 3.246715328467154e-06, "loss": 0.6718, "step": 14235 }, { "epoch": 0.41563749963504715, "grad_norm": 0.7376439998405706, "learning_rate": 3.246553122465532e-06, "loss": 0.6776, "step": 14236 }, { "epoch": 0.4156666958628945, "grad_norm": 0.7232209507735246, "learning_rate": 3.2463909164639094e-06, "loss": 0.6298, "step": 14237 }, { "epoch": 0.41569589209074187, "grad_norm": 0.7625522268609786, "learning_rate": 3.2462287104622874e-06, "loss": 0.7122, "step": 14238 }, { "epoch": 0.41572508831858923, "grad_norm": 0.7217685119098449, "learning_rate": 3.2460665044606654e-06, "loss": 0.6347, "step": 14239 }, { "epoch": 0.4157542845464366, "grad_norm": 0.7724989394133273, "learning_rate": 3.2459042984590434e-06, "loss": 0.7272, "step": 14240 }, { "epoch": 0.41578348077428395, "grad_norm": 0.8226659535452777, "learning_rate": 3.2457420924574214e-06, "loss": 0.629, "step": 14241 }, { "epoch": 0.4158126770021313, "grad_norm": 0.7826524583824112, "learning_rate": 3.245579886455799e-06, "loss": 0.7179, "step": 14242 }, { "epoch": 0.4158418732299787, "grad_norm": 0.7480414290295965, "learning_rate": 3.245417680454177e-06, "loss": 0.6962, "step": 14243 }, { "epoch": 0.41587106945782604, "grad_norm": 0.8229481702752061, "learning_rate": 3.245255474452555e-06, "loss": 0.7596, "step": 14244 }, { "epoch": 0.4159002656856734, "grad_norm": 0.7539795093929861, "learning_rate": 3.245093268450933e-06, "loss": 0.7104, "step": 14245 }, { "epoch": 0.41592946191352076, "grad_norm": 0.7908076406755828, "learning_rate": 3.2449310624493106e-06, "loss": 0.6553, "step": 14246 }, { "epoch": 0.4159586581413681, "grad_norm": 0.6993943714300571, "learning_rate": 3.2447688564476886e-06, "loss": 0.5445, "step": 14247 }, { "epoch": 0.4159878543692155, "grad_norm": 0.7117871922950445, "learning_rate": 3.2446066504460666e-06, "loss": 0.6279, "step": 14248 }, { "epoch": 0.41601705059706284, "grad_norm": 0.7408887165085982, "learning_rate": 3.2444444444444446e-06, "loss": 0.6289, "step": 14249 }, { "epoch": 0.4160462468249102, "grad_norm": 0.734136765632401, "learning_rate": 3.244282238442822e-06, "loss": 0.6623, "step": 14250 }, { "epoch": 0.41607544305275757, "grad_norm": 0.6898803539342662, "learning_rate": 3.2441200324412002e-06, "loss": 0.6187, "step": 14251 }, { "epoch": 0.4161046392806049, "grad_norm": 0.7103198273001627, "learning_rate": 3.2439578264395786e-06, "loss": 0.6178, "step": 14252 }, { "epoch": 0.4161338355084523, "grad_norm": 0.7208977515072432, "learning_rate": 3.2437956204379567e-06, "loss": 0.6003, "step": 14253 }, { "epoch": 0.41616303173629965, "grad_norm": 0.7998875903691903, "learning_rate": 3.2436334144363347e-06, "loss": 0.6585, "step": 14254 }, { "epoch": 0.416192227964147, "grad_norm": 0.6898031383477575, "learning_rate": 3.2434712084347127e-06, "loss": 0.6049, "step": 14255 }, { "epoch": 0.41622142419199437, "grad_norm": 0.736706950917406, "learning_rate": 3.2433090024330903e-06, "loss": 0.6518, "step": 14256 }, { "epoch": 0.41625062041984173, "grad_norm": 0.7030090876773237, "learning_rate": 3.2431467964314683e-06, "loss": 0.6415, "step": 14257 }, { "epoch": 0.4162798166476891, "grad_norm": 0.779725059307504, "learning_rate": 3.2429845904298463e-06, "loss": 0.6111, "step": 14258 }, { "epoch": 0.41630901287553645, "grad_norm": 0.7329319016325474, "learning_rate": 3.2428223844282243e-06, "loss": 0.6155, "step": 14259 }, { "epoch": 0.4163382091033838, "grad_norm": 0.8170223309789232, "learning_rate": 3.2426601784266023e-06, "loss": 0.7232, "step": 14260 }, { "epoch": 0.4163674053312312, "grad_norm": 0.7547738830813009, "learning_rate": 3.24249797242498e-06, "loss": 0.682, "step": 14261 }, { "epoch": 0.4163966015590786, "grad_norm": 0.680763007544226, "learning_rate": 3.242335766423358e-06, "loss": 0.6204, "step": 14262 }, { "epoch": 0.41642579778692596, "grad_norm": 0.7454274915208832, "learning_rate": 3.242173560421736e-06, "loss": 0.6746, "step": 14263 }, { "epoch": 0.4164549940147733, "grad_norm": 0.7059535661769301, "learning_rate": 3.242011354420114e-06, "loss": 0.6127, "step": 14264 }, { "epoch": 0.4164841902426207, "grad_norm": 0.7255246386564554, "learning_rate": 3.2418491484184915e-06, "loss": 0.6076, "step": 14265 }, { "epoch": 0.41651338647046804, "grad_norm": 0.7137343678072567, "learning_rate": 3.2416869424168695e-06, "loss": 0.6127, "step": 14266 }, { "epoch": 0.4165425826983154, "grad_norm": 0.7325090966232072, "learning_rate": 3.2415247364152475e-06, "loss": 0.6945, "step": 14267 }, { "epoch": 0.41657177892616276, "grad_norm": 0.812405852982731, "learning_rate": 3.2413625304136255e-06, "loss": 0.6611, "step": 14268 }, { "epoch": 0.4166009751540101, "grad_norm": 0.7087714425156073, "learning_rate": 3.241200324412003e-06, "loss": 0.6299, "step": 14269 }, { "epoch": 0.4166301713818575, "grad_norm": 0.6821349705357794, "learning_rate": 3.241038118410381e-06, "loss": 0.6149, "step": 14270 }, { "epoch": 0.41665936760970484, "grad_norm": 0.8484250028221826, "learning_rate": 3.2408759124087595e-06, "loss": 0.7726, "step": 14271 }, { "epoch": 0.4166885638375522, "grad_norm": 0.778179664844209, "learning_rate": 3.2407137064071375e-06, "loss": 0.7242, "step": 14272 }, { "epoch": 0.41671776006539957, "grad_norm": 0.7880137135262296, "learning_rate": 3.2405515004055155e-06, "loss": 0.7087, "step": 14273 }, { "epoch": 0.41674695629324693, "grad_norm": 0.6671997119432294, "learning_rate": 3.2403892944038935e-06, "loss": 0.5425, "step": 14274 }, { "epoch": 0.4167761525210943, "grad_norm": 0.7061320415086662, "learning_rate": 3.240227088402271e-06, "loss": 0.5761, "step": 14275 }, { "epoch": 0.41680534874894165, "grad_norm": 0.7244493266890445, "learning_rate": 3.240064882400649e-06, "loss": 0.6967, "step": 14276 }, { "epoch": 0.416834544976789, "grad_norm": 0.6989358330423622, "learning_rate": 3.239902676399027e-06, "loss": 0.6386, "step": 14277 }, { "epoch": 0.4168637412046364, "grad_norm": 0.6986480950908891, "learning_rate": 3.239740470397405e-06, "loss": 0.621, "step": 14278 }, { "epoch": 0.41689293743248373, "grad_norm": 0.8011453321453185, "learning_rate": 3.239578264395783e-06, "loss": 0.7048, "step": 14279 }, { "epoch": 0.4169221336603311, "grad_norm": 0.7422199883874608, "learning_rate": 3.2394160583941607e-06, "loss": 0.7009, "step": 14280 }, { "epoch": 0.41695132988817846, "grad_norm": 0.7534960015341522, "learning_rate": 3.2392538523925387e-06, "loss": 0.6852, "step": 14281 }, { "epoch": 0.4169805261160258, "grad_norm": 0.7185682423395678, "learning_rate": 3.2390916463909167e-06, "loss": 0.6069, "step": 14282 }, { "epoch": 0.4170097223438732, "grad_norm": 0.7739587900094625, "learning_rate": 3.2389294403892947e-06, "loss": 0.7046, "step": 14283 }, { "epoch": 0.41703891857172054, "grad_norm": 0.7087583961924613, "learning_rate": 3.2387672343876723e-06, "loss": 0.6044, "step": 14284 }, { "epoch": 0.4170681147995679, "grad_norm": 0.7666855562577846, "learning_rate": 3.2386050283860503e-06, "loss": 0.6888, "step": 14285 }, { "epoch": 0.41709731102741526, "grad_norm": 0.7218751330992674, "learning_rate": 3.2384428223844283e-06, "loss": 0.6597, "step": 14286 }, { "epoch": 0.4171265072552626, "grad_norm": 0.7264026962263511, "learning_rate": 3.2382806163828063e-06, "loss": 0.6109, "step": 14287 }, { "epoch": 0.41715570348311, "grad_norm": 0.6815590790379212, "learning_rate": 3.238118410381184e-06, "loss": 0.6284, "step": 14288 }, { "epoch": 0.41718489971095735, "grad_norm": 0.7898528144868217, "learning_rate": 3.237956204379562e-06, "loss": 0.6946, "step": 14289 }, { "epoch": 0.4172140959388047, "grad_norm": 0.7064807109123583, "learning_rate": 3.2377939983779404e-06, "loss": 0.6185, "step": 14290 }, { "epoch": 0.41724329216665207, "grad_norm": 0.739393657602578, "learning_rate": 3.2376317923763184e-06, "loss": 0.7166, "step": 14291 }, { "epoch": 0.41727248839449943, "grad_norm": 0.6890200079172466, "learning_rate": 3.2374695863746964e-06, "loss": 0.5942, "step": 14292 }, { "epoch": 0.4173016846223468, "grad_norm": 0.7373479498622965, "learning_rate": 3.2373073803730744e-06, "loss": 0.7143, "step": 14293 }, { "epoch": 0.41733088085019415, "grad_norm": 0.7678468855062895, "learning_rate": 3.237145174371452e-06, "loss": 0.7039, "step": 14294 }, { "epoch": 0.4173600770780415, "grad_norm": 0.7334314690500348, "learning_rate": 3.23698296836983e-06, "loss": 0.6691, "step": 14295 }, { "epoch": 0.4173892733058889, "grad_norm": 0.7683860689490954, "learning_rate": 3.236820762368208e-06, "loss": 0.7068, "step": 14296 }, { "epoch": 0.41741846953373624, "grad_norm": 0.7444906733646975, "learning_rate": 3.236658556366586e-06, "loss": 0.6281, "step": 14297 }, { "epoch": 0.4174476657615836, "grad_norm": 0.7876898319382767, "learning_rate": 3.236496350364964e-06, "loss": 0.7891, "step": 14298 }, { "epoch": 0.41747686198943096, "grad_norm": 0.6965206292875744, "learning_rate": 3.2363341443633416e-06, "loss": 0.6074, "step": 14299 }, { "epoch": 0.4175060582172783, "grad_norm": 0.7411458619783926, "learning_rate": 3.2361719383617196e-06, "loss": 0.6832, "step": 14300 }, { "epoch": 0.4175352544451257, "grad_norm": 0.7376375515113227, "learning_rate": 3.2360097323600976e-06, "loss": 0.6791, "step": 14301 }, { "epoch": 0.41756445067297304, "grad_norm": 0.7403690103402858, "learning_rate": 3.2358475263584756e-06, "loss": 0.6632, "step": 14302 }, { "epoch": 0.4175936469008204, "grad_norm": 0.7867869523795935, "learning_rate": 3.235685320356853e-06, "loss": 0.6808, "step": 14303 }, { "epoch": 0.41762284312866776, "grad_norm": 0.7874460813783012, "learning_rate": 3.235523114355231e-06, "loss": 0.7858, "step": 14304 }, { "epoch": 0.4176520393565151, "grad_norm": 0.7385981918171407, "learning_rate": 3.235360908353609e-06, "loss": 0.6172, "step": 14305 }, { "epoch": 0.4176812355843625, "grad_norm": 0.7221387254209467, "learning_rate": 3.235198702351987e-06, "loss": 0.6087, "step": 14306 }, { "epoch": 0.41771043181220985, "grad_norm": 0.8560435881603594, "learning_rate": 3.2350364963503648e-06, "loss": 0.7393, "step": 14307 }, { "epoch": 0.4177396280400572, "grad_norm": 0.690341680902707, "learning_rate": 3.234874290348743e-06, "loss": 0.5803, "step": 14308 }, { "epoch": 0.41776882426790457, "grad_norm": 0.6859273842588112, "learning_rate": 3.2347120843471212e-06, "loss": 0.5847, "step": 14309 }, { "epoch": 0.41779802049575193, "grad_norm": 0.7238792523040815, "learning_rate": 3.2345498783454992e-06, "loss": 0.6776, "step": 14310 }, { "epoch": 0.4178272167235993, "grad_norm": 0.9337038893046046, "learning_rate": 3.2343876723438772e-06, "loss": 0.8175, "step": 14311 }, { "epoch": 0.41785641295144665, "grad_norm": 0.7473495605396148, "learning_rate": 3.2342254663422552e-06, "loss": 0.5976, "step": 14312 }, { "epoch": 0.417885609179294, "grad_norm": 0.7202880593629378, "learning_rate": 3.234063260340633e-06, "loss": 0.6317, "step": 14313 }, { "epoch": 0.4179148054071414, "grad_norm": 0.7002186980936573, "learning_rate": 3.233901054339011e-06, "loss": 0.6111, "step": 14314 }, { "epoch": 0.41794400163498874, "grad_norm": 0.7051991070190262, "learning_rate": 3.233738848337389e-06, "loss": 0.63, "step": 14315 }, { "epoch": 0.4179731978628361, "grad_norm": 0.759848296627724, "learning_rate": 3.233576642335767e-06, "loss": 0.7363, "step": 14316 }, { "epoch": 0.41800239409068346, "grad_norm": 0.8140000300759798, "learning_rate": 3.233414436334145e-06, "loss": 0.7462, "step": 14317 }, { "epoch": 0.4180315903185308, "grad_norm": 0.6768078267652734, "learning_rate": 3.2332522303325224e-06, "loss": 0.5748, "step": 14318 }, { "epoch": 0.4180607865463782, "grad_norm": 0.7357592380249728, "learning_rate": 3.2330900243309004e-06, "loss": 0.6697, "step": 14319 }, { "epoch": 0.41808998277422554, "grad_norm": 0.7803734703812383, "learning_rate": 3.2329278183292785e-06, "loss": 0.7379, "step": 14320 }, { "epoch": 0.4181191790020729, "grad_norm": 0.761136868461959, "learning_rate": 3.2327656123276565e-06, "loss": 0.6731, "step": 14321 }, { "epoch": 0.4181483752299203, "grad_norm": 0.7252230929062434, "learning_rate": 3.232603406326034e-06, "loss": 0.6721, "step": 14322 }, { "epoch": 0.4181775714577677, "grad_norm": 0.6694821664069783, "learning_rate": 3.232441200324412e-06, "loss": 0.5409, "step": 14323 }, { "epoch": 0.41820676768561504, "grad_norm": 0.7553354519464386, "learning_rate": 3.23227899432279e-06, "loss": 0.6692, "step": 14324 }, { "epoch": 0.4182359639134624, "grad_norm": 0.7278935246847672, "learning_rate": 3.232116788321168e-06, "loss": 0.6853, "step": 14325 }, { "epoch": 0.41826516014130977, "grad_norm": 0.7645050286833267, "learning_rate": 3.2319545823195456e-06, "loss": 0.7215, "step": 14326 }, { "epoch": 0.4182943563691571, "grad_norm": 0.7195607638940347, "learning_rate": 3.2317923763179237e-06, "loss": 0.6465, "step": 14327 }, { "epoch": 0.4183235525970045, "grad_norm": 0.7248796509528942, "learning_rate": 3.231630170316302e-06, "loss": 0.6612, "step": 14328 }, { "epoch": 0.41835274882485185, "grad_norm": 0.7763194321030992, "learning_rate": 3.23146796431468e-06, "loss": 0.7409, "step": 14329 }, { "epoch": 0.4183819450526992, "grad_norm": 0.7516322206128254, "learning_rate": 3.231305758313058e-06, "loss": 0.6949, "step": 14330 }, { "epoch": 0.4184111412805466, "grad_norm": 0.7446455017495491, "learning_rate": 3.231143552311436e-06, "loss": 0.738, "step": 14331 }, { "epoch": 0.41844033750839393, "grad_norm": 0.7207857897846621, "learning_rate": 3.2309813463098137e-06, "loss": 0.6194, "step": 14332 }, { "epoch": 0.4184695337362413, "grad_norm": 0.6909463904619693, "learning_rate": 3.2308191403081917e-06, "loss": 0.6237, "step": 14333 }, { "epoch": 0.41849872996408866, "grad_norm": 0.7401942516133464, "learning_rate": 3.2306569343065697e-06, "loss": 0.6794, "step": 14334 }, { "epoch": 0.418527926191936, "grad_norm": 0.7719124451608661, "learning_rate": 3.2304947283049477e-06, "loss": 0.6082, "step": 14335 }, { "epoch": 0.4185571224197834, "grad_norm": 0.7226293233392176, "learning_rate": 3.2303325223033257e-06, "loss": 0.6962, "step": 14336 }, { "epoch": 0.41858631864763074, "grad_norm": 0.6689882150047851, "learning_rate": 3.2301703163017033e-06, "loss": 0.5597, "step": 14337 }, { "epoch": 0.4186155148754781, "grad_norm": 0.7478775769857523, "learning_rate": 3.2300081103000813e-06, "loss": 0.6837, "step": 14338 }, { "epoch": 0.41864471110332546, "grad_norm": 0.7335521007004963, "learning_rate": 3.2298459042984593e-06, "loss": 0.6629, "step": 14339 }, { "epoch": 0.4186739073311728, "grad_norm": 0.7273857438798473, "learning_rate": 3.2296836982968373e-06, "loss": 0.6772, "step": 14340 }, { "epoch": 0.4187031035590202, "grad_norm": 0.7372657238790659, "learning_rate": 3.229521492295215e-06, "loss": 0.7322, "step": 14341 }, { "epoch": 0.41873229978686755, "grad_norm": 0.8078792599582155, "learning_rate": 3.229359286293593e-06, "loss": 0.7596, "step": 14342 }, { "epoch": 0.4187614960147149, "grad_norm": 0.7315072809905483, "learning_rate": 3.229197080291971e-06, "loss": 0.5767, "step": 14343 }, { "epoch": 0.41879069224256227, "grad_norm": 0.8084938637719521, "learning_rate": 3.229034874290349e-06, "loss": 0.743, "step": 14344 }, { "epoch": 0.41881988847040963, "grad_norm": 0.7478354968383784, "learning_rate": 3.2288726682887265e-06, "loss": 0.6776, "step": 14345 }, { "epoch": 0.418849084698257, "grad_norm": 0.7449259155733705, "learning_rate": 3.2287104622871054e-06, "loss": 0.7067, "step": 14346 }, { "epoch": 0.41887828092610435, "grad_norm": 0.7040155251936598, "learning_rate": 3.228548256285483e-06, "loss": 0.6443, "step": 14347 }, { "epoch": 0.4189074771539517, "grad_norm": 0.7415639915202267, "learning_rate": 3.228386050283861e-06, "loss": 0.6831, "step": 14348 }, { "epoch": 0.4189366733817991, "grad_norm": 0.763195793583169, "learning_rate": 3.228223844282239e-06, "loss": 0.7252, "step": 14349 }, { "epoch": 0.41896586960964644, "grad_norm": 0.8855116644509653, "learning_rate": 3.228061638280617e-06, "loss": 0.6536, "step": 14350 }, { "epoch": 0.4189950658374938, "grad_norm": 0.7491490027497296, "learning_rate": 3.2278994322789945e-06, "loss": 0.6732, "step": 14351 }, { "epoch": 0.41902426206534116, "grad_norm": 0.7655086296127096, "learning_rate": 3.2277372262773726e-06, "loss": 0.6983, "step": 14352 }, { "epoch": 0.4190534582931885, "grad_norm": 0.7579590017911316, "learning_rate": 3.2275750202757506e-06, "loss": 0.7293, "step": 14353 }, { "epoch": 0.4190826545210359, "grad_norm": 0.846474760101425, "learning_rate": 3.2274128142741286e-06, "loss": 0.763, "step": 14354 }, { "epoch": 0.41911185074888324, "grad_norm": 0.7357624988718615, "learning_rate": 3.227250608272506e-06, "loss": 0.6544, "step": 14355 }, { "epoch": 0.4191410469767306, "grad_norm": 0.7728884137667428, "learning_rate": 3.227088402270884e-06, "loss": 0.728, "step": 14356 }, { "epoch": 0.41917024320457796, "grad_norm": 0.7495303723700314, "learning_rate": 3.226926196269262e-06, "loss": 0.6937, "step": 14357 }, { "epoch": 0.4191994394324253, "grad_norm": 0.7716480068044398, "learning_rate": 3.22676399026764e-06, "loss": 0.7181, "step": 14358 }, { "epoch": 0.4192286356602727, "grad_norm": 0.7278337519732068, "learning_rate": 3.226601784266018e-06, "loss": 0.6663, "step": 14359 }, { "epoch": 0.41925783188812005, "grad_norm": 0.7470932193987877, "learning_rate": 3.2264395782643958e-06, "loss": 0.6409, "step": 14360 }, { "epoch": 0.4192870281159674, "grad_norm": 0.7587861572203247, "learning_rate": 3.2262773722627738e-06, "loss": 0.6848, "step": 14361 }, { "epoch": 0.41931622434381477, "grad_norm": 0.7922738264916039, "learning_rate": 3.2261151662611518e-06, "loss": 0.648, "step": 14362 }, { "epoch": 0.41934542057166213, "grad_norm": 0.646358477493505, "learning_rate": 3.2259529602595298e-06, "loss": 0.5775, "step": 14363 }, { "epoch": 0.4193746167995095, "grad_norm": 0.7013934761300907, "learning_rate": 3.2257907542579074e-06, "loss": 0.6678, "step": 14364 }, { "epoch": 0.41940381302735685, "grad_norm": 0.8251026607917384, "learning_rate": 3.2256285482562862e-06, "loss": 0.6871, "step": 14365 }, { "epoch": 0.4194330092552042, "grad_norm": 0.7399110669452874, "learning_rate": 3.225466342254664e-06, "loss": 0.6555, "step": 14366 }, { "epoch": 0.4194622054830516, "grad_norm": 0.6954966885082291, "learning_rate": 3.225304136253042e-06, "loss": 0.6476, "step": 14367 }, { "epoch": 0.41949140171089894, "grad_norm": 0.7612646741249981, "learning_rate": 3.22514193025142e-06, "loss": 0.7623, "step": 14368 }, { "epoch": 0.4195205979387463, "grad_norm": 0.7099244468269151, "learning_rate": 3.224979724249798e-06, "loss": 0.6186, "step": 14369 }, { "epoch": 0.41954979416659366, "grad_norm": 0.6944264208596315, "learning_rate": 3.2248175182481754e-06, "loss": 0.6023, "step": 14370 }, { "epoch": 0.419578990394441, "grad_norm": 0.7370308030221469, "learning_rate": 3.2246553122465534e-06, "loss": 0.6343, "step": 14371 }, { "epoch": 0.4196081866222884, "grad_norm": 0.7561140573117053, "learning_rate": 3.2244931062449314e-06, "loss": 0.7208, "step": 14372 }, { "epoch": 0.41963738285013574, "grad_norm": 0.7254952390736727, "learning_rate": 3.2243309002433094e-06, "loss": 0.6307, "step": 14373 }, { "epoch": 0.4196665790779831, "grad_norm": 1.0222986362475215, "learning_rate": 3.224168694241687e-06, "loss": 0.6443, "step": 14374 }, { "epoch": 0.41969577530583047, "grad_norm": 0.8015058510957558, "learning_rate": 3.224006488240065e-06, "loss": 0.7579, "step": 14375 }, { "epoch": 0.4197249715336778, "grad_norm": 0.7693635739399658, "learning_rate": 3.223844282238443e-06, "loss": 0.7292, "step": 14376 }, { "epoch": 0.4197541677615252, "grad_norm": 0.6883917248530272, "learning_rate": 3.223682076236821e-06, "loss": 0.576, "step": 14377 }, { "epoch": 0.41978336398937255, "grad_norm": 0.7751082010902753, "learning_rate": 3.223519870235199e-06, "loss": 0.6985, "step": 14378 }, { "epoch": 0.4198125602172199, "grad_norm": 0.7029428318617119, "learning_rate": 3.2233576642335766e-06, "loss": 0.6641, "step": 14379 }, { "epoch": 0.41984175644506727, "grad_norm": 0.6533310995445929, "learning_rate": 3.2231954582319546e-06, "loss": 0.5442, "step": 14380 }, { "epoch": 0.41987095267291463, "grad_norm": 0.7258390789622058, "learning_rate": 3.2230332522303326e-06, "loss": 0.701, "step": 14381 }, { "epoch": 0.41990014890076205, "grad_norm": 0.7423744885350665, "learning_rate": 3.2228710462287106e-06, "loss": 0.695, "step": 14382 }, { "epoch": 0.4199293451286094, "grad_norm": 0.7713589501449228, "learning_rate": 3.2227088402270882e-06, "loss": 0.7203, "step": 14383 }, { "epoch": 0.41995854135645677, "grad_norm": 0.746703725196921, "learning_rate": 3.222546634225467e-06, "loss": 0.699, "step": 14384 }, { "epoch": 0.41998773758430413, "grad_norm": 0.7259597546426321, "learning_rate": 3.2223844282238447e-06, "loss": 0.6752, "step": 14385 }, { "epoch": 0.4200169338121515, "grad_norm": 0.7929220148728543, "learning_rate": 3.2222222222222227e-06, "loss": 0.7475, "step": 14386 }, { "epoch": 0.42004613003999886, "grad_norm": 0.6601791760737661, "learning_rate": 3.2220600162206007e-06, "loss": 0.5421, "step": 14387 }, { "epoch": 0.4200753262678462, "grad_norm": 0.695138083783874, "learning_rate": 3.2218978102189787e-06, "loss": 0.59, "step": 14388 }, { "epoch": 0.4201045224956936, "grad_norm": 0.755320208190298, "learning_rate": 3.2217356042173563e-06, "loss": 0.7183, "step": 14389 }, { "epoch": 0.42013371872354094, "grad_norm": 0.7491956006835103, "learning_rate": 3.2215733982157343e-06, "loss": 0.6076, "step": 14390 }, { "epoch": 0.4201629149513883, "grad_norm": 0.6703628560823711, "learning_rate": 3.2214111922141123e-06, "loss": 0.5636, "step": 14391 }, { "epoch": 0.42019211117923566, "grad_norm": 0.7225224123779787, "learning_rate": 3.2212489862124903e-06, "loss": 0.6379, "step": 14392 }, { "epoch": 0.420221307407083, "grad_norm": 0.7859451280592876, "learning_rate": 3.221086780210868e-06, "loss": 0.7227, "step": 14393 }, { "epoch": 0.4202505036349304, "grad_norm": 0.7067500318186473, "learning_rate": 3.220924574209246e-06, "loss": 0.6346, "step": 14394 }, { "epoch": 0.42027969986277774, "grad_norm": 0.6729163406921632, "learning_rate": 3.220762368207624e-06, "loss": 0.5435, "step": 14395 }, { "epoch": 0.4203088960906251, "grad_norm": 0.8186618388919319, "learning_rate": 3.220600162206002e-06, "loss": 0.6846, "step": 14396 }, { "epoch": 0.42033809231847247, "grad_norm": 0.7143865722206464, "learning_rate": 3.22043795620438e-06, "loss": 0.6502, "step": 14397 }, { "epoch": 0.42036728854631983, "grad_norm": 0.7758236169519513, "learning_rate": 3.2202757502027575e-06, "loss": 0.6963, "step": 14398 }, { "epoch": 0.4203964847741672, "grad_norm": 0.7052326422815537, "learning_rate": 3.2201135442011355e-06, "loss": 0.6255, "step": 14399 }, { "epoch": 0.42042568100201455, "grad_norm": 0.7029998033148047, "learning_rate": 3.2199513381995135e-06, "loss": 0.6235, "step": 14400 }, { "epoch": 0.4204548772298619, "grad_norm": 0.7111276170535904, "learning_rate": 3.2197891321978915e-06, "loss": 0.5569, "step": 14401 }, { "epoch": 0.4204840734577093, "grad_norm": 0.6837853895798766, "learning_rate": 3.219626926196269e-06, "loss": 0.571, "step": 14402 }, { "epoch": 0.42051326968555663, "grad_norm": 0.7506165901143127, "learning_rate": 3.219464720194648e-06, "loss": 0.684, "step": 14403 }, { "epoch": 0.420542465913404, "grad_norm": 0.7318521625428558, "learning_rate": 3.2193025141930255e-06, "loss": 0.6371, "step": 14404 }, { "epoch": 0.42057166214125136, "grad_norm": 0.6904468821688092, "learning_rate": 3.2191403081914035e-06, "loss": 0.6302, "step": 14405 }, { "epoch": 0.4206008583690987, "grad_norm": 0.6639125391516606, "learning_rate": 3.2189781021897815e-06, "loss": 0.5431, "step": 14406 }, { "epoch": 0.4206300545969461, "grad_norm": 0.7130264935035491, "learning_rate": 3.2188158961881595e-06, "loss": 0.6582, "step": 14407 }, { "epoch": 0.42065925082479344, "grad_norm": 0.7298617974359738, "learning_rate": 3.218653690186537e-06, "loss": 0.6442, "step": 14408 }, { "epoch": 0.4206884470526408, "grad_norm": 0.7759879211436919, "learning_rate": 3.218491484184915e-06, "loss": 0.6519, "step": 14409 }, { "epoch": 0.42071764328048816, "grad_norm": 0.7275738275313716, "learning_rate": 3.218329278183293e-06, "loss": 0.6484, "step": 14410 }, { "epoch": 0.4207468395083355, "grad_norm": 0.7204019859606127, "learning_rate": 3.218167072181671e-06, "loss": 0.6821, "step": 14411 }, { "epoch": 0.4207760357361829, "grad_norm": 0.7096777450668831, "learning_rate": 3.2180048661800487e-06, "loss": 0.6284, "step": 14412 }, { "epoch": 0.42080523196403025, "grad_norm": 0.7067884954924548, "learning_rate": 3.2178426601784267e-06, "loss": 0.6136, "step": 14413 }, { "epoch": 0.4208344281918776, "grad_norm": 0.7736585131091925, "learning_rate": 3.2176804541768047e-06, "loss": 0.6917, "step": 14414 }, { "epoch": 0.42086362441972497, "grad_norm": 0.7110284375243473, "learning_rate": 3.2175182481751827e-06, "loss": 0.6347, "step": 14415 }, { "epoch": 0.42089282064757233, "grad_norm": 0.7161217795202752, "learning_rate": 3.2173560421735608e-06, "loss": 0.6449, "step": 14416 }, { "epoch": 0.4209220168754197, "grad_norm": 0.7379230057706224, "learning_rate": 3.2171938361719383e-06, "loss": 0.646, "step": 14417 }, { "epoch": 0.42095121310326705, "grad_norm": 0.7210579520686491, "learning_rate": 3.2170316301703163e-06, "loss": 0.6403, "step": 14418 }, { "epoch": 0.4209804093311144, "grad_norm": 0.7435159170420991, "learning_rate": 3.2168694241686943e-06, "loss": 0.6845, "step": 14419 }, { "epoch": 0.4210096055589618, "grad_norm": 0.7199307444641287, "learning_rate": 3.2167072181670724e-06, "loss": 0.655, "step": 14420 }, { "epoch": 0.42103880178680914, "grad_norm": 0.747645546897713, "learning_rate": 3.21654501216545e-06, "loss": 0.6367, "step": 14421 }, { "epoch": 0.4210679980146565, "grad_norm": 0.741213543360716, "learning_rate": 3.216382806163829e-06, "loss": 0.6389, "step": 14422 }, { "epoch": 0.42109719424250386, "grad_norm": 0.7075034480332907, "learning_rate": 3.2162206001622064e-06, "loss": 0.6102, "step": 14423 }, { "epoch": 0.4211263904703512, "grad_norm": 0.7009292764921212, "learning_rate": 3.2160583941605844e-06, "loss": 0.6207, "step": 14424 }, { "epoch": 0.4211555866981986, "grad_norm": 0.7754952974116549, "learning_rate": 3.2158961881589624e-06, "loss": 0.5862, "step": 14425 }, { "epoch": 0.42118478292604594, "grad_norm": 0.7865128566186066, "learning_rate": 3.2157339821573404e-06, "loss": 0.7118, "step": 14426 }, { "epoch": 0.4212139791538933, "grad_norm": 0.7150453756621481, "learning_rate": 3.215571776155718e-06, "loss": 0.6152, "step": 14427 }, { "epoch": 0.42124317538174066, "grad_norm": 0.7604905773891245, "learning_rate": 3.215409570154096e-06, "loss": 0.629, "step": 14428 }, { "epoch": 0.421272371609588, "grad_norm": 0.7263548109620457, "learning_rate": 3.215247364152474e-06, "loss": 0.6786, "step": 14429 }, { "epoch": 0.4213015678374354, "grad_norm": 0.7003731319996132, "learning_rate": 3.215085158150852e-06, "loss": 0.5777, "step": 14430 }, { "epoch": 0.42133076406528275, "grad_norm": 0.7486189654593045, "learning_rate": 3.2149229521492296e-06, "loss": 0.6628, "step": 14431 }, { "epoch": 0.4213599602931301, "grad_norm": 0.758413116578401, "learning_rate": 3.2147607461476076e-06, "loss": 0.701, "step": 14432 }, { "epoch": 0.42138915652097747, "grad_norm": 0.7240163390628126, "learning_rate": 3.2145985401459856e-06, "loss": 0.7212, "step": 14433 }, { "epoch": 0.42141835274882483, "grad_norm": 0.7279057710375986, "learning_rate": 3.2144363341443636e-06, "loss": 0.6766, "step": 14434 }, { "epoch": 0.4214475489766722, "grad_norm": 0.7394998766512358, "learning_rate": 3.2142741281427416e-06, "loss": 0.6849, "step": 14435 }, { "epoch": 0.42147674520451955, "grad_norm": 0.7295999863239282, "learning_rate": 3.214111922141119e-06, "loss": 0.6792, "step": 14436 }, { "epoch": 0.4215059414323669, "grad_norm": 0.8491185792164253, "learning_rate": 3.213949716139497e-06, "loss": 0.5942, "step": 14437 }, { "epoch": 0.4215351376602143, "grad_norm": 0.7494219132184062, "learning_rate": 3.213787510137875e-06, "loss": 0.7402, "step": 14438 }, { "epoch": 0.42156433388806164, "grad_norm": 0.7157875630562193, "learning_rate": 3.2136253041362532e-06, "loss": 0.6139, "step": 14439 }, { "epoch": 0.421593530115909, "grad_norm": 0.7139521191491359, "learning_rate": 3.213463098134631e-06, "loss": 0.6836, "step": 14440 }, { "epoch": 0.42162272634375636, "grad_norm": 0.692590462019896, "learning_rate": 3.2133008921330097e-06, "loss": 0.6096, "step": 14441 }, { "epoch": 0.4216519225716037, "grad_norm": 0.8511455649601276, "learning_rate": 3.2131386861313872e-06, "loss": 0.5891, "step": 14442 }, { "epoch": 0.42168111879945114, "grad_norm": 0.694015989105088, "learning_rate": 3.2129764801297652e-06, "loss": 0.6079, "step": 14443 }, { "epoch": 0.4217103150272985, "grad_norm": 0.7257852007630643, "learning_rate": 3.2128142741281432e-06, "loss": 0.6596, "step": 14444 }, { "epoch": 0.42173951125514586, "grad_norm": 0.7280303734553087, "learning_rate": 3.2126520681265213e-06, "loss": 0.6926, "step": 14445 }, { "epoch": 0.4217687074829932, "grad_norm": 0.7777904715492675, "learning_rate": 3.212489862124899e-06, "loss": 0.718, "step": 14446 }, { "epoch": 0.4217979037108406, "grad_norm": 0.7008448059537751, "learning_rate": 3.212327656123277e-06, "loss": 0.6283, "step": 14447 }, { "epoch": 0.42182709993868794, "grad_norm": 0.6585589578388491, "learning_rate": 3.212165450121655e-06, "loss": 0.5512, "step": 14448 }, { "epoch": 0.4218562961665353, "grad_norm": 0.7172923362308694, "learning_rate": 3.212003244120033e-06, "loss": 0.631, "step": 14449 }, { "epoch": 0.42188549239438267, "grad_norm": 0.7477972116885551, "learning_rate": 3.2118410381184104e-06, "loss": 0.6748, "step": 14450 }, { "epoch": 0.42191468862223, "grad_norm": 0.7837684832265865, "learning_rate": 3.2116788321167884e-06, "loss": 0.7614, "step": 14451 }, { "epoch": 0.4219438848500774, "grad_norm": 0.7597704474167163, "learning_rate": 3.2115166261151665e-06, "loss": 0.6509, "step": 14452 }, { "epoch": 0.42197308107792475, "grad_norm": 0.8352210451120348, "learning_rate": 3.2113544201135445e-06, "loss": 0.7583, "step": 14453 }, { "epoch": 0.4220022773057721, "grad_norm": 0.7126809995482728, "learning_rate": 3.2111922141119225e-06, "loss": 0.6575, "step": 14454 }, { "epoch": 0.4220314735336195, "grad_norm": 0.8408835413561244, "learning_rate": 3.2110300081103e-06, "loss": 0.6891, "step": 14455 }, { "epoch": 0.42206066976146683, "grad_norm": 0.7292003077504594, "learning_rate": 3.210867802108678e-06, "loss": 0.6497, "step": 14456 }, { "epoch": 0.4220898659893142, "grad_norm": 0.7384434500120393, "learning_rate": 3.210705596107056e-06, "loss": 0.6463, "step": 14457 }, { "epoch": 0.42211906221716156, "grad_norm": 0.7229957655838314, "learning_rate": 3.210543390105434e-06, "loss": 0.6458, "step": 14458 }, { "epoch": 0.4221482584450089, "grad_norm": 0.7428924022361033, "learning_rate": 3.2103811841038117e-06, "loss": 0.602, "step": 14459 }, { "epoch": 0.4221774546728563, "grad_norm": 0.8636302409927703, "learning_rate": 3.2102189781021905e-06, "loss": 0.7101, "step": 14460 }, { "epoch": 0.42220665090070364, "grad_norm": 0.697731009491589, "learning_rate": 3.210056772100568e-06, "loss": 0.6158, "step": 14461 }, { "epoch": 0.422235847128551, "grad_norm": 0.7777185639426951, "learning_rate": 3.209894566098946e-06, "loss": 0.6838, "step": 14462 }, { "epoch": 0.42226504335639836, "grad_norm": 1.1032872531785625, "learning_rate": 3.209732360097324e-06, "loss": 0.6469, "step": 14463 }, { "epoch": 0.4222942395842457, "grad_norm": 0.7621659214930067, "learning_rate": 3.209570154095702e-06, "loss": 0.6606, "step": 14464 }, { "epoch": 0.4223234358120931, "grad_norm": 0.7592171485777052, "learning_rate": 3.2094079480940797e-06, "loss": 0.7081, "step": 14465 }, { "epoch": 0.42235263203994045, "grad_norm": 0.7726259593053545, "learning_rate": 3.2092457420924577e-06, "loss": 0.716, "step": 14466 }, { "epoch": 0.4223818282677878, "grad_norm": 0.7447925647713871, "learning_rate": 3.2090835360908357e-06, "loss": 0.6594, "step": 14467 }, { "epoch": 0.42241102449563517, "grad_norm": 0.7711988617883584, "learning_rate": 3.2089213300892137e-06, "loss": 0.6748, "step": 14468 }, { "epoch": 0.42244022072348253, "grad_norm": 0.827034808012254, "learning_rate": 3.2087591240875913e-06, "loss": 0.7275, "step": 14469 }, { "epoch": 0.4224694169513299, "grad_norm": 0.7513693693049741, "learning_rate": 3.2085969180859693e-06, "loss": 0.7029, "step": 14470 }, { "epoch": 0.42249861317917725, "grad_norm": 0.7476503154674412, "learning_rate": 3.2084347120843473e-06, "loss": 0.6422, "step": 14471 }, { "epoch": 0.4225278094070246, "grad_norm": 0.7193478922696013, "learning_rate": 3.2082725060827253e-06, "loss": 0.57, "step": 14472 }, { "epoch": 0.422557005634872, "grad_norm": 0.750959355368673, "learning_rate": 3.2081103000811033e-06, "loss": 0.7327, "step": 14473 }, { "epoch": 0.42258620186271934, "grad_norm": 0.7467223966222093, "learning_rate": 3.207948094079481e-06, "loss": 0.6018, "step": 14474 }, { "epoch": 0.4226153980905667, "grad_norm": 0.787025458660387, "learning_rate": 3.207785888077859e-06, "loss": 0.6592, "step": 14475 }, { "epoch": 0.42264459431841406, "grad_norm": 0.7639520937357264, "learning_rate": 3.207623682076237e-06, "loss": 0.6486, "step": 14476 }, { "epoch": 0.4226737905462614, "grad_norm": 0.7199789514050534, "learning_rate": 3.207461476074615e-06, "loss": 0.6344, "step": 14477 }, { "epoch": 0.4227029867741088, "grad_norm": 0.7842766315206497, "learning_rate": 3.2072992700729925e-06, "loss": 0.7017, "step": 14478 }, { "epoch": 0.42273218300195614, "grad_norm": 0.7139548931929086, "learning_rate": 3.207137064071371e-06, "loss": 0.6977, "step": 14479 }, { "epoch": 0.4227613792298035, "grad_norm": 0.6833664622866916, "learning_rate": 3.206974858069749e-06, "loss": 0.5691, "step": 14480 }, { "epoch": 0.42279057545765086, "grad_norm": 0.7682051134107541, "learning_rate": 3.206812652068127e-06, "loss": 0.691, "step": 14481 }, { "epoch": 0.4228197716854982, "grad_norm": 0.7225862287915359, "learning_rate": 3.206650446066505e-06, "loss": 0.6196, "step": 14482 }, { "epoch": 0.4228489679133456, "grad_norm": 0.7519745866942089, "learning_rate": 3.206488240064883e-06, "loss": 0.736, "step": 14483 }, { "epoch": 0.42287816414119295, "grad_norm": 0.6951675350161547, "learning_rate": 3.2063260340632606e-06, "loss": 0.6374, "step": 14484 }, { "epoch": 0.4229073603690403, "grad_norm": 0.7597670333461719, "learning_rate": 3.2061638280616386e-06, "loss": 0.7415, "step": 14485 }, { "epoch": 0.42293655659688767, "grad_norm": 0.7497685677487974, "learning_rate": 3.2060016220600166e-06, "loss": 0.6944, "step": 14486 }, { "epoch": 0.42296575282473503, "grad_norm": 0.7257808918231614, "learning_rate": 3.2058394160583946e-06, "loss": 0.6457, "step": 14487 }, { "epoch": 0.4229949490525824, "grad_norm": 0.7905334296001018, "learning_rate": 3.205677210056772e-06, "loss": 0.6433, "step": 14488 }, { "epoch": 0.42302414528042975, "grad_norm": 0.7363701319041427, "learning_rate": 3.20551500405515e-06, "loss": 0.6662, "step": 14489 }, { "epoch": 0.4230533415082771, "grad_norm": 0.7074592537467312, "learning_rate": 3.205352798053528e-06, "loss": 0.6445, "step": 14490 }, { "epoch": 0.4230825377361245, "grad_norm": 0.8044603611616055, "learning_rate": 3.205190592051906e-06, "loss": 0.6759, "step": 14491 }, { "epoch": 0.42311173396397184, "grad_norm": 0.6780784418397482, "learning_rate": 3.205028386050284e-06, "loss": 0.5909, "step": 14492 }, { "epoch": 0.4231409301918192, "grad_norm": 0.749884672915658, "learning_rate": 3.2048661800486618e-06, "loss": 0.697, "step": 14493 }, { "epoch": 0.42317012641966656, "grad_norm": 0.7099167176031155, "learning_rate": 3.2047039740470398e-06, "loss": 0.6431, "step": 14494 }, { "epoch": 0.4231993226475139, "grad_norm": 0.66401054352494, "learning_rate": 3.2045417680454178e-06, "loss": 0.5608, "step": 14495 }, { "epoch": 0.4232285188753613, "grad_norm": 0.7630504409171815, "learning_rate": 3.204379562043796e-06, "loss": 0.7105, "step": 14496 }, { "epoch": 0.42325771510320864, "grad_norm": 0.7361797303075894, "learning_rate": 3.2042173560421742e-06, "loss": 0.6649, "step": 14497 }, { "epoch": 0.423286911331056, "grad_norm": 0.7286966181210025, "learning_rate": 3.204055150040552e-06, "loss": 0.673, "step": 14498 }, { "epoch": 0.42331610755890337, "grad_norm": 0.7549935501242587, "learning_rate": 3.20389294403893e-06, "loss": 0.7033, "step": 14499 }, { "epoch": 0.4233453037867507, "grad_norm": 0.731044937697623, "learning_rate": 3.203730738037308e-06, "loss": 0.6945, "step": 14500 }, { "epoch": 0.4233745000145981, "grad_norm": 0.7048239334049086, "learning_rate": 3.203568532035686e-06, "loss": 0.5635, "step": 14501 }, { "epoch": 0.42340369624244545, "grad_norm": 0.7400290673573087, "learning_rate": 3.203406326034064e-06, "loss": 0.6887, "step": 14502 }, { "epoch": 0.42343289247029287, "grad_norm": 0.6915333980868648, "learning_rate": 3.2032441200324414e-06, "loss": 0.5982, "step": 14503 }, { "epoch": 0.4234620886981402, "grad_norm": 0.7367117991829615, "learning_rate": 3.2030819140308194e-06, "loss": 0.667, "step": 14504 }, { "epoch": 0.4234912849259876, "grad_norm": 0.7400811158312239, "learning_rate": 3.2029197080291974e-06, "loss": 0.6896, "step": 14505 }, { "epoch": 0.42352048115383495, "grad_norm": 0.7028140299987203, "learning_rate": 3.2027575020275754e-06, "loss": 0.6325, "step": 14506 }, { "epoch": 0.4235496773816823, "grad_norm": 0.6969750143389313, "learning_rate": 3.202595296025953e-06, "loss": 0.5715, "step": 14507 }, { "epoch": 0.42357887360952967, "grad_norm": 0.7987077333571287, "learning_rate": 3.202433090024331e-06, "loss": 0.7544, "step": 14508 }, { "epoch": 0.42360806983737703, "grad_norm": 0.75155814588533, "learning_rate": 3.202270884022709e-06, "loss": 0.7099, "step": 14509 }, { "epoch": 0.4236372660652244, "grad_norm": 0.7382587648996937, "learning_rate": 3.202108678021087e-06, "loss": 0.5833, "step": 14510 }, { "epoch": 0.42366646229307175, "grad_norm": 0.7477232078490295, "learning_rate": 3.201946472019465e-06, "loss": 0.7114, "step": 14511 }, { "epoch": 0.4236956585209191, "grad_norm": 0.6973131560686127, "learning_rate": 3.2017842660178426e-06, "loss": 0.655, "step": 14512 }, { "epoch": 0.4237248547487665, "grad_norm": 0.6975021545791408, "learning_rate": 3.2016220600162206e-06, "loss": 0.6402, "step": 14513 }, { "epoch": 0.42375405097661384, "grad_norm": 0.7512371268617205, "learning_rate": 3.2014598540145986e-06, "loss": 0.6392, "step": 14514 }, { "epoch": 0.4237832472044612, "grad_norm": 0.776432023282323, "learning_rate": 3.2012976480129766e-06, "loss": 0.7218, "step": 14515 }, { "epoch": 0.42381244343230856, "grad_norm": 0.7014258249827315, "learning_rate": 3.201135442011355e-06, "loss": 0.5992, "step": 14516 }, { "epoch": 0.4238416396601559, "grad_norm": 0.6841133313880685, "learning_rate": 3.2009732360097327e-06, "loss": 0.6201, "step": 14517 }, { "epoch": 0.4238708358880033, "grad_norm": 0.7644418490106599, "learning_rate": 3.2008110300081107e-06, "loss": 0.745, "step": 14518 }, { "epoch": 0.42390003211585064, "grad_norm": 0.7525838585817507, "learning_rate": 3.2006488240064887e-06, "loss": 0.6637, "step": 14519 }, { "epoch": 0.423929228343698, "grad_norm": 0.7728217308473129, "learning_rate": 3.2004866180048667e-06, "loss": 0.6741, "step": 14520 }, { "epoch": 0.42395842457154537, "grad_norm": 0.7413804101337442, "learning_rate": 3.2003244120032447e-06, "loss": 0.6841, "step": 14521 }, { "epoch": 0.42398762079939273, "grad_norm": 0.7555736103479869, "learning_rate": 3.2001622060016223e-06, "loss": 0.6359, "step": 14522 }, { "epoch": 0.4240168170272401, "grad_norm": 0.7183147315326678, "learning_rate": 3.2000000000000003e-06, "loss": 0.6723, "step": 14523 }, { "epoch": 0.42404601325508745, "grad_norm": 0.7178014731880982, "learning_rate": 3.1998377939983783e-06, "loss": 0.6464, "step": 14524 }, { "epoch": 0.4240752094829348, "grad_norm": 0.785329958893381, "learning_rate": 3.1996755879967563e-06, "loss": 0.6772, "step": 14525 }, { "epoch": 0.4241044057107822, "grad_norm": 0.722093932598228, "learning_rate": 3.199513381995134e-06, "loss": 0.6331, "step": 14526 }, { "epoch": 0.42413360193862953, "grad_norm": 0.7961678685571619, "learning_rate": 3.199351175993512e-06, "loss": 0.733, "step": 14527 }, { "epoch": 0.4241627981664769, "grad_norm": 0.7535900068515018, "learning_rate": 3.19918896999189e-06, "loss": 0.6691, "step": 14528 }, { "epoch": 0.42419199439432426, "grad_norm": 0.6720421733025095, "learning_rate": 3.199026763990268e-06, "loss": 0.5726, "step": 14529 }, { "epoch": 0.4242211906221716, "grad_norm": 0.6933160951658894, "learning_rate": 3.198864557988646e-06, "loss": 0.5709, "step": 14530 }, { "epoch": 0.424250386850019, "grad_norm": 0.792299046343935, "learning_rate": 3.1987023519870235e-06, "loss": 0.7109, "step": 14531 }, { "epoch": 0.42427958307786634, "grad_norm": 0.7783002985244677, "learning_rate": 3.1985401459854015e-06, "loss": 0.692, "step": 14532 }, { "epoch": 0.4243087793057137, "grad_norm": 0.714010323698986, "learning_rate": 3.1983779399837795e-06, "loss": 0.6194, "step": 14533 }, { "epoch": 0.42433797553356106, "grad_norm": 0.6609042832449463, "learning_rate": 3.1982157339821575e-06, "loss": 0.5743, "step": 14534 }, { "epoch": 0.4243671717614084, "grad_norm": 0.7167212164245288, "learning_rate": 3.198053527980536e-06, "loss": 0.6707, "step": 14535 }, { "epoch": 0.4243963679892558, "grad_norm": 0.7495343144687682, "learning_rate": 3.1978913219789135e-06, "loss": 0.6839, "step": 14536 }, { "epoch": 0.42442556421710315, "grad_norm": 0.7331677476184417, "learning_rate": 3.1977291159772915e-06, "loss": 0.6646, "step": 14537 }, { "epoch": 0.4244547604449505, "grad_norm": 0.6982691824872218, "learning_rate": 3.1975669099756695e-06, "loss": 0.5964, "step": 14538 }, { "epoch": 0.42448395667279787, "grad_norm": 0.7689657603283963, "learning_rate": 3.1974047039740475e-06, "loss": 0.6685, "step": 14539 }, { "epoch": 0.42451315290064523, "grad_norm": 0.8051816756084567, "learning_rate": 3.1972424979724255e-06, "loss": 0.7188, "step": 14540 }, { "epoch": 0.4245423491284926, "grad_norm": 0.7336979670408423, "learning_rate": 3.197080291970803e-06, "loss": 0.6113, "step": 14541 }, { "epoch": 0.42457154535633995, "grad_norm": 0.7150998577360161, "learning_rate": 3.196918085969181e-06, "loss": 0.6969, "step": 14542 }, { "epoch": 0.4246007415841873, "grad_norm": 0.7383171205910979, "learning_rate": 3.196755879967559e-06, "loss": 0.6501, "step": 14543 }, { "epoch": 0.4246299378120347, "grad_norm": 0.7423404977584441, "learning_rate": 3.196593673965937e-06, "loss": 0.607, "step": 14544 }, { "epoch": 0.42465913403988204, "grad_norm": 0.7871841346153202, "learning_rate": 3.1964314679643147e-06, "loss": 0.7308, "step": 14545 }, { "epoch": 0.4246883302677294, "grad_norm": 0.7756994182212762, "learning_rate": 3.1962692619626927e-06, "loss": 0.7235, "step": 14546 }, { "epoch": 0.42471752649557676, "grad_norm": 0.7183060567944684, "learning_rate": 3.1961070559610707e-06, "loss": 0.6317, "step": 14547 }, { "epoch": 0.4247467227234241, "grad_norm": 0.7335218928079912, "learning_rate": 3.1959448499594488e-06, "loss": 0.6812, "step": 14548 }, { "epoch": 0.4247759189512715, "grad_norm": 0.7087944160851886, "learning_rate": 3.1957826439578268e-06, "loss": 0.6076, "step": 14549 }, { "epoch": 0.42480511517911884, "grad_norm": 0.73752857117461, "learning_rate": 3.1956204379562043e-06, "loss": 0.6456, "step": 14550 }, { "epoch": 0.4248343114069662, "grad_norm": 0.7020443062329643, "learning_rate": 3.1954582319545824e-06, "loss": 0.6439, "step": 14551 }, { "epoch": 0.42486350763481356, "grad_norm": 0.7303849060414812, "learning_rate": 3.1952960259529604e-06, "loss": 0.7006, "step": 14552 }, { "epoch": 0.4248927038626609, "grad_norm": 0.7757591944543948, "learning_rate": 3.1951338199513384e-06, "loss": 0.7277, "step": 14553 }, { "epoch": 0.4249219000905083, "grad_norm": 0.7646596496272509, "learning_rate": 3.194971613949717e-06, "loss": 0.73, "step": 14554 }, { "epoch": 0.42495109631835565, "grad_norm": 0.7493747731759812, "learning_rate": 3.1948094079480944e-06, "loss": 0.5887, "step": 14555 }, { "epoch": 0.424980292546203, "grad_norm": 0.7528649736158208, "learning_rate": 3.1946472019464724e-06, "loss": 0.628, "step": 14556 }, { "epoch": 0.42500948877405037, "grad_norm": 1.032477131559399, "learning_rate": 3.1944849959448504e-06, "loss": 0.7411, "step": 14557 }, { "epoch": 0.42503868500189773, "grad_norm": 0.7638173458016319, "learning_rate": 3.1943227899432284e-06, "loss": 0.6604, "step": 14558 }, { "epoch": 0.4250678812297451, "grad_norm": 0.7087426285331717, "learning_rate": 3.1941605839416064e-06, "loss": 0.6219, "step": 14559 }, { "epoch": 0.42509707745759245, "grad_norm": 0.6747379697287995, "learning_rate": 3.193998377939984e-06, "loss": 0.5619, "step": 14560 }, { "epoch": 0.4251262736854398, "grad_norm": 0.6669335128226234, "learning_rate": 3.193836171938362e-06, "loss": 0.5903, "step": 14561 }, { "epoch": 0.4251554699132872, "grad_norm": 0.8251829237489174, "learning_rate": 3.19367396593674e-06, "loss": 0.6938, "step": 14562 }, { "epoch": 0.4251846661411346, "grad_norm": 0.7310489932209985, "learning_rate": 3.193511759935118e-06, "loss": 0.6021, "step": 14563 }, { "epoch": 0.42521386236898195, "grad_norm": 0.6892605249559826, "learning_rate": 3.1933495539334956e-06, "loss": 0.6175, "step": 14564 }, { "epoch": 0.4252430585968293, "grad_norm": 0.761200563917063, "learning_rate": 3.1931873479318736e-06, "loss": 0.692, "step": 14565 }, { "epoch": 0.4252722548246767, "grad_norm": 0.7646664436310988, "learning_rate": 3.1930251419302516e-06, "loss": 0.6666, "step": 14566 }, { "epoch": 0.42530145105252404, "grad_norm": 0.74463441940858, "learning_rate": 3.1928629359286296e-06, "loss": 0.681, "step": 14567 }, { "epoch": 0.4253306472803714, "grad_norm": 0.7552243855421756, "learning_rate": 3.1927007299270076e-06, "loss": 0.7369, "step": 14568 }, { "epoch": 0.42535984350821876, "grad_norm": 0.7833040700676782, "learning_rate": 3.192538523925385e-06, "loss": 0.6569, "step": 14569 }, { "epoch": 0.4253890397360661, "grad_norm": 0.7510801364335078, "learning_rate": 3.192376317923763e-06, "loss": 0.615, "step": 14570 }, { "epoch": 0.4254182359639135, "grad_norm": 0.7557870088924719, "learning_rate": 3.1922141119221412e-06, "loss": 0.7021, "step": 14571 }, { "epoch": 0.42544743219176084, "grad_norm": 0.658422646213745, "learning_rate": 3.1920519059205192e-06, "loss": 0.5876, "step": 14572 }, { "epoch": 0.4254766284196082, "grad_norm": 0.7440575932111553, "learning_rate": 3.1918896999188977e-06, "loss": 0.6502, "step": 14573 }, { "epoch": 0.42550582464745557, "grad_norm": 0.7024566419557334, "learning_rate": 3.1917274939172752e-06, "loss": 0.6322, "step": 14574 }, { "epoch": 0.4255350208753029, "grad_norm": 0.7096338539883357, "learning_rate": 3.1915652879156532e-06, "loss": 0.6351, "step": 14575 }, { "epoch": 0.4255642171031503, "grad_norm": 0.7424972364250906, "learning_rate": 3.1914030819140313e-06, "loss": 0.6446, "step": 14576 }, { "epoch": 0.42559341333099765, "grad_norm": 0.7668697560898089, "learning_rate": 3.1912408759124093e-06, "loss": 0.746, "step": 14577 }, { "epoch": 0.425622609558845, "grad_norm": 0.7927813365773536, "learning_rate": 3.1910786699107873e-06, "loss": 0.7096, "step": 14578 }, { "epoch": 0.42565180578669237, "grad_norm": 0.7867700015815603, "learning_rate": 3.190916463909165e-06, "loss": 0.6997, "step": 14579 }, { "epoch": 0.42568100201453973, "grad_norm": 0.7020671588107766, "learning_rate": 3.190754257907543e-06, "loss": 0.6209, "step": 14580 }, { "epoch": 0.4257101982423871, "grad_norm": 0.6887934459971258, "learning_rate": 3.190592051905921e-06, "loss": 0.5823, "step": 14581 }, { "epoch": 0.42573939447023446, "grad_norm": 0.7476490786182698, "learning_rate": 3.190429845904299e-06, "loss": 0.574, "step": 14582 }, { "epoch": 0.4257685906980818, "grad_norm": 0.7312792305062775, "learning_rate": 3.1902676399026765e-06, "loss": 0.6974, "step": 14583 }, { "epoch": 0.4257977869259292, "grad_norm": 0.7331625636210615, "learning_rate": 3.1901054339010545e-06, "loss": 0.6149, "step": 14584 }, { "epoch": 0.42582698315377654, "grad_norm": 0.7477507690171868, "learning_rate": 3.1899432278994325e-06, "loss": 0.7006, "step": 14585 }, { "epoch": 0.4258561793816239, "grad_norm": 0.8273137411765088, "learning_rate": 3.1897810218978105e-06, "loss": 0.7761, "step": 14586 }, { "epoch": 0.42588537560947126, "grad_norm": 0.7266964150708733, "learning_rate": 3.1896188158961885e-06, "loss": 0.6433, "step": 14587 }, { "epoch": 0.4259145718373186, "grad_norm": 0.7098019364967909, "learning_rate": 3.189456609894566e-06, "loss": 0.6509, "step": 14588 }, { "epoch": 0.425943768065166, "grad_norm": 0.728652923451352, "learning_rate": 3.189294403892944e-06, "loss": 0.6448, "step": 14589 }, { "epoch": 0.42597296429301335, "grad_norm": 0.7294873484412376, "learning_rate": 3.189132197891322e-06, "loss": 0.6253, "step": 14590 }, { "epoch": 0.4260021605208607, "grad_norm": 0.765110937827032, "learning_rate": 3.1889699918897e-06, "loss": 0.7128, "step": 14591 }, { "epoch": 0.42603135674870807, "grad_norm": 0.7673587403114197, "learning_rate": 3.1888077858880785e-06, "loss": 0.7108, "step": 14592 }, { "epoch": 0.42606055297655543, "grad_norm": 0.7127448166317407, "learning_rate": 3.188645579886456e-06, "loss": 0.6475, "step": 14593 }, { "epoch": 0.4260897492044028, "grad_norm": 0.8029222772824002, "learning_rate": 3.188483373884834e-06, "loss": 0.6708, "step": 14594 }, { "epoch": 0.42611894543225015, "grad_norm": 0.7209775562991904, "learning_rate": 3.188321167883212e-06, "loss": 0.6717, "step": 14595 }, { "epoch": 0.4261481416600975, "grad_norm": 0.7488356699941976, "learning_rate": 3.18815896188159e-06, "loss": 0.6339, "step": 14596 }, { "epoch": 0.4261773378879449, "grad_norm": 0.8317750336396105, "learning_rate": 3.187996755879968e-06, "loss": 0.6779, "step": 14597 }, { "epoch": 0.42620653411579223, "grad_norm": 0.7453948220157399, "learning_rate": 3.1878345498783457e-06, "loss": 0.6995, "step": 14598 }, { "epoch": 0.4262357303436396, "grad_norm": 0.7527473241410405, "learning_rate": 3.1876723438767237e-06, "loss": 0.704, "step": 14599 }, { "epoch": 0.42626492657148696, "grad_norm": 0.6963051471068125, "learning_rate": 3.1875101378751017e-06, "loss": 0.6385, "step": 14600 }, { "epoch": 0.4262941227993343, "grad_norm": 0.6850409642938095, "learning_rate": 3.1873479318734797e-06, "loss": 0.5672, "step": 14601 }, { "epoch": 0.4263233190271817, "grad_norm": 0.6692340278232357, "learning_rate": 3.1871857258718573e-06, "loss": 0.5922, "step": 14602 }, { "epoch": 0.42635251525502904, "grad_norm": 0.7752935890776848, "learning_rate": 3.1870235198702353e-06, "loss": 0.6582, "step": 14603 }, { "epoch": 0.4263817114828764, "grad_norm": 0.7328926309478642, "learning_rate": 3.1868613138686133e-06, "loss": 0.6521, "step": 14604 }, { "epoch": 0.42641090771072376, "grad_norm": 0.696129861327037, "learning_rate": 3.1866991078669913e-06, "loss": 0.5938, "step": 14605 }, { "epoch": 0.4264401039385711, "grad_norm": 0.7697329879483972, "learning_rate": 3.186536901865369e-06, "loss": 0.7037, "step": 14606 }, { "epoch": 0.4264693001664185, "grad_norm": 0.7655412394878924, "learning_rate": 3.186374695863747e-06, "loss": 0.6285, "step": 14607 }, { "epoch": 0.42649849639426585, "grad_norm": 0.8393891778207754, "learning_rate": 3.186212489862125e-06, "loss": 0.698, "step": 14608 }, { "epoch": 0.4265276926221132, "grad_norm": 1.1812200664751926, "learning_rate": 3.186050283860503e-06, "loss": 0.6293, "step": 14609 }, { "epoch": 0.42655688884996057, "grad_norm": 0.9119307151649678, "learning_rate": 3.185888077858881e-06, "loss": 0.7321, "step": 14610 }, { "epoch": 0.42658608507780793, "grad_norm": 0.7346137273450731, "learning_rate": 3.1857258718572594e-06, "loss": 0.6801, "step": 14611 }, { "epoch": 0.4266152813056553, "grad_norm": 0.6896992415227198, "learning_rate": 3.185563665855637e-06, "loss": 0.5773, "step": 14612 }, { "epoch": 0.42664447753350265, "grad_norm": 0.6709057006544416, "learning_rate": 3.185401459854015e-06, "loss": 0.5629, "step": 14613 }, { "epoch": 0.42667367376135, "grad_norm": 0.7860265058196724, "learning_rate": 3.185239253852393e-06, "loss": 0.6707, "step": 14614 }, { "epoch": 0.4267028699891974, "grad_norm": 0.7791275424764366, "learning_rate": 3.185077047850771e-06, "loss": 0.7791, "step": 14615 }, { "epoch": 0.42673206621704474, "grad_norm": 0.7294217568128496, "learning_rate": 3.184914841849149e-06, "loss": 0.6954, "step": 14616 }, { "epoch": 0.4267612624448921, "grad_norm": 0.7524089588971538, "learning_rate": 3.1847526358475266e-06, "loss": 0.6258, "step": 14617 }, { "epoch": 0.42679045867273946, "grad_norm": 0.688357717502894, "learning_rate": 3.1845904298459046e-06, "loss": 0.5569, "step": 14618 }, { "epoch": 0.4268196549005868, "grad_norm": 0.6910475792648952, "learning_rate": 3.1844282238442826e-06, "loss": 0.6279, "step": 14619 }, { "epoch": 0.4268488511284342, "grad_norm": 0.7503802320560379, "learning_rate": 3.1842660178426606e-06, "loss": 0.6859, "step": 14620 }, { "epoch": 0.42687804735628154, "grad_norm": 0.7480510753215692, "learning_rate": 3.184103811841038e-06, "loss": 0.6755, "step": 14621 }, { "epoch": 0.4269072435841289, "grad_norm": 0.7336711123712414, "learning_rate": 3.183941605839416e-06, "loss": 0.6486, "step": 14622 }, { "epoch": 0.42693643981197626, "grad_norm": 0.7251558175388845, "learning_rate": 3.183779399837794e-06, "loss": 0.6247, "step": 14623 }, { "epoch": 0.4269656360398237, "grad_norm": 0.708156949804155, "learning_rate": 3.183617193836172e-06, "loss": 0.6565, "step": 14624 }, { "epoch": 0.42699483226767104, "grad_norm": 0.754827910429783, "learning_rate": 3.1834549878345498e-06, "loss": 0.588, "step": 14625 }, { "epoch": 0.4270240284955184, "grad_norm": 0.785026826608115, "learning_rate": 3.1832927818329278e-06, "loss": 0.635, "step": 14626 }, { "epoch": 0.42705322472336577, "grad_norm": 0.9140360618412927, "learning_rate": 3.1831305758313058e-06, "loss": 0.6816, "step": 14627 }, { "epoch": 0.4270824209512131, "grad_norm": 0.7655167797357985, "learning_rate": 3.182968369829684e-06, "loss": 0.7135, "step": 14628 }, { "epoch": 0.4271116171790605, "grad_norm": 0.7395589454984686, "learning_rate": 3.182806163828062e-06, "loss": 0.6395, "step": 14629 }, { "epoch": 0.42714081340690785, "grad_norm": 0.7451504918622619, "learning_rate": 3.1826439578264402e-06, "loss": 0.6344, "step": 14630 }, { "epoch": 0.4271700096347552, "grad_norm": 0.7617955908330811, "learning_rate": 3.182481751824818e-06, "loss": 0.6126, "step": 14631 }, { "epoch": 0.42719920586260257, "grad_norm": 0.8198187683280446, "learning_rate": 3.182319545823196e-06, "loss": 0.707, "step": 14632 }, { "epoch": 0.42722840209044993, "grad_norm": 0.7018482444377294, "learning_rate": 3.182157339821574e-06, "loss": 0.6331, "step": 14633 }, { "epoch": 0.4272575983182973, "grad_norm": 0.6979895587106058, "learning_rate": 3.181995133819952e-06, "loss": 0.6501, "step": 14634 }, { "epoch": 0.42728679454614465, "grad_norm": 0.7101331679726385, "learning_rate": 3.18183292781833e-06, "loss": 0.5811, "step": 14635 }, { "epoch": 0.427315990773992, "grad_norm": 0.8561767659706818, "learning_rate": 3.1816707218167074e-06, "loss": 0.8715, "step": 14636 }, { "epoch": 0.4273451870018394, "grad_norm": 0.804989687826878, "learning_rate": 3.1815085158150854e-06, "loss": 0.6944, "step": 14637 }, { "epoch": 0.42737438322968674, "grad_norm": 0.7477991271708474, "learning_rate": 3.1813463098134634e-06, "loss": 0.6874, "step": 14638 }, { "epoch": 0.4274035794575341, "grad_norm": 0.692791573504359, "learning_rate": 3.1811841038118414e-06, "loss": 0.6166, "step": 14639 }, { "epoch": 0.42743277568538146, "grad_norm": 0.7773065305269795, "learning_rate": 3.181021897810219e-06, "loss": 0.6913, "step": 14640 }, { "epoch": 0.4274619719132288, "grad_norm": 0.7422298762342859, "learning_rate": 3.180859691808597e-06, "loss": 0.643, "step": 14641 }, { "epoch": 0.4274911681410762, "grad_norm": 0.7553499601399773, "learning_rate": 3.180697485806975e-06, "loss": 0.7008, "step": 14642 }, { "epoch": 0.42752036436892354, "grad_norm": 0.74738098654555, "learning_rate": 3.180535279805353e-06, "loss": 0.6793, "step": 14643 }, { "epoch": 0.4275495605967709, "grad_norm": 0.736884057615817, "learning_rate": 3.1803730738037306e-06, "loss": 0.6679, "step": 14644 }, { "epoch": 0.42757875682461827, "grad_norm": 0.7295107105026842, "learning_rate": 3.1802108678021086e-06, "loss": 0.6088, "step": 14645 }, { "epoch": 0.42760795305246563, "grad_norm": 0.7474549765166079, "learning_rate": 3.1800486618004866e-06, "loss": 0.6647, "step": 14646 }, { "epoch": 0.427637149280313, "grad_norm": 0.7317814682871838, "learning_rate": 3.1798864557988647e-06, "loss": 0.6499, "step": 14647 }, { "epoch": 0.42766634550816035, "grad_norm": 0.7436506317348378, "learning_rate": 3.179724249797243e-06, "loss": 0.6675, "step": 14648 }, { "epoch": 0.4276955417360077, "grad_norm": 0.7153060494326858, "learning_rate": 3.179562043795621e-06, "loss": 0.6257, "step": 14649 }, { "epoch": 0.4277247379638551, "grad_norm": 0.7157477416503609, "learning_rate": 3.1793998377939987e-06, "loss": 0.6325, "step": 14650 }, { "epoch": 0.42775393419170243, "grad_norm": 0.7032707555404604, "learning_rate": 3.1792376317923767e-06, "loss": 0.6227, "step": 14651 }, { "epoch": 0.4277831304195498, "grad_norm": 0.73432118053205, "learning_rate": 3.1790754257907547e-06, "loss": 0.6514, "step": 14652 }, { "epoch": 0.42781232664739716, "grad_norm": 0.6547204927838147, "learning_rate": 3.1789132197891327e-06, "loss": 0.559, "step": 14653 }, { "epoch": 0.4278415228752445, "grad_norm": 0.7426881768978222, "learning_rate": 3.1787510137875107e-06, "loss": 0.6406, "step": 14654 }, { "epoch": 0.4278707191030919, "grad_norm": 0.6980870536956478, "learning_rate": 3.1785888077858883e-06, "loss": 0.5889, "step": 14655 }, { "epoch": 0.42789991533093924, "grad_norm": 0.7178417581851675, "learning_rate": 3.1784266017842663e-06, "loss": 0.6521, "step": 14656 }, { "epoch": 0.4279291115587866, "grad_norm": 0.8211278852044642, "learning_rate": 3.1782643957826443e-06, "loss": 0.7951, "step": 14657 }, { "epoch": 0.42795830778663396, "grad_norm": 0.7227032969723278, "learning_rate": 3.1781021897810223e-06, "loss": 0.6349, "step": 14658 }, { "epoch": 0.4279875040144813, "grad_norm": 0.7316579817268274, "learning_rate": 3.1779399837794e-06, "loss": 0.6629, "step": 14659 }, { "epoch": 0.4280167002423287, "grad_norm": 0.7082631910321594, "learning_rate": 3.177777777777778e-06, "loss": 0.5903, "step": 14660 }, { "epoch": 0.42804589647017605, "grad_norm": 0.6873684308057888, "learning_rate": 3.177615571776156e-06, "loss": 0.6024, "step": 14661 }, { "epoch": 0.4280750926980234, "grad_norm": 0.7555748727285618, "learning_rate": 3.177453365774534e-06, "loss": 0.6472, "step": 14662 }, { "epoch": 0.42810428892587077, "grad_norm": 0.6804130690408737, "learning_rate": 3.1772911597729115e-06, "loss": 0.5988, "step": 14663 }, { "epoch": 0.42813348515371813, "grad_norm": 0.8331856693689629, "learning_rate": 3.1771289537712895e-06, "loss": 0.6991, "step": 14664 }, { "epoch": 0.4281626813815655, "grad_norm": 0.7626308644075623, "learning_rate": 3.1769667477696675e-06, "loss": 0.6718, "step": 14665 }, { "epoch": 0.42819187760941285, "grad_norm": 0.7175473166949972, "learning_rate": 3.1768045417680455e-06, "loss": 0.6699, "step": 14666 }, { "epoch": 0.4282210738372602, "grad_norm": 0.7644322228419278, "learning_rate": 3.176642335766424e-06, "loss": 0.7397, "step": 14667 }, { "epoch": 0.4282502700651076, "grad_norm": 0.743101623786177, "learning_rate": 3.176480129764802e-06, "loss": 0.7005, "step": 14668 }, { "epoch": 0.42827946629295494, "grad_norm": 0.7379717414028057, "learning_rate": 3.1763179237631795e-06, "loss": 0.679, "step": 14669 }, { "epoch": 0.4283086625208023, "grad_norm": 0.7676613370805285, "learning_rate": 3.1761557177615575e-06, "loss": 0.726, "step": 14670 }, { "epoch": 0.42833785874864966, "grad_norm": 0.7315517250885658, "learning_rate": 3.1759935117599355e-06, "loss": 0.578, "step": 14671 }, { "epoch": 0.428367054976497, "grad_norm": 0.7399521860627648, "learning_rate": 3.1758313057583136e-06, "loss": 0.6943, "step": 14672 }, { "epoch": 0.4283962512043444, "grad_norm": 0.7393595915259945, "learning_rate": 3.1756690997566916e-06, "loss": 0.6705, "step": 14673 }, { "epoch": 0.42842544743219174, "grad_norm": 0.7438138592192083, "learning_rate": 3.175506893755069e-06, "loss": 0.6432, "step": 14674 }, { "epoch": 0.4284546436600391, "grad_norm": 0.6998289590078147, "learning_rate": 3.175344687753447e-06, "loss": 0.569, "step": 14675 }, { "epoch": 0.42848383988788646, "grad_norm": 0.7357317451726424, "learning_rate": 3.175182481751825e-06, "loss": 0.6691, "step": 14676 }, { "epoch": 0.4285130361157338, "grad_norm": 0.7270160816939212, "learning_rate": 3.175020275750203e-06, "loss": 0.6189, "step": 14677 }, { "epoch": 0.4285422323435812, "grad_norm": 0.8331340599949307, "learning_rate": 3.1748580697485807e-06, "loss": 0.7324, "step": 14678 }, { "epoch": 0.42857142857142855, "grad_norm": 0.7361574661366753, "learning_rate": 3.1746958637469588e-06, "loss": 0.6249, "step": 14679 }, { "epoch": 0.4286006247992759, "grad_norm": 0.7933797407675983, "learning_rate": 3.1745336577453368e-06, "loss": 0.8202, "step": 14680 }, { "epoch": 0.42862982102712327, "grad_norm": 0.7305370538555022, "learning_rate": 3.1743714517437148e-06, "loss": 0.6686, "step": 14681 }, { "epoch": 0.42865901725497063, "grad_norm": 0.6869403748423549, "learning_rate": 3.1742092457420923e-06, "loss": 0.5877, "step": 14682 }, { "epoch": 0.428688213482818, "grad_norm": 0.8618477780351972, "learning_rate": 3.1740470397404704e-06, "loss": 0.6886, "step": 14683 }, { "epoch": 0.4287174097106654, "grad_norm": 0.6756389047639031, "learning_rate": 3.1738848337388484e-06, "loss": 0.5702, "step": 14684 }, { "epoch": 0.42874660593851277, "grad_norm": 0.8540273366497569, "learning_rate": 3.1737226277372264e-06, "loss": 0.7292, "step": 14685 }, { "epoch": 0.42877580216636013, "grad_norm": 0.7024232378596412, "learning_rate": 3.173560421735605e-06, "loss": 0.6358, "step": 14686 }, { "epoch": 0.4288049983942075, "grad_norm": 0.7107377566274614, "learning_rate": 3.173398215733983e-06, "loss": 0.5829, "step": 14687 }, { "epoch": 0.42883419462205485, "grad_norm": 0.7761340316905229, "learning_rate": 3.1732360097323604e-06, "loss": 0.7112, "step": 14688 }, { "epoch": 0.4288633908499022, "grad_norm": 0.737252052348634, "learning_rate": 3.1730738037307384e-06, "loss": 0.6782, "step": 14689 }, { "epoch": 0.4288925870777496, "grad_norm": 0.7439555000661724, "learning_rate": 3.1729115977291164e-06, "loss": 0.7072, "step": 14690 }, { "epoch": 0.42892178330559694, "grad_norm": 0.7579987018370185, "learning_rate": 3.1727493917274944e-06, "loss": 0.7041, "step": 14691 }, { "epoch": 0.4289509795334443, "grad_norm": 0.6893833348152206, "learning_rate": 3.1725871857258724e-06, "loss": 0.5577, "step": 14692 }, { "epoch": 0.42898017576129166, "grad_norm": 0.7570743905301632, "learning_rate": 3.17242497972425e-06, "loss": 0.6546, "step": 14693 }, { "epoch": 0.429009371989139, "grad_norm": 0.754305219049268, "learning_rate": 3.172262773722628e-06, "loss": 0.7168, "step": 14694 }, { "epoch": 0.4290385682169864, "grad_norm": 0.7465161038544286, "learning_rate": 3.172100567721006e-06, "loss": 0.658, "step": 14695 }, { "epoch": 0.42906776444483374, "grad_norm": 0.7160974511302904, "learning_rate": 3.171938361719384e-06, "loss": 0.665, "step": 14696 }, { "epoch": 0.4290969606726811, "grad_norm": 0.677933414128606, "learning_rate": 3.1717761557177616e-06, "loss": 0.5934, "step": 14697 }, { "epoch": 0.42912615690052847, "grad_norm": 0.767378880821126, "learning_rate": 3.1716139497161396e-06, "loss": 0.7411, "step": 14698 }, { "epoch": 0.4291553531283758, "grad_norm": 0.7492035709699588, "learning_rate": 3.1714517437145176e-06, "loss": 0.6505, "step": 14699 }, { "epoch": 0.4291845493562232, "grad_norm": 0.7285203123423835, "learning_rate": 3.1712895377128956e-06, "loss": 0.6376, "step": 14700 }, { "epoch": 0.42921374558407055, "grad_norm": 0.7869431151759347, "learning_rate": 3.171127331711273e-06, "loss": 0.6838, "step": 14701 }, { "epoch": 0.4292429418119179, "grad_norm": 0.6954544084045989, "learning_rate": 3.1709651257096512e-06, "loss": 0.6014, "step": 14702 }, { "epoch": 0.42927213803976527, "grad_norm": 0.6537861630344945, "learning_rate": 3.1708029197080292e-06, "loss": 0.5119, "step": 14703 }, { "epoch": 0.42930133426761263, "grad_norm": 0.7107898552561498, "learning_rate": 3.1706407137064072e-06, "loss": 0.6005, "step": 14704 }, { "epoch": 0.42933053049546, "grad_norm": 0.8021990068102591, "learning_rate": 3.1704785077047857e-06, "loss": 0.654, "step": 14705 }, { "epoch": 0.42935972672330736, "grad_norm": 0.6777900846956985, "learning_rate": 3.1703163017031637e-06, "loss": 0.5789, "step": 14706 }, { "epoch": 0.4293889229511547, "grad_norm": 0.6982875304169112, "learning_rate": 3.1701540957015412e-06, "loss": 0.6069, "step": 14707 }, { "epoch": 0.4294181191790021, "grad_norm": 0.7311692618965312, "learning_rate": 3.1699918896999193e-06, "loss": 0.6614, "step": 14708 }, { "epoch": 0.42944731540684944, "grad_norm": 0.7227056384702627, "learning_rate": 3.1698296836982973e-06, "loss": 0.6687, "step": 14709 }, { "epoch": 0.4294765116346968, "grad_norm": 0.7495467047513634, "learning_rate": 3.1696674776966753e-06, "loss": 0.7061, "step": 14710 }, { "epoch": 0.42950570786254416, "grad_norm": 0.7894229191625328, "learning_rate": 3.1695052716950533e-06, "loss": 0.7546, "step": 14711 }, { "epoch": 0.4295349040903915, "grad_norm": 0.8422353759687378, "learning_rate": 3.169343065693431e-06, "loss": 0.7278, "step": 14712 }, { "epoch": 0.4295641003182389, "grad_norm": 0.6766706854651152, "learning_rate": 3.169180859691809e-06, "loss": 0.5753, "step": 14713 }, { "epoch": 0.42959329654608625, "grad_norm": 0.6991511214313244, "learning_rate": 3.169018653690187e-06, "loss": 0.6068, "step": 14714 }, { "epoch": 0.4296224927739336, "grad_norm": 0.7081399979261842, "learning_rate": 3.168856447688565e-06, "loss": 0.6031, "step": 14715 }, { "epoch": 0.42965168900178097, "grad_norm": 0.7897107695519684, "learning_rate": 3.1686942416869425e-06, "loss": 0.7403, "step": 14716 }, { "epoch": 0.42968088522962833, "grad_norm": 0.7713698069293722, "learning_rate": 3.1685320356853205e-06, "loss": 0.7011, "step": 14717 }, { "epoch": 0.4297100814574757, "grad_norm": 0.8633530431341169, "learning_rate": 3.1683698296836985e-06, "loss": 0.7801, "step": 14718 }, { "epoch": 0.42973927768532305, "grad_norm": 0.7783793108516645, "learning_rate": 3.1682076236820765e-06, "loss": 0.7234, "step": 14719 }, { "epoch": 0.4297684739131704, "grad_norm": 0.7466270827883432, "learning_rate": 3.168045417680454e-06, "loss": 0.6475, "step": 14720 }, { "epoch": 0.4297976701410178, "grad_norm": 0.7440581305911569, "learning_rate": 3.167883211678832e-06, "loss": 0.6907, "step": 14721 }, { "epoch": 0.42982686636886513, "grad_norm": 0.7321539885030705, "learning_rate": 3.16772100567721e-06, "loss": 0.7086, "step": 14722 }, { "epoch": 0.4298560625967125, "grad_norm": 0.6874447771342278, "learning_rate": 3.167558799675588e-06, "loss": 0.544, "step": 14723 }, { "epoch": 0.42988525882455986, "grad_norm": 0.7863053494099103, "learning_rate": 3.1673965936739665e-06, "loss": 0.7479, "step": 14724 }, { "epoch": 0.4299144550524072, "grad_norm": 0.7415359719977397, "learning_rate": 3.1672343876723445e-06, "loss": 0.6947, "step": 14725 }, { "epoch": 0.4299436512802546, "grad_norm": 0.8104097827704205, "learning_rate": 3.167072181670722e-06, "loss": 0.6904, "step": 14726 }, { "epoch": 0.42997284750810194, "grad_norm": 0.7555060501804843, "learning_rate": 3.1669099756691e-06, "loss": 0.7021, "step": 14727 }, { "epoch": 0.4300020437359493, "grad_norm": 0.755283945189597, "learning_rate": 3.166747769667478e-06, "loss": 0.6195, "step": 14728 }, { "epoch": 0.43003123996379666, "grad_norm": 0.6870611632066345, "learning_rate": 3.166585563665856e-06, "loss": 0.6084, "step": 14729 }, { "epoch": 0.430060436191644, "grad_norm": 0.78520481803887, "learning_rate": 3.1664233576642337e-06, "loss": 0.7053, "step": 14730 }, { "epoch": 0.4300896324194914, "grad_norm": 0.6981861221604878, "learning_rate": 3.1662611516626117e-06, "loss": 0.5688, "step": 14731 }, { "epoch": 0.43011882864733875, "grad_norm": 0.7760505459731493, "learning_rate": 3.1660989456609897e-06, "loss": 0.7292, "step": 14732 }, { "epoch": 0.4301480248751861, "grad_norm": 0.7008428908453187, "learning_rate": 3.1659367396593677e-06, "loss": 0.6076, "step": 14733 }, { "epoch": 0.43017722110303347, "grad_norm": 0.7956195660486981, "learning_rate": 3.1657745336577457e-06, "loss": 0.6862, "step": 14734 }, { "epoch": 0.43020641733088083, "grad_norm": 0.7900872045812936, "learning_rate": 3.1656123276561233e-06, "loss": 0.6237, "step": 14735 }, { "epoch": 0.4302356135587282, "grad_norm": 0.7693395072474681, "learning_rate": 3.1654501216545013e-06, "loss": 0.6871, "step": 14736 }, { "epoch": 0.43026480978657555, "grad_norm": 0.7464564033713976, "learning_rate": 3.1652879156528793e-06, "loss": 0.6821, "step": 14737 }, { "epoch": 0.4302940060144229, "grad_norm": 0.7562046567687736, "learning_rate": 3.1651257096512573e-06, "loss": 0.6628, "step": 14738 }, { "epoch": 0.4303232022422703, "grad_norm": 0.7520209626972618, "learning_rate": 3.164963503649635e-06, "loss": 0.6618, "step": 14739 }, { "epoch": 0.43035239847011764, "grad_norm": 0.6767292577082668, "learning_rate": 3.164801297648013e-06, "loss": 0.5988, "step": 14740 }, { "epoch": 0.430381594697965, "grad_norm": 0.7246635943912964, "learning_rate": 3.164639091646391e-06, "loss": 0.6427, "step": 14741 }, { "epoch": 0.43041079092581236, "grad_norm": 0.7748962223644665, "learning_rate": 3.164476885644769e-06, "loss": 0.6758, "step": 14742 }, { "epoch": 0.4304399871536597, "grad_norm": 0.7334936350728217, "learning_rate": 3.1643146796431474e-06, "loss": 0.6416, "step": 14743 }, { "epoch": 0.43046918338150714, "grad_norm": 0.7137650477178082, "learning_rate": 3.1641524736415254e-06, "loss": 0.6371, "step": 14744 }, { "epoch": 0.4304983796093545, "grad_norm": 0.7459590788017225, "learning_rate": 3.163990267639903e-06, "loss": 0.6393, "step": 14745 }, { "epoch": 0.43052757583720186, "grad_norm": 0.713920771284555, "learning_rate": 3.163828061638281e-06, "loss": 0.6152, "step": 14746 }, { "epoch": 0.4305567720650492, "grad_norm": 0.6929102561758576, "learning_rate": 3.163665855636659e-06, "loss": 0.6357, "step": 14747 }, { "epoch": 0.4305859682928966, "grad_norm": 0.6960507889372403, "learning_rate": 3.163503649635037e-06, "loss": 0.6061, "step": 14748 }, { "epoch": 0.43061516452074394, "grad_norm": 0.7060472161457169, "learning_rate": 3.1633414436334146e-06, "loss": 0.6162, "step": 14749 }, { "epoch": 0.4306443607485913, "grad_norm": 0.718541995595472, "learning_rate": 3.1631792376317926e-06, "loss": 0.6276, "step": 14750 }, { "epoch": 0.43067355697643867, "grad_norm": 0.7421945664839074, "learning_rate": 3.1630170316301706e-06, "loss": 0.6127, "step": 14751 }, { "epoch": 0.430702753204286, "grad_norm": 0.7115144714329463, "learning_rate": 3.1628548256285486e-06, "loss": 0.6575, "step": 14752 }, { "epoch": 0.4307319494321334, "grad_norm": 0.7227352545589865, "learning_rate": 3.1626926196269266e-06, "loss": 0.6822, "step": 14753 }, { "epoch": 0.43076114565998075, "grad_norm": 0.6848513543086863, "learning_rate": 3.162530413625304e-06, "loss": 0.5302, "step": 14754 }, { "epoch": 0.4307903418878281, "grad_norm": 0.919492510919092, "learning_rate": 3.162368207623682e-06, "loss": 0.7589, "step": 14755 }, { "epoch": 0.43081953811567547, "grad_norm": 0.8456944719296653, "learning_rate": 3.16220600162206e-06, "loss": 0.6522, "step": 14756 }, { "epoch": 0.43084873434352283, "grad_norm": 0.7020138710284419, "learning_rate": 3.162043795620438e-06, "loss": 0.5601, "step": 14757 }, { "epoch": 0.4308779305713702, "grad_norm": 0.7190217180822819, "learning_rate": 3.1618815896188158e-06, "loss": 0.6533, "step": 14758 }, { "epoch": 0.43090712679921755, "grad_norm": 0.722663003301564, "learning_rate": 3.161719383617194e-06, "loss": 0.6151, "step": 14759 }, { "epoch": 0.4309363230270649, "grad_norm": 0.7144114635272975, "learning_rate": 3.161557177615572e-06, "loss": 0.6065, "step": 14760 }, { "epoch": 0.4309655192549123, "grad_norm": 0.7891220182124977, "learning_rate": 3.16139497161395e-06, "loss": 0.6677, "step": 14761 }, { "epoch": 0.43099471548275964, "grad_norm": 0.7471738112386772, "learning_rate": 3.1612327656123282e-06, "loss": 0.6838, "step": 14762 }, { "epoch": 0.431023911710607, "grad_norm": 0.7586940631326113, "learning_rate": 3.1610705596107062e-06, "loss": 0.7737, "step": 14763 }, { "epoch": 0.43105310793845436, "grad_norm": 0.7917722885630829, "learning_rate": 3.160908353609084e-06, "loss": 0.6876, "step": 14764 }, { "epoch": 0.4310823041663017, "grad_norm": 0.7361891423537319, "learning_rate": 3.160746147607462e-06, "loss": 0.639, "step": 14765 }, { "epoch": 0.4311115003941491, "grad_norm": 0.7413164032642549, "learning_rate": 3.16058394160584e-06, "loss": 0.6542, "step": 14766 }, { "epoch": 0.43114069662199644, "grad_norm": 0.7145009416906815, "learning_rate": 3.160421735604218e-06, "loss": 0.5917, "step": 14767 }, { "epoch": 0.4311698928498438, "grad_norm": 0.7451575001263733, "learning_rate": 3.1602595296025954e-06, "loss": 0.6598, "step": 14768 }, { "epoch": 0.43119908907769117, "grad_norm": 0.7738531065954053, "learning_rate": 3.1600973236009734e-06, "loss": 0.7372, "step": 14769 }, { "epoch": 0.43122828530553853, "grad_norm": 0.6550252102431464, "learning_rate": 3.1599351175993514e-06, "loss": 0.5599, "step": 14770 }, { "epoch": 0.4312574815333859, "grad_norm": 0.734704347576757, "learning_rate": 3.1597729115977294e-06, "loss": 0.6705, "step": 14771 }, { "epoch": 0.43128667776123325, "grad_norm": 0.7454285024875921, "learning_rate": 3.1596107055961075e-06, "loss": 0.7315, "step": 14772 }, { "epoch": 0.4313158739890806, "grad_norm": 0.6762279041375081, "learning_rate": 3.159448499594485e-06, "loss": 0.5889, "step": 14773 }, { "epoch": 0.431345070216928, "grad_norm": 0.812543361660859, "learning_rate": 3.159286293592863e-06, "loss": 0.779, "step": 14774 }, { "epoch": 0.43137426644477533, "grad_norm": 0.7741874182077858, "learning_rate": 3.159124087591241e-06, "loss": 0.4969, "step": 14775 }, { "epoch": 0.4314034626726227, "grad_norm": 0.7228852638968564, "learning_rate": 3.158961881589619e-06, "loss": 0.6828, "step": 14776 }, { "epoch": 0.43143265890047006, "grad_norm": 0.7339521234865466, "learning_rate": 3.1587996755879966e-06, "loss": 0.662, "step": 14777 }, { "epoch": 0.4314618551283174, "grad_norm": 0.7217567785893052, "learning_rate": 3.1586374695863746e-06, "loss": 0.6253, "step": 14778 }, { "epoch": 0.4314910513561648, "grad_norm": 0.7684580003520924, "learning_rate": 3.1584752635847527e-06, "loss": 0.6917, "step": 14779 }, { "epoch": 0.43152024758401214, "grad_norm": 0.6990317778428189, "learning_rate": 3.1583130575831307e-06, "loss": 0.6529, "step": 14780 }, { "epoch": 0.4315494438118595, "grad_norm": 0.8315256158595412, "learning_rate": 3.158150851581509e-06, "loss": 0.7703, "step": 14781 }, { "epoch": 0.43157864003970686, "grad_norm": 0.7974427899258113, "learning_rate": 3.157988645579887e-06, "loss": 0.721, "step": 14782 }, { "epoch": 0.4316078362675542, "grad_norm": 0.751112655811259, "learning_rate": 3.1578264395782647e-06, "loss": 0.7352, "step": 14783 }, { "epoch": 0.4316370324954016, "grad_norm": 0.850634645737019, "learning_rate": 3.1576642335766427e-06, "loss": 0.6927, "step": 14784 }, { "epoch": 0.43166622872324895, "grad_norm": 0.6583540470584756, "learning_rate": 3.1575020275750207e-06, "loss": 0.5356, "step": 14785 }, { "epoch": 0.4316954249510963, "grad_norm": 0.7684242788820164, "learning_rate": 3.1573398215733987e-06, "loss": 0.6396, "step": 14786 }, { "epoch": 0.43172462117894367, "grad_norm": 0.7016687148199876, "learning_rate": 3.1571776155717763e-06, "loss": 0.5861, "step": 14787 }, { "epoch": 0.43175381740679103, "grad_norm": 0.7289874933455623, "learning_rate": 3.1570154095701543e-06, "loss": 0.6543, "step": 14788 }, { "epoch": 0.4317830136346384, "grad_norm": 0.8452126760310525, "learning_rate": 3.1568532035685323e-06, "loss": 0.7359, "step": 14789 }, { "epoch": 0.43181220986248575, "grad_norm": 0.7342242835061085, "learning_rate": 3.1566909975669103e-06, "loss": 0.6325, "step": 14790 }, { "epoch": 0.4318414060903331, "grad_norm": 0.7651220835712877, "learning_rate": 3.1565287915652883e-06, "loss": 0.7822, "step": 14791 }, { "epoch": 0.4318706023181805, "grad_norm": 0.7607020404055067, "learning_rate": 3.156366585563666e-06, "loss": 0.6646, "step": 14792 }, { "epoch": 0.43189979854602784, "grad_norm": 0.6983594001348308, "learning_rate": 3.156204379562044e-06, "loss": 0.5823, "step": 14793 }, { "epoch": 0.4319289947738752, "grad_norm": 0.761432163453513, "learning_rate": 3.156042173560422e-06, "loss": 0.6813, "step": 14794 }, { "epoch": 0.43195819100172256, "grad_norm": 0.751910989885315, "learning_rate": 3.1558799675588e-06, "loss": 0.6972, "step": 14795 }, { "epoch": 0.4319873872295699, "grad_norm": 0.7567690479396922, "learning_rate": 3.1557177615571775e-06, "loss": 0.7608, "step": 14796 }, { "epoch": 0.4320165834574173, "grad_norm": 0.8231767905932035, "learning_rate": 3.1555555555555555e-06, "loss": 0.671, "step": 14797 }, { "epoch": 0.43204577968526464, "grad_norm": 0.7237497282498372, "learning_rate": 3.1553933495539335e-06, "loss": 0.6425, "step": 14798 }, { "epoch": 0.432074975913112, "grad_norm": 0.7013273611546532, "learning_rate": 3.155231143552312e-06, "loss": 0.6392, "step": 14799 }, { "epoch": 0.43210417214095936, "grad_norm": 0.7755364131424881, "learning_rate": 3.15506893755069e-06, "loss": 0.7272, "step": 14800 }, { "epoch": 0.4321333683688067, "grad_norm": 0.7254832802096427, "learning_rate": 3.154906731549068e-06, "loss": 0.667, "step": 14801 }, { "epoch": 0.4321625645966541, "grad_norm": 0.7324412311805562, "learning_rate": 3.1547445255474455e-06, "loss": 0.6611, "step": 14802 }, { "epoch": 0.43219176082450145, "grad_norm": 0.8137490407724053, "learning_rate": 3.1545823195458235e-06, "loss": 0.5767, "step": 14803 }, { "epoch": 0.43222095705234886, "grad_norm": 0.7715258483814176, "learning_rate": 3.1544201135442016e-06, "loss": 0.7074, "step": 14804 }, { "epoch": 0.4322501532801962, "grad_norm": 0.8118781284949341, "learning_rate": 3.1542579075425796e-06, "loss": 0.7721, "step": 14805 }, { "epoch": 0.4322793495080436, "grad_norm": 0.7122046328681069, "learning_rate": 3.154095701540957e-06, "loss": 0.5971, "step": 14806 }, { "epoch": 0.43230854573589095, "grad_norm": 0.7106358914892031, "learning_rate": 3.153933495539335e-06, "loss": 0.661, "step": 14807 }, { "epoch": 0.4323377419637383, "grad_norm": 0.7166526741522531, "learning_rate": 3.153771289537713e-06, "loss": 0.6401, "step": 14808 }, { "epoch": 0.43236693819158567, "grad_norm": 0.7062701797285603, "learning_rate": 3.153609083536091e-06, "loss": 0.5971, "step": 14809 }, { "epoch": 0.43239613441943303, "grad_norm": 0.799369848991797, "learning_rate": 3.153446877534469e-06, "loss": 0.6783, "step": 14810 }, { "epoch": 0.4324253306472804, "grad_norm": 0.7473802317155979, "learning_rate": 3.1532846715328468e-06, "loss": 0.6855, "step": 14811 }, { "epoch": 0.43245452687512775, "grad_norm": 0.7169183084301443, "learning_rate": 3.1531224655312248e-06, "loss": 0.5694, "step": 14812 }, { "epoch": 0.4324837231029751, "grad_norm": 0.7474813189299595, "learning_rate": 3.1529602595296028e-06, "loss": 0.6881, "step": 14813 }, { "epoch": 0.4325129193308225, "grad_norm": 0.6958817250729988, "learning_rate": 3.1527980535279808e-06, "loss": 0.6402, "step": 14814 }, { "epoch": 0.43254211555866984, "grad_norm": 0.772832503137067, "learning_rate": 3.1526358475263584e-06, "loss": 0.694, "step": 14815 }, { "epoch": 0.4325713117865172, "grad_norm": 0.8392472892295937, "learning_rate": 3.1524736415247364e-06, "loss": 0.5986, "step": 14816 }, { "epoch": 0.43260050801436456, "grad_norm": 0.764990699592652, "learning_rate": 3.1523114355231144e-06, "loss": 0.6743, "step": 14817 }, { "epoch": 0.4326297042422119, "grad_norm": 0.7602921912627775, "learning_rate": 3.152149229521493e-06, "loss": 0.7179, "step": 14818 }, { "epoch": 0.4326589004700593, "grad_norm": 0.661793990695837, "learning_rate": 3.151987023519871e-06, "loss": 0.5578, "step": 14819 }, { "epoch": 0.43268809669790664, "grad_norm": 0.7434373919226829, "learning_rate": 3.151824817518249e-06, "loss": 0.6562, "step": 14820 }, { "epoch": 0.432717292925754, "grad_norm": 0.7098221996667669, "learning_rate": 3.1516626115166264e-06, "loss": 0.6576, "step": 14821 }, { "epoch": 0.43274648915360137, "grad_norm": 0.6840565413388415, "learning_rate": 3.1515004055150044e-06, "loss": 0.6259, "step": 14822 }, { "epoch": 0.4327756853814487, "grad_norm": 0.7218658246021011, "learning_rate": 3.1513381995133824e-06, "loss": 0.6584, "step": 14823 }, { "epoch": 0.4328048816092961, "grad_norm": 0.7407551969911115, "learning_rate": 3.1511759935117604e-06, "loss": 0.7224, "step": 14824 }, { "epoch": 0.43283407783714345, "grad_norm": 0.7193432170237651, "learning_rate": 3.151013787510138e-06, "loss": 0.6741, "step": 14825 }, { "epoch": 0.4328632740649908, "grad_norm": 0.6919178029255819, "learning_rate": 3.150851581508516e-06, "loss": 0.5913, "step": 14826 }, { "epoch": 0.43289247029283817, "grad_norm": 0.7471953024429748, "learning_rate": 3.150689375506894e-06, "loss": 0.6249, "step": 14827 }, { "epoch": 0.43292166652068553, "grad_norm": 0.6704845423971008, "learning_rate": 3.150527169505272e-06, "loss": 0.5556, "step": 14828 }, { "epoch": 0.4329508627485329, "grad_norm": 0.691465424612613, "learning_rate": 3.15036496350365e-06, "loss": 0.6255, "step": 14829 }, { "epoch": 0.43298005897638026, "grad_norm": 0.7326991108675486, "learning_rate": 3.1502027575020276e-06, "loss": 0.6758, "step": 14830 }, { "epoch": 0.4330092552042276, "grad_norm": 0.7294421716745519, "learning_rate": 3.1500405515004056e-06, "loss": 0.689, "step": 14831 }, { "epoch": 0.433038451432075, "grad_norm": 0.7238943523780629, "learning_rate": 3.1498783454987836e-06, "loss": 0.6409, "step": 14832 }, { "epoch": 0.43306764765992234, "grad_norm": 0.7674520276788821, "learning_rate": 3.1497161394971616e-06, "loss": 0.7252, "step": 14833 }, { "epoch": 0.4330968438877697, "grad_norm": 0.7260115995232236, "learning_rate": 3.1495539334955392e-06, "loss": 0.6475, "step": 14834 }, { "epoch": 0.43312604011561706, "grad_norm": 0.7464250499235605, "learning_rate": 3.1493917274939172e-06, "loss": 0.6766, "step": 14835 }, { "epoch": 0.4331552363434644, "grad_norm": 0.7177580110215395, "learning_rate": 3.1492295214922952e-06, "loss": 0.6254, "step": 14836 }, { "epoch": 0.4331844325713118, "grad_norm": 0.6722909680217454, "learning_rate": 3.1490673154906737e-06, "loss": 0.5877, "step": 14837 }, { "epoch": 0.43321362879915915, "grad_norm": 0.6878338881943098, "learning_rate": 3.1489051094890517e-06, "loss": 0.5749, "step": 14838 }, { "epoch": 0.4332428250270065, "grad_norm": 0.6805193509013152, "learning_rate": 3.1487429034874297e-06, "loss": 0.5823, "step": 14839 }, { "epoch": 0.43327202125485387, "grad_norm": 0.7450655851096244, "learning_rate": 3.1485806974858073e-06, "loss": 0.7068, "step": 14840 }, { "epoch": 0.43330121748270123, "grad_norm": 0.7004948233026735, "learning_rate": 3.1484184914841853e-06, "loss": 0.5864, "step": 14841 }, { "epoch": 0.4333304137105486, "grad_norm": 0.767309083581472, "learning_rate": 3.1482562854825633e-06, "loss": 0.6983, "step": 14842 }, { "epoch": 0.43335960993839595, "grad_norm": 0.7162100829647721, "learning_rate": 3.1480940794809413e-06, "loss": 0.6398, "step": 14843 }, { "epoch": 0.4333888061662433, "grad_norm": 0.7978431494103306, "learning_rate": 3.147931873479319e-06, "loss": 0.7353, "step": 14844 }, { "epoch": 0.4334180023940907, "grad_norm": 0.7653639033978571, "learning_rate": 3.147769667477697e-06, "loss": 0.6078, "step": 14845 }, { "epoch": 0.43344719862193803, "grad_norm": 0.7784213871803226, "learning_rate": 3.147607461476075e-06, "loss": 0.6756, "step": 14846 }, { "epoch": 0.4334763948497854, "grad_norm": 0.7988007253873792, "learning_rate": 3.147445255474453e-06, "loss": 0.6578, "step": 14847 }, { "epoch": 0.43350559107763276, "grad_norm": 0.6891632968556757, "learning_rate": 3.147283049472831e-06, "loss": 0.5847, "step": 14848 }, { "epoch": 0.4335347873054801, "grad_norm": 0.7774333864206129, "learning_rate": 3.1471208434712085e-06, "loss": 0.6649, "step": 14849 }, { "epoch": 0.4335639835333275, "grad_norm": 0.7336730800630954, "learning_rate": 3.1469586374695865e-06, "loss": 0.6491, "step": 14850 }, { "epoch": 0.43359317976117484, "grad_norm": 0.6825973184166328, "learning_rate": 3.1467964314679645e-06, "loss": 0.6352, "step": 14851 }, { "epoch": 0.4336223759890222, "grad_norm": 0.7496884066925842, "learning_rate": 3.1466342254663425e-06, "loss": 0.683, "step": 14852 }, { "epoch": 0.43365157221686956, "grad_norm": 0.747771530267415, "learning_rate": 3.14647201946472e-06, "loss": 0.7156, "step": 14853 }, { "epoch": 0.4336807684447169, "grad_norm": 0.6935576473501166, "learning_rate": 3.146309813463098e-06, "loss": 0.64, "step": 14854 }, { "epoch": 0.4337099646725643, "grad_norm": 0.71034323492151, "learning_rate": 3.146147607461476e-06, "loss": 0.5814, "step": 14855 }, { "epoch": 0.43373916090041165, "grad_norm": 0.7318151247954805, "learning_rate": 3.1459854014598545e-06, "loss": 0.6485, "step": 14856 }, { "epoch": 0.433768357128259, "grad_norm": 0.708943396459826, "learning_rate": 3.1458231954582325e-06, "loss": 0.6508, "step": 14857 }, { "epoch": 0.43379755335610637, "grad_norm": 0.767108233748658, "learning_rate": 3.1456609894566105e-06, "loss": 0.7732, "step": 14858 }, { "epoch": 0.43382674958395373, "grad_norm": 0.7807816681605048, "learning_rate": 3.145498783454988e-06, "loss": 0.6999, "step": 14859 }, { "epoch": 0.4338559458118011, "grad_norm": 0.6901043145332799, "learning_rate": 3.145336577453366e-06, "loss": 0.5984, "step": 14860 }, { "epoch": 0.43388514203964845, "grad_norm": 0.727705930265756, "learning_rate": 3.145174371451744e-06, "loss": 0.6031, "step": 14861 }, { "epoch": 0.4339143382674958, "grad_norm": 0.7086240840868074, "learning_rate": 3.145012165450122e-06, "loss": 0.6095, "step": 14862 }, { "epoch": 0.4339435344953432, "grad_norm": 0.7709541077437568, "learning_rate": 3.1448499594484997e-06, "loss": 0.7192, "step": 14863 }, { "epoch": 0.43397273072319054, "grad_norm": 0.7077570237493742, "learning_rate": 3.1446877534468777e-06, "loss": 0.6163, "step": 14864 }, { "epoch": 0.43400192695103795, "grad_norm": 0.6832218235658325, "learning_rate": 3.1445255474452557e-06, "loss": 0.5364, "step": 14865 }, { "epoch": 0.4340311231788853, "grad_norm": 0.7682030708412236, "learning_rate": 3.1443633414436337e-06, "loss": 0.7512, "step": 14866 }, { "epoch": 0.4340603194067327, "grad_norm": 0.742514493914038, "learning_rate": 3.1442011354420117e-06, "loss": 0.6994, "step": 14867 }, { "epoch": 0.43408951563458004, "grad_norm": 0.7260159679336279, "learning_rate": 3.1440389294403893e-06, "loss": 0.6384, "step": 14868 }, { "epoch": 0.4341187118624274, "grad_norm": 0.6851855701213839, "learning_rate": 3.1438767234387673e-06, "loss": 0.5717, "step": 14869 }, { "epoch": 0.43414790809027476, "grad_norm": 0.8073707443875763, "learning_rate": 3.1437145174371453e-06, "loss": 0.7158, "step": 14870 }, { "epoch": 0.4341771043181221, "grad_norm": 0.7240846621616668, "learning_rate": 3.1435523114355234e-06, "loss": 0.6298, "step": 14871 }, { "epoch": 0.4342063005459695, "grad_norm": 0.6880124289471815, "learning_rate": 3.143390105433901e-06, "loss": 0.5989, "step": 14872 }, { "epoch": 0.43423549677381684, "grad_norm": 0.7872527725783991, "learning_rate": 3.143227899432279e-06, "loss": 0.7385, "step": 14873 }, { "epoch": 0.4342646930016642, "grad_norm": 0.7782087763745177, "learning_rate": 3.143065693430657e-06, "loss": 0.7171, "step": 14874 }, { "epoch": 0.43429388922951156, "grad_norm": 0.7276579765516603, "learning_rate": 3.1429034874290354e-06, "loss": 0.6201, "step": 14875 }, { "epoch": 0.4343230854573589, "grad_norm": 0.7807144516164586, "learning_rate": 3.1427412814274134e-06, "loss": 0.6949, "step": 14876 }, { "epoch": 0.4343522816852063, "grad_norm": 0.730758796865655, "learning_rate": 3.1425790754257914e-06, "loss": 0.6375, "step": 14877 }, { "epoch": 0.43438147791305365, "grad_norm": 0.8109717045552004, "learning_rate": 3.142416869424169e-06, "loss": 0.5864, "step": 14878 }, { "epoch": 0.434410674140901, "grad_norm": 0.7286396560753431, "learning_rate": 3.142254663422547e-06, "loss": 0.6561, "step": 14879 }, { "epoch": 0.43443987036874837, "grad_norm": 0.6781107013048463, "learning_rate": 3.142092457420925e-06, "loss": 0.568, "step": 14880 }, { "epoch": 0.43446906659659573, "grad_norm": 0.7426855926588218, "learning_rate": 3.141930251419303e-06, "loss": 0.6008, "step": 14881 }, { "epoch": 0.4344982628244431, "grad_norm": 0.7262555758574084, "learning_rate": 3.1417680454176806e-06, "loss": 0.6641, "step": 14882 }, { "epoch": 0.43452745905229045, "grad_norm": 0.6854059435447212, "learning_rate": 3.1416058394160586e-06, "loss": 0.5893, "step": 14883 }, { "epoch": 0.4345566552801378, "grad_norm": 0.7760337433931161, "learning_rate": 3.1414436334144366e-06, "loss": 0.6775, "step": 14884 }, { "epoch": 0.4345858515079852, "grad_norm": 0.6908881243079272, "learning_rate": 3.1412814274128146e-06, "loss": 0.5441, "step": 14885 }, { "epoch": 0.43461504773583254, "grad_norm": 0.7339601201067117, "learning_rate": 3.1411192214111926e-06, "loss": 0.7066, "step": 14886 }, { "epoch": 0.4346442439636799, "grad_norm": 0.6597565489893332, "learning_rate": 3.14095701540957e-06, "loss": 0.5184, "step": 14887 }, { "epoch": 0.43467344019152726, "grad_norm": 0.7143530881836316, "learning_rate": 3.140794809407948e-06, "loss": 0.6694, "step": 14888 }, { "epoch": 0.4347026364193746, "grad_norm": 0.7435128453442676, "learning_rate": 3.140632603406326e-06, "loss": 0.7088, "step": 14889 }, { "epoch": 0.434731832647222, "grad_norm": 0.7718702687960624, "learning_rate": 3.140470397404704e-06, "loss": 0.6805, "step": 14890 }, { "epoch": 0.43476102887506934, "grad_norm": 0.7100080747181867, "learning_rate": 3.140308191403082e-06, "loss": 0.6211, "step": 14891 }, { "epoch": 0.4347902251029167, "grad_norm": 0.7481484210132722, "learning_rate": 3.14014598540146e-06, "loss": 0.6871, "step": 14892 }, { "epoch": 0.43481942133076407, "grad_norm": 0.7913654099875747, "learning_rate": 3.139983779399838e-06, "loss": 0.7391, "step": 14893 }, { "epoch": 0.4348486175586114, "grad_norm": 0.6845664478895421, "learning_rate": 3.1398215733982162e-06, "loss": 0.5735, "step": 14894 }, { "epoch": 0.4348778137864588, "grad_norm": 0.7503752464553394, "learning_rate": 3.1396593673965942e-06, "loss": 0.7153, "step": 14895 }, { "epoch": 0.43490701001430615, "grad_norm": 0.7047538331394456, "learning_rate": 3.1394971613949723e-06, "loss": 0.5727, "step": 14896 }, { "epoch": 0.4349362062421535, "grad_norm": 0.8157765343005732, "learning_rate": 3.13933495539335e-06, "loss": 0.6931, "step": 14897 }, { "epoch": 0.4349654024700009, "grad_norm": 0.6872470895235686, "learning_rate": 3.139172749391728e-06, "loss": 0.5982, "step": 14898 }, { "epoch": 0.43499459869784823, "grad_norm": 0.6777317715763843, "learning_rate": 3.139010543390106e-06, "loss": 0.5864, "step": 14899 }, { "epoch": 0.4350237949256956, "grad_norm": 0.6904446847616893, "learning_rate": 3.138848337388484e-06, "loss": 0.5371, "step": 14900 }, { "epoch": 0.43505299115354296, "grad_norm": 0.7416735908986082, "learning_rate": 3.1386861313868614e-06, "loss": 0.6511, "step": 14901 }, { "epoch": 0.4350821873813903, "grad_norm": 0.7922561553690156, "learning_rate": 3.1385239253852394e-06, "loss": 0.715, "step": 14902 }, { "epoch": 0.4351113836092377, "grad_norm": 0.7442819136957438, "learning_rate": 3.1383617193836175e-06, "loss": 0.7093, "step": 14903 }, { "epoch": 0.43514057983708504, "grad_norm": 0.7995827475982953, "learning_rate": 3.1381995133819955e-06, "loss": 0.5637, "step": 14904 }, { "epoch": 0.4351697760649324, "grad_norm": 1.0849085458517251, "learning_rate": 3.1380373073803735e-06, "loss": 0.6919, "step": 14905 }, { "epoch": 0.43519897229277976, "grad_norm": 0.7627358092540519, "learning_rate": 3.137875101378751e-06, "loss": 0.7352, "step": 14906 }, { "epoch": 0.4352281685206271, "grad_norm": 0.8354546506031482, "learning_rate": 3.137712895377129e-06, "loss": 0.6928, "step": 14907 }, { "epoch": 0.4352573647484745, "grad_norm": 0.7746218747406347, "learning_rate": 3.137550689375507e-06, "loss": 0.7032, "step": 14908 }, { "epoch": 0.43528656097632185, "grad_norm": 0.7137834519329385, "learning_rate": 3.137388483373885e-06, "loss": 0.65, "step": 14909 }, { "epoch": 0.4353157572041692, "grad_norm": 0.7545366990245407, "learning_rate": 3.1372262773722627e-06, "loss": 0.7134, "step": 14910 }, { "epoch": 0.43534495343201657, "grad_norm": 0.6920790361097042, "learning_rate": 3.1370640713706407e-06, "loss": 0.6485, "step": 14911 }, { "epoch": 0.43537414965986393, "grad_norm": 0.6988818587042847, "learning_rate": 3.1369018653690187e-06, "loss": 0.6271, "step": 14912 }, { "epoch": 0.4354033458877113, "grad_norm": 0.7296853991619409, "learning_rate": 3.136739659367397e-06, "loss": 0.581, "step": 14913 }, { "epoch": 0.43543254211555865, "grad_norm": 0.6731851066304783, "learning_rate": 3.136577453365775e-06, "loss": 0.5746, "step": 14914 }, { "epoch": 0.435461738343406, "grad_norm": 0.7158110128092907, "learning_rate": 3.136415247364153e-06, "loss": 0.5938, "step": 14915 }, { "epoch": 0.4354909345712534, "grad_norm": 0.743808754406463, "learning_rate": 3.1362530413625307e-06, "loss": 0.6956, "step": 14916 }, { "epoch": 0.43552013079910074, "grad_norm": 0.6873875019259105, "learning_rate": 3.1360908353609087e-06, "loss": 0.5732, "step": 14917 }, { "epoch": 0.4355493270269481, "grad_norm": 0.6866357821835491, "learning_rate": 3.1359286293592867e-06, "loss": 0.5491, "step": 14918 }, { "epoch": 0.43557852325479546, "grad_norm": 0.738683538760368, "learning_rate": 3.1357664233576647e-06, "loss": 0.632, "step": 14919 }, { "epoch": 0.4356077194826428, "grad_norm": 0.7473506274948915, "learning_rate": 3.1356042173560423e-06, "loss": 0.609, "step": 14920 }, { "epoch": 0.4356369157104902, "grad_norm": 0.7499009277826822, "learning_rate": 3.1354420113544203e-06, "loss": 0.6783, "step": 14921 }, { "epoch": 0.43566611193833754, "grad_norm": 0.7796608629355404, "learning_rate": 3.1352798053527983e-06, "loss": 0.7277, "step": 14922 }, { "epoch": 0.4356953081661849, "grad_norm": 0.7421600350507419, "learning_rate": 3.1351175993511763e-06, "loss": 0.6956, "step": 14923 }, { "epoch": 0.43572450439403226, "grad_norm": 0.8761315457835686, "learning_rate": 3.1349553933495543e-06, "loss": 0.6603, "step": 14924 }, { "epoch": 0.4357537006218797, "grad_norm": 0.7120631439579536, "learning_rate": 3.134793187347932e-06, "loss": 0.6406, "step": 14925 }, { "epoch": 0.43578289684972704, "grad_norm": 0.7085061907264094, "learning_rate": 3.13463098134631e-06, "loss": 0.5903, "step": 14926 }, { "epoch": 0.4358120930775744, "grad_norm": 0.7589990047106001, "learning_rate": 3.134468775344688e-06, "loss": 0.6516, "step": 14927 }, { "epoch": 0.43584128930542176, "grad_norm": 0.7496061066421797, "learning_rate": 3.134306569343066e-06, "loss": 0.7219, "step": 14928 }, { "epoch": 0.4358704855332691, "grad_norm": 0.7416136977131635, "learning_rate": 3.1341443633414435e-06, "loss": 0.6569, "step": 14929 }, { "epoch": 0.4358996817611165, "grad_norm": 0.6597072871032994, "learning_rate": 3.1339821573398215e-06, "loss": 0.5255, "step": 14930 }, { "epoch": 0.43592887798896385, "grad_norm": 0.7473605384438634, "learning_rate": 3.1338199513381995e-06, "loss": 0.6588, "step": 14931 }, { "epoch": 0.4359580742168112, "grad_norm": 0.7721258774212203, "learning_rate": 3.133657745336578e-06, "loss": 0.6007, "step": 14932 }, { "epoch": 0.43598727044465857, "grad_norm": 0.6853534365034095, "learning_rate": 3.133495539334956e-06, "loss": 0.6064, "step": 14933 }, { "epoch": 0.43601646667250593, "grad_norm": 0.800595041560243, "learning_rate": 3.133333333333334e-06, "loss": 0.7254, "step": 14934 }, { "epoch": 0.4360456629003533, "grad_norm": 0.7203604267054623, "learning_rate": 3.1331711273317116e-06, "loss": 0.6496, "step": 14935 }, { "epoch": 0.43607485912820065, "grad_norm": 0.7045753128918731, "learning_rate": 3.1330089213300896e-06, "loss": 0.609, "step": 14936 }, { "epoch": 0.436104055356048, "grad_norm": 0.6734271993101814, "learning_rate": 3.1328467153284676e-06, "loss": 0.5937, "step": 14937 }, { "epoch": 0.4361332515838954, "grad_norm": 0.6556360023920835, "learning_rate": 3.1326845093268456e-06, "loss": 0.5325, "step": 14938 }, { "epoch": 0.43616244781174274, "grad_norm": 0.7206042962555191, "learning_rate": 3.132522303325223e-06, "loss": 0.6454, "step": 14939 }, { "epoch": 0.4361916440395901, "grad_norm": 0.7058391911517341, "learning_rate": 3.132360097323601e-06, "loss": 0.5892, "step": 14940 }, { "epoch": 0.43622084026743746, "grad_norm": 0.6981821380393435, "learning_rate": 3.132197891321979e-06, "loss": 0.5734, "step": 14941 }, { "epoch": 0.4362500364952848, "grad_norm": 0.6822737150375309, "learning_rate": 3.132035685320357e-06, "loss": 0.5708, "step": 14942 }, { "epoch": 0.4362792327231322, "grad_norm": 0.7471310886784595, "learning_rate": 3.131873479318735e-06, "loss": 0.6889, "step": 14943 }, { "epoch": 0.43630842895097954, "grad_norm": 0.7436517180829428, "learning_rate": 3.1317112733171128e-06, "loss": 0.6221, "step": 14944 }, { "epoch": 0.4363376251788269, "grad_norm": 0.7697343704661185, "learning_rate": 3.1315490673154908e-06, "loss": 0.7126, "step": 14945 }, { "epoch": 0.43636682140667427, "grad_norm": 0.713567370133243, "learning_rate": 3.1313868613138688e-06, "loss": 0.6112, "step": 14946 }, { "epoch": 0.4363960176345216, "grad_norm": 0.773060200351299, "learning_rate": 3.1312246553122468e-06, "loss": 0.7122, "step": 14947 }, { "epoch": 0.436425213862369, "grad_norm": 0.7387723497001147, "learning_rate": 3.1310624493106244e-06, "loss": 0.6782, "step": 14948 }, { "epoch": 0.43645441009021635, "grad_norm": 0.7322132027344136, "learning_rate": 3.1309002433090024e-06, "loss": 0.6407, "step": 14949 }, { "epoch": 0.4364836063180637, "grad_norm": 0.6910397693717615, "learning_rate": 3.130738037307381e-06, "loss": 0.6055, "step": 14950 }, { "epoch": 0.43651280254591107, "grad_norm": 0.750760969530746, "learning_rate": 3.130575831305759e-06, "loss": 0.7351, "step": 14951 }, { "epoch": 0.43654199877375843, "grad_norm": 0.7282506347747708, "learning_rate": 3.130413625304137e-06, "loss": 0.625, "step": 14952 }, { "epoch": 0.4365711950016058, "grad_norm": 0.7331852103205169, "learning_rate": 3.130251419302515e-06, "loss": 0.6434, "step": 14953 }, { "epoch": 0.43660039122945316, "grad_norm": 0.7253688789987585, "learning_rate": 3.1300892133008924e-06, "loss": 0.6505, "step": 14954 }, { "epoch": 0.4366295874573005, "grad_norm": 0.6929921881504879, "learning_rate": 3.1299270072992704e-06, "loss": 0.598, "step": 14955 }, { "epoch": 0.4366587836851479, "grad_norm": 0.760080264167824, "learning_rate": 3.1297648012976484e-06, "loss": 0.6527, "step": 14956 }, { "epoch": 0.43668797991299524, "grad_norm": 0.7736074868016118, "learning_rate": 3.1296025952960264e-06, "loss": 0.7314, "step": 14957 }, { "epoch": 0.4367171761408426, "grad_norm": 0.7726318584535101, "learning_rate": 3.129440389294404e-06, "loss": 0.7296, "step": 14958 }, { "epoch": 0.43674637236868996, "grad_norm": 0.7602733103965037, "learning_rate": 3.129278183292782e-06, "loss": 0.6082, "step": 14959 }, { "epoch": 0.4367755685965373, "grad_norm": 0.6736110658608236, "learning_rate": 3.12911597729116e-06, "loss": 0.58, "step": 14960 }, { "epoch": 0.4368047648243847, "grad_norm": 0.7191839597251717, "learning_rate": 3.128953771289538e-06, "loss": 0.6739, "step": 14961 }, { "epoch": 0.43683396105223204, "grad_norm": 0.7932216477427834, "learning_rate": 3.128791565287916e-06, "loss": 0.6885, "step": 14962 }, { "epoch": 0.4368631572800794, "grad_norm": 0.7086626372154206, "learning_rate": 3.1286293592862936e-06, "loss": 0.5887, "step": 14963 }, { "epoch": 0.43689235350792677, "grad_norm": 0.7345921569593242, "learning_rate": 3.1284671532846716e-06, "loss": 0.5987, "step": 14964 }, { "epoch": 0.43692154973577413, "grad_norm": 0.7025734333156014, "learning_rate": 3.1283049472830496e-06, "loss": 0.6125, "step": 14965 }, { "epoch": 0.4369507459636215, "grad_norm": 0.6934174726308204, "learning_rate": 3.1281427412814276e-06, "loss": 0.6497, "step": 14966 }, { "epoch": 0.43697994219146885, "grad_norm": 0.7604494141738334, "learning_rate": 3.1279805352798052e-06, "loss": 0.711, "step": 14967 }, { "epoch": 0.4370091384193162, "grad_norm": 0.7797734903054672, "learning_rate": 3.1278183292781832e-06, "loss": 0.7826, "step": 14968 }, { "epoch": 0.4370383346471636, "grad_norm": 0.8498170194646485, "learning_rate": 3.1276561232765617e-06, "loss": 0.7547, "step": 14969 }, { "epoch": 0.43706753087501093, "grad_norm": 0.7477174264788421, "learning_rate": 3.1274939172749397e-06, "loss": 0.675, "step": 14970 }, { "epoch": 0.4370967271028583, "grad_norm": 0.7468657912113728, "learning_rate": 3.1273317112733177e-06, "loss": 0.6557, "step": 14971 }, { "epoch": 0.43712592333070566, "grad_norm": 0.7477462864532916, "learning_rate": 3.1271695052716957e-06, "loss": 0.6477, "step": 14972 }, { "epoch": 0.437155119558553, "grad_norm": 0.7558754874444629, "learning_rate": 3.1270072992700733e-06, "loss": 0.6788, "step": 14973 }, { "epoch": 0.4371843157864004, "grad_norm": 0.7734389760013118, "learning_rate": 3.1268450932684513e-06, "loss": 0.631, "step": 14974 }, { "epoch": 0.43721351201424774, "grad_norm": 0.7741164137421463, "learning_rate": 3.1266828872668293e-06, "loss": 0.6778, "step": 14975 }, { "epoch": 0.4372427082420951, "grad_norm": 0.712218995931718, "learning_rate": 3.1265206812652073e-06, "loss": 0.6816, "step": 14976 }, { "epoch": 0.43727190446994246, "grad_norm": 0.6889858343050727, "learning_rate": 3.126358475263585e-06, "loss": 0.6188, "step": 14977 }, { "epoch": 0.4373011006977898, "grad_norm": 0.724404245978181, "learning_rate": 3.126196269261963e-06, "loss": 0.6466, "step": 14978 }, { "epoch": 0.4373302969256372, "grad_norm": 0.7530586316187341, "learning_rate": 3.126034063260341e-06, "loss": 0.5929, "step": 14979 }, { "epoch": 0.43735949315348455, "grad_norm": 0.7186843537234978, "learning_rate": 3.125871857258719e-06, "loss": 0.6327, "step": 14980 }, { "epoch": 0.4373886893813319, "grad_norm": 0.7822726826209986, "learning_rate": 3.125709651257097e-06, "loss": 0.6736, "step": 14981 }, { "epoch": 0.43741788560917927, "grad_norm": 0.7896079452174896, "learning_rate": 3.1255474452554745e-06, "loss": 0.6125, "step": 14982 }, { "epoch": 0.43744708183702663, "grad_norm": 0.7884052693747143, "learning_rate": 3.1253852392538525e-06, "loss": 0.7756, "step": 14983 }, { "epoch": 0.437476278064874, "grad_norm": 0.73739093567961, "learning_rate": 3.1252230332522305e-06, "loss": 0.6466, "step": 14984 }, { "epoch": 0.4375054742927214, "grad_norm": 0.7074988593654123, "learning_rate": 3.1250608272506085e-06, "loss": 0.6311, "step": 14985 }, { "epoch": 0.43753467052056877, "grad_norm": 0.7375607015705142, "learning_rate": 3.124898621248986e-06, "loss": 0.6056, "step": 14986 }, { "epoch": 0.43756386674841613, "grad_norm": 0.7628934895356742, "learning_rate": 3.124736415247364e-06, "loss": 0.7262, "step": 14987 }, { "epoch": 0.4375930629762635, "grad_norm": 0.7950686516777346, "learning_rate": 3.1245742092457425e-06, "loss": 0.7572, "step": 14988 }, { "epoch": 0.43762225920411085, "grad_norm": 0.7790455929384211, "learning_rate": 3.1244120032441205e-06, "loss": 0.6381, "step": 14989 }, { "epoch": 0.4376514554319582, "grad_norm": 0.8392740123661753, "learning_rate": 3.1242497972424985e-06, "loss": 0.6843, "step": 14990 }, { "epoch": 0.4376806516598056, "grad_norm": 0.7180540078906715, "learning_rate": 3.1240875912408765e-06, "loss": 0.6282, "step": 14991 }, { "epoch": 0.43770984788765294, "grad_norm": 0.7436326290733205, "learning_rate": 3.123925385239254e-06, "loss": 0.6238, "step": 14992 }, { "epoch": 0.4377390441155003, "grad_norm": 0.7393833858164099, "learning_rate": 3.123763179237632e-06, "loss": 0.6177, "step": 14993 }, { "epoch": 0.43776824034334766, "grad_norm": 0.72332694681332, "learning_rate": 3.12360097323601e-06, "loss": 0.6169, "step": 14994 }, { "epoch": 0.437797436571195, "grad_norm": 0.7109727128137616, "learning_rate": 3.123438767234388e-06, "loss": 0.6313, "step": 14995 }, { "epoch": 0.4378266327990424, "grad_norm": 0.7547296246000654, "learning_rate": 3.1232765612327657e-06, "loss": 0.7266, "step": 14996 }, { "epoch": 0.43785582902688974, "grad_norm": 0.6921070109780544, "learning_rate": 3.1231143552311437e-06, "loss": 0.623, "step": 14997 }, { "epoch": 0.4378850252547371, "grad_norm": 0.8092169866357104, "learning_rate": 3.1229521492295217e-06, "loss": 0.634, "step": 14998 }, { "epoch": 0.43791422148258446, "grad_norm": 0.6591830532075009, "learning_rate": 3.1227899432278998e-06, "loss": 0.55, "step": 14999 }, { "epoch": 0.4379434177104318, "grad_norm": 0.7725632679845645, "learning_rate": 3.1226277372262773e-06, "loss": 0.6888, "step": 15000 }, { "epoch": 0.4379726139382792, "grad_norm": 0.727117549369948, "learning_rate": 3.1224655312246553e-06, "loss": 0.6768, "step": 15001 }, { "epoch": 0.43800181016612655, "grad_norm": 0.7131387346345571, "learning_rate": 3.1223033252230333e-06, "loss": 0.6491, "step": 15002 }, { "epoch": 0.4380310063939739, "grad_norm": 0.7289060943146612, "learning_rate": 3.1221411192214114e-06, "loss": 0.6302, "step": 15003 }, { "epoch": 0.43806020262182127, "grad_norm": 0.7316424430556976, "learning_rate": 3.1219789132197894e-06, "loss": 0.6659, "step": 15004 }, { "epoch": 0.43808939884966863, "grad_norm": 0.9099270808187313, "learning_rate": 3.121816707218167e-06, "loss": 0.6627, "step": 15005 }, { "epoch": 0.438118595077516, "grad_norm": 0.7569550380190908, "learning_rate": 3.121654501216545e-06, "loss": 0.7385, "step": 15006 }, { "epoch": 0.43814779130536335, "grad_norm": 0.7697269761548414, "learning_rate": 3.1214922952149234e-06, "loss": 0.7932, "step": 15007 }, { "epoch": 0.4381769875332107, "grad_norm": 0.7941450997449635, "learning_rate": 3.1213300892133014e-06, "loss": 0.7353, "step": 15008 }, { "epoch": 0.4382061837610581, "grad_norm": 0.7995311366873833, "learning_rate": 3.1211678832116794e-06, "loss": 0.7035, "step": 15009 }, { "epoch": 0.43823537998890544, "grad_norm": 0.7523322511766989, "learning_rate": 3.1210056772100574e-06, "loss": 0.6908, "step": 15010 }, { "epoch": 0.4382645762167528, "grad_norm": 0.6664867054016076, "learning_rate": 3.120843471208435e-06, "loss": 0.5799, "step": 15011 }, { "epoch": 0.43829377244460016, "grad_norm": 0.6131811002462935, "learning_rate": 3.120681265206813e-06, "loss": 0.4738, "step": 15012 }, { "epoch": 0.4383229686724475, "grad_norm": 0.7110590428115547, "learning_rate": 3.120519059205191e-06, "loss": 0.6054, "step": 15013 }, { "epoch": 0.4383521649002949, "grad_norm": 0.7287423136060514, "learning_rate": 3.120356853203569e-06, "loss": 0.6407, "step": 15014 }, { "epoch": 0.43838136112814224, "grad_norm": 0.7630693175761761, "learning_rate": 3.1201946472019466e-06, "loss": 0.7513, "step": 15015 }, { "epoch": 0.4384105573559896, "grad_norm": 0.6970296240871604, "learning_rate": 3.1200324412003246e-06, "loss": 0.5958, "step": 15016 }, { "epoch": 0.43843975358383697, "grad_norm": 0.6533752128165118, "learning_rate": 3.1198702351987026e-06, "loss": 0.5254, "step": 15017 }, { "epoch": 0.4384689498116843, "grad_norm": 0.7872992310167981, "learning_rate": 3.1197080291970806e-06, "loss": 0.7281, "step": 15018 }, { "epoch": 0.4384981460395317, "grad_norm": 0.7314397661746805, "learning_rate": 3.119545823195458e-06, "loss": 0.6838, "step": 15019 }, { "epoch": 0.43852734226737905, "grad_norm": 0.715183490460196, "learning_rate": 3.119383617193836e-06, "loss": 0.6579, "step": 15020 }, { "epoch": 0.4385565384952264, "grad_norm": 0.7088351767781811, "learning_rate": 3.119221411192214e-06, "loss": 0.6061, "step": 15021 }, { "epoch": 0.4385857347230738, "grad_norm": 0.7267111722576716, "learning_rate": 3.1190592051905922e-06, "loss": 0.6638, "step": 15022 }, { "epoch": 0.43861493095092113, "grad_norm": 0.7324410478369301, "learning_rate": 3.1188969991889702e-06, "loss": 0.6481, "step": 15023 }, { "epoch": 0.4386441271787685, "grad_norm": 0.716194434367556, "learning_rate": 3.118734793187348e-06, "loss": 0.5993, "step": 15024 }, { "epoch": 0.43867332340661586, "grad_norm": 0.8214700933054131, "learning_rate": 3.118572587185726e-06, "loss": 0.6802, "step": 15025 }, { "epoch": 0.4387025196344632, "grad_norm": 0.7180485045314461, "learning_rate": 3.1184103811841042e-06, "loss": 0.624, "step": 15026 }, { "epoch": 0.4387317158623106, "grad_norm": 0.6982059103394321, "learning_rate": 3.1182481751824822e-06, "loss": 0.6198, "step": 15027 }, { "epoch": 0.43876091209015794, "grad_norm": 0.7167029809693147, "learning_rate": 3.1180859691808603e-06, "loss": 0.6352, "step": 15028 }, { "epoch": 0.4387901083180053, "grad_norm": 0.7421049898841453, "learning_rate": 3.1179237631792383e-06, "loss": 0.6336, "step": 15029 }, { "epoch": 0.43881930454585266, "grad_norm": 0.6307187369903604, "learning_rate": 3.117761557177616e-06, "loss": 0.4555, "step": 15030 }, { "epoch": 0.4388485007737, "grad_norm": 0.7903958339296125, "learning_rate": 3.117599351175994e-06, "loss": 0.6694, "step": 15031 }, { "epoch": 0.4388776970015474, "grad_norm": 0.7675752798945474, "learning_rate": 3.117437145174372e-06, "loss": 0.6476, "step": 15032 }, { "epoch": 0.43890689322939475, "grad_norm": 0.728261511372983, "learning_rate": 3.11727493917275e-06, "loss": 0.641, "step": 15033 }, { "epoch": 0.4389360894572421, "grad_norm": 0.7208732073993064, "learning_rate": 3.1171127331711274e-06, "loss": 0.6247, "step": 15034 }, { "epoch": 0.43896528568508947, "grad_norm": 0.7364559872467732, "learning_rate": 3.1169505271695055e-06, "loss": 0.6672, "step": 15035 }, { "epoch": 0.43899448191293683, "grad_norm": 0.7213555033039987, "learning_rate": 3.1167883211678835e-06, "loss": 0.6244, "step": 15036 }, { "epoch": 0.4390236781407842, "grad_norm": 0.7493852407381519, "learning_rate": 3.1166261151662615e-06, "loss": 0.7167, "step": 15037 }, { "epoch": 0.43905287436863155, "grad_norm": 0.6927747375664595, "learning_rate": 3.116463909164639e-06, "loss": 0.6471, "step": 15038 }, { "epoch": 0.4390820705964789, "grad_norm": 0.7073548232797897, "learning_rate": 3.116301703163017e-06, "loss": 0.6641, "step": 15039 }, { "epoch": 0.4391112668243263, "grad_norm": 0.7386433078512353, "learning_rate": 3.116139497161395e-06, "loss": 0.6608, "step": 15040 }, { "epoch": 0.43914046305217364, "grad_norm": 0.7465539151954635, "learning_rate": 3.115977291159773e-06, "loss": 0.7056, "step": 15041 }, { "epoch": 0.439169659280021, "grad_norm": 0.6735488469414771, "learning_rate": 3.115815085158151e-06, "loss": 0.5502, "step": 15042 }, { "epoch": 0.43919885550786836, "grad_norm": 0.7026017558074991, "learning_rate": 3.1156528791565287e-06, "loss": 0.6493, "step": 15043 }, { "epoch": 0.4392280517357157, "grad_norm": 0.7664175062050806, "learning_rate": 3.1154906731549067e-06, "loss": 0.6586, "step": 15044 }, { "epoch": 0.4392572479635631, "grad_norm": 0.8161284472845979, "learning_rate": 3.115328467153285e-06, "loss": 0.7373, "step": 15045 }, { "epoch": 0.4392864441914105, "grad_norm": 0.7724366365626205, "learning_rate": 3.115166261151663e-06, "loss": 0.7128, "step": 15046 }, { "epoch": 0.43931564041925786, "grad_norm": 0.7146606246211697, "learning_rate": 3.115004055150041e-06, "loss": 0.6591, "step": 15047 }, { "epoch": 0.4393448366471052, "grad_norm": 0.7962303523886285, "learning_rate": 3.114841849148419e-06, "loss": 0.7747, "step": 15048 }, { "epoch": 0.4393740328749526, "grad_norm": 0.760907835855877, "learning_rate": 3.1146796431467967e-06, "loss": 0.7023, "step": 15049 }, { "epoch": 0.43940322910279994, "grad_norm": 0.7650857876139151, "learning_rate": 3.1145174371451747e-06, "loss": 0.6925, "step": 15050 }, { "epoch": 0.4394324253306473, "grad_norm": 0.7248318586192737, "learning_rate": 3.1143552311435527e-06, "loss": 0.5818, "step": 15051 }, { "epoch": 0.43946162155849466, "grad_norm": 0.7698996284134803, "learning_rate": 3.1141930251419307e-06, "loss": 0.6799, "step": 15052 }, { "epoch": 0.439490817786342, "grad_norm": 0.993462240761863, "learning_rate": 3.1140308191403083e-06, "loss": 0.7009, "step": 15053 }, { "epoch": 0.4395200140141894, "grad_norm": 0.7018885931847453, "learning_rate": 3.1138686131386863e-06, "loss": 0.5668, "step": 15054 }, { "epoch": 0.43954921024203675, "grad_norm": 0.7869232581440468, "learning_rate": 3.1137064071370643e-06, "loss": 0.7223, "step": 15055 }, { "epoch": 0.4395784064698841, "grad_norm": 0.8300241075734448, "learning_rate": 3.1135442011354423e-06, "loss": 0.6919, "step": 15056 }, { "epoch": 0.43960760269773147, "grad_norm": 0.7260707652723479, "learning_rate": 3.11338199513382e-06, "loss": 0.6336, "step": 15057 }, { "epoch": 0.43963679892557883, "grad_norm": 0.7263127080481196, "learning_rate": 3.113219789132198e-06, "loss": 0.5684, "step": 15058 }, { "epoch": 0.4396659951534262, "grad_norm": 0.7649347766330574, "learning_rate": 3.113057583130576e-06, "loss": 0.6958, "step": 15059 }, { "epoch": 0.43969519138127355, "grad_norm": 0.7153827075440774, "learning_rate": 3.112895377128954e-06, "loss": 0.6304, "step": 15060 }, { "epoch": 0.4397243876091209, "grad_norm": 0.705149494372412, "learning_rate": 3.112733171127332e-06, "loss": 0.6263, "step": 15061 }, { "epoch": 0.4397535838369683, "grad_norm": 0.6984204639174637, "learning_rate": 3.1125709651257095e-06, "loss": 0.6081, "step": 15062 }, { "epoch": 0.43978278006481564, "grad_norm": 0.7246165918020816, "learning_rate": 3.1124087591240875e-06, "loss": 0.6467, "step": 15063 }, { "epoch": 0.439811976292663, "grad_norm": 0.7419312951614444, "learning_rate": 3.112246553122466e-06, "loss": 0.6288, "step": 15064 }, { "epoch": 0.43984117252051036, "grad_norm": 0.7138298620349215, "learning_rate": 3.112084347120844e-06, "loss": 0.6207, "step": 15065 }, { "epoch": 0.4398703687483577, "grad_norm": 0.70924691576593, "learning_rate": 3.111922141119222e-06, "loss": 0.6631, "step": 15066 }, { "epoch": 0.4398995649762051, "grad_norm": 0.6409100773132836, "learning_rate": 3.1117599351176e-06, "loss": 0.509, "step": 15067 }, { "epoch": 0.43992876120405244, "grad_norm": 0.7593700690238074, "learning_rate": 3.1115977291159776e-06, "loss": 0.6752, "step": 15068 }, { "epoch": 0.4399579574318998, "grad_norm": 0.9469167925172471, "learning_rate": 3.1114355231143556e-06, "loss": 0.7977, "step": 15069 }, { "epoch": 0.43998715365974717, "grad_norm": 0.8460578028521132, "learning_rate": 3.1112733171127336e-06, "loss": 0.6874, "step": 15070 }, { "epoch": 0.4400163498875945, "grad_norm": 0.6784817195037882, "learning_rate": 3.1111111111111116e-06, "loss": 0.5863, "step": 15071 }, { "epoch": 0.4400455461154419, "grad_norm": 0.7292022639597497, "learning_rate": 3.110948905109489e-06, "loss": 0.703, "step": 15072 }, { "epoch": 0.44007474234328925, "grad_norm": 0.7046843144698111, "learning_rate": 3.110786699107867e-06, "loss": 0.6498, "step": 15073 }, { "epoch": 0.4401039385711366, "grad_norm": 0.6859334621858549, "learning_rate": 3.110624493106245e-06, "loss": 0.5775, "step": 15074 }, { "epoch": 0.44013313479898397, "grad_norm": 0.6991532070247634, "learning_rate": 3.110462287104623e-06, "loss": 0.5762, "step": 15075 }, { "epoch": 0.44016233102683133, "grad_norm": 0.9043799898613937, "learning_rate": 3.1103000811030008e-06, "loss": 0.7323, "step": 15076 }, { "epoch": 0.4401915272546787, "grad_norm": 0.8147018156337736, "learning_rate": 3.1101378751013788e-06, "loss": 0.7121, "step": 15077 }, { "epoch": 0.44022072348252606, "grad_norm": 0.6668294885056371, "learning_rate": 3.1099756690997568e-06, "loss": 0.5385, "step": 15078 }, { "epoch": 0.4402499197103734, "grad_norm": 0.8381347135380892, "learning_rate": 3.109813463098135e-06, "loss": 0.7781, "step": 15079 }, { "epoch": 0.4402791159382208, "grad_norm": 0.7544660802229753, "learning_rate": 3.109651257096513e-06, "loss": 0.676, "step": 15080 }, { "epoch": 0.44030831216606814, "grad_norm": 0.7645983147082652, "learning_rate": 3.1094890510948904e-06, "loss": 0.7389, "step": 15081 }, { "epoch": 0.4403375083939155, "grad_norm": 0.7139892013204291, "learning_rate": 3.1093268450932684e-06, "loss": 0.6661, "step": 15082 }, { "epoch": 0.44036670462176286, "grad_norm": 0.7493254437814242, "learning_rate": 3.109164639091647e-06, "loss": 0.6691, "step": 15083 }, { "epoch": 0.4403959008496102, "grad_norm": 0.7672519546671127, "learning_rate": 3.109002433090025e-06, "loss": 0.715, "step": 15084 }, { "epoch": 0.4404250970774576, "grad_norm": 0.7631098572927033, "learning_rate": 3.108840227088403e-06, "loss": 0.6393, "step": 15085 }, { "epoch": 0.44045429330530494, "grad_norm": 0.7180204514803367, "learning_rate": 3.108678021086781e-06, "loss": 0.6357, "step": 15086 }, { "epoch": 0.4404834895331523, "grad_norm": 0.7393012911598487, "learning_rate": 3.1085158150851584e-06, "loss": 0.7033, "step": 15087 }, { "epoch": 0.44051268576099967, "grad_norm": 0.6967010906697028, "learning_rate": 3.1083536090835364e-06, "loss": 0.6153, "step": 15088 }, { "epoch": 0.44054188198884703, "grad_norm": 0.723356046961662, "learning_rate": 3.1081914030819144e-06, "loss": 0.6613, "step": 15089 }, { "epoch": 0.4405710782166944, "grad_norm": 0.7363523556876435, "learning_rate": 3.1080291970802924e-06, "loss": 0.6578, "step": 15090 }, { "epoch": 0.44060027444454175, "grad_norm": 0.747459571941321, "learning_rate": 3.10786699107867e-06, "loss": 0.6937, "step": 15091 }, { "epoch": 0.4406294706723891, "grad_norm": 0.7649383445039277, "learning_rate": 3.107704785077048e-06, "loss": 0.7254, "step": 15092 }, { "epoch": 0.4406586669002365, "grad_norm": 0.7998296264987773, "learning_rate": 3.107542579075426e-06, "loss": 0.6595, "step": 15093 }, { "epoch": 0.44068786312808383, "grad_norm": 0.758277716269838, "learning_rate": 3.107380373073804e-06, "loss": 0.6601, "step": 15094 }, { "epoch": 0.4407170593559312, "grad_norm": 0.7420781870400871, "learning_rate": 3.1072181670721816e-06, "loss": 0.7051, "step": 15095 }, { "epoch": 0.44074625558377856, "grad_norm": 0.8055877639286957, "learning_rate": 3.1070559610705596e-06, "loss": 0.7679, "step": 15096 }, { "epoch": 0.4407754518116259, "grad_norm": 0.6711372379753798, "learning_rate": 3.1068937550689376e-06, "loss": 0.589, "step": 15097 }, { "epoch": 0.4408046480394733, "grad_norm": 0.7328855007067974, "learning_rate": 3.1067315490673156e-06, "loss": 0.658, "step": 15098 }, { "epoch": 0.44083384426732064, "grad_norm": 0.7541583131546878, "learning_rate": 3.1065693430656937e-06, "loss": 0.6618, "step": 15099 }, { "epoch": 0.440863040495168, "grad_norm": 0.6814194231398158, "learning_rate": 3.1064071370640712e-06, "loss": 0.576, "step": 15100 }, { "epoch": 0.44089223672301536, "grad_norm": 0.6718840124852564, "learning_rate": 3.1062449310624497e-06, "loss": 0.5788, "step": 15101 }, { "epoch": 0.4409214329508627, "grad_norm": 0.7569357466540546, "learning_rate": 3.1060827250608277e-06, "loss": 0.6422, "step": 15102 }, { "epoch": 0.4409506291787101, "grad_norm": 0.802431318895516, "learning_rate": 3.1059205190592057e-06, "loss": 0.8145, "step": 15103 }, { "epoch": 0.44097982540655745, "grad_norm": 0.740299328765487, "learning_rate": 3.1057583130575837e-06, "loss": 0.6748, "step": 15104 }, { "epoch": 0.4410090216344048, "grad_norm": 0.6883711966170989, "learning_rate": 3.1055961070559617e-06, "loss": 0.5815, "step": 15105 }, { "epoch": 0.4410382178622522, "grad_norm": 0.695879210503418, "learning_rate": 3.1054339010543393e-06, "loss": 0.6305, "step": 15106 }, { "epoch": 0.4410674140900996, "grad_norm": 0.7740487130824671, "learning_rate": 3.1052716950527173e-06, "loss": 0.6511, "step": 15107 }, { "epoch": 0.44109661031794695, "grad_norm": 0.7398046587811924, "learning_rate": 3.1051094890510953e-06, "loss": 0.686, "step": 15108 }, { "epoch": 0.4411258065457943, "grad_norm": 0.834718648023361, "learning_rate": 3.1049472830494733e-06, "loss": 0.6688, "step": 15109 }, { "epoch": 0.44115500277364167, "grad_norm": 0.6700829353049741, "learning_rate": 3.104785077047851e-06, "loss": 0.5796, "step": 15110 }, { "epoch": 0.44118419900148903, "grad_norm": 0.8934955553819889, "learning_rate": 3.104622871046229e-06, "loss": 0.6122, "step": 15111 }, { "epoch": 0.4412133952293364, "grad_norm": 0.7034588909239579, "learning_rate": 3.104460665044607e-06, "loss": 0.6707, "step": 15112 }, { "epoch": 0.44124259145718375, "grad_norm": 0.7105576469669733, "learning_rate": 3.104298459042985e-06, "loss": 0.6049, "step": 15113 }, { "epoch": 0.4412717876850311, "grad_norm": 0.7047508428184874, "learning_rate": 3.1041362530413625e-06, "loss": 0.6149, "step": 15114 }, { "epoch": 0.4413009839128785, "grad_norm": 0.7332191879093868, "learning_rate": 3.1039740470397405e-06, "loss": 0.6211, "step": 15115 }, { "epoch": 0.44133018014072584, "grad_norm": 0.7343548575528144, "learning_rate": 3.1038118410381185e-06, "loss": 0.6672, "step": 15116 }, { "epoch": 0.4413593763685732, "grad_norm": 0.7416828395079584, "learning_rate": 3.1036496350364965e-06, "loss": 0.688, "step": 15117 }, { "epoch": 0.44138857259642056, "grad_norm": 0.7255633815614367, "learning_rate": 3.1034874290348745e-06, "loss": 0.5994, "step": 15118 }, { "epoch": 0.4414177688242679, "grad_norm": 0.6971974984028553, "learning_rate": 3.103325223033252e-06, "loss": 0.575, "step": 15119 }, { "epoch": 0.4414469650521153, "grad_norm": 0.700859360348869, "learning_rate": 3.1031630170316305e-06, "loss": 0.6159, "step": 15120 }, { "epoch": 0.44147616127996264, "grad_norm": 0.7578692182192389, "learning_rate": 3.1030008110300085e-06, "loss": 0.7051, "step": 15121 }, { "epoch": 0.44150535750781, "grad_norm": 0.710269903388591, "learning_rate": 3.1028386050283865e-06, "loss": 0.6455, "step": 15122 }, { "epoch": 0.44153455373565736, "grad_norm": 0.7624954300029727, "learning_rate": 3.1026763990267645e-06, "loss": 0.6678, "step": 15123 }, { "epoch": 0.4415637499635047, "grad_norm": 0.8501593801993307, "learning_rate": 3.102514193025142e-06, "loss": 0.8002, "step": 15124 }, { "epoch": 0.4415929461913521, "grad_norm": 0.6912925360189367, "learning_rate": 3.10235198702352e-06, "loss": 0.593, "step": 15125 }, { "epoch": 0.44162214241919945, "grad_norm": 0.6907755147239792, "learning_rate": 3.102189781021898e-06, "loss": 0.5902, "step": 15126 }, { "epoch": 0.4416513386470468, "grad_norm": 0.7210238420412376, "learning_rate": 3.102027575020276e-06, "loss": 0.6214, "step": 15127 }, { "epoch": 0.44168053487489417, "grad_norm": 0.7466827626311995, "learning_rate": 3.101865369018654e-06, "loss": 0.6828, "step": 15128 }, { "epoch": 0.44170973110274153, "grad_norm": 0.69356667964423, "learning_rate": 3.1017031630170317e-06, "loss": 0.5571, "step": 15129 }, { "epoch": 0.4417389273305889, "grad_norm": 0.7056876478898262, "learning_rate": 3.1015409570154097e-06, "loss": 0.6463, "step": 15130 }, { "epoch": 0.44176812355843625, "grad_norm": 0.6954879266711187, "learning_rate": 3.1013787510137878e-06, "loss": 0.632, "step": 15131 }, { "epoch": 0.4417973197862836, "grad_norm": 0.7333914812268476, "learning_rate": 3.1012165450121658e-06, "loss": 0.6583, "step": 15132 }, { "epoch": 0.441826516014131, "grad_norm": 0.6827103930054413, "learning_rate": 3.1010543390105433e-06, "loss": 0.564, "step": 15133 }, { "epoch": 0.44185571224197834, "grad_norm": 0.7065635550842166, "learning_rate": 3.1008921330089214e-06, "loss": 0.6283, "step": 15134 }, { "epoch": 0.4418849084698257, "grad_norm": 0.7392030953995479, "learning_rate": 3.1007299270072994e-06, "loss": 0.6234, "step": 15135 }, { "epoch": 0.44191410469767306, "grad_norm": 0.7318488084389814, "learning_rate": 3.1005677210056774e-06, "loss": 0.6489, "step": 15136 }, { "epoch": 0.4419433009255204, "grad_norm": 0.8340454474381631, "learning_rate": 3.1004055150040554e-06, "loss": 0.7157, "step": 15137 }, { "epoch": 0.4419724971533678, "grad_norm": 0.696509773554071, "learning_rate": 3.100243309002433e-06, "loss": 0.6356, "step": 15138 }, { "epoch": 0.44200169338121514, "grad_norm": 0.7278776479356943, "learning_rate": 3.1000811030008114e-06, "loss": 0.6816, "step": 15139 }, { "epoch": 0.4420308896090625, "grad_norm": 0.7787636936565198, "learning_rate": 3.0999188969991894e-06, "loss": 0.7146, "step": 15140 }, { "epoch": 0.44206008583690987, "grad_norm": 0.8075331448098481, "learning_rate": 3.0997566909975674e-06, "loss": 0.6853, "step": 15141 }, { "epoch": 0.4420892820647572, "grad_norm": 0.7326956367472467, "learning_rate": 3.0995944849959454e-06, "loss": 0.6421, "step": 15142 }, { "epoch": 0.4421184782926046, "grad_norm": 0.6876223028914353, "learning_rate": 3.099432278994323e-06, "loss": 0.6379, "step": 15143 }, { "epoch": 0.44214767452045195, "grad_norm": 0.801238554326792, "learning_rate": 3.099270072992701e-06, "loss": 0.6485, "step": 15144 }, { "epoch": 0.4421768707482993, "grad_norm": 0.7957574325120084, "learning_rate": 3.099107866991079e-06, "loss": 0.7241, "step": 15145 }, { "epoch": 0.44220606697614667, "grad_norm": 0.6984460569484872, "learning_rate": 3.098945660989457e-06, "loss": 0.5901, "step": 15146 }, { "epoch": 0.44223526320399403, "grad_norm": 0.8328760393662232, "learning_rate": 3.098783454987835e-06, "loss": 0.6347, "step": 15147 }, { "epoch": 0.4422644594318414, "grad_norm": 0.7683824937292085, "learning_rate": 3.0986212489862126e-06, "loss": 0.7726, "step": 15148 }, { "epoch": 0.44229365565968876, "grad_norm": 0.7801103178830506, "learning_rate": 3.0984590429845906e-06, "loss": 0.7184, "step": 15149 }, { "epoch": 0.4423228518875361, "grad_norm": 0.714358391191559, "learning_rate": 3.0982968369829686e-06, "loss": 0.6318, "step": 15150 }, { "epoch": 0.4423520481153835, "grad_norm": 0.8007527576782584, "learning_rate": 3.0981346309813466e-06, "loss": 0.574, "step": 15151 }, { "epoch": 0.44238124434323084, "grad_norm": 0.7162070720175232, "learning_rate": 3.097972424979724e-06, "loss": 0.6574, "step": 15152 }, { "epoch": 0.4424104405710782, "grad_norm": 0.7624043076873126, "learning_rate": 3.097810218978102e-06, "loss": 0.6518, "step": 15153 }, { "epoch": 0.44243963679892556, "grad_norm": 0.6963811568801425, "learning_rate": 3.0976480129764802e-06, "loss": 0.6396, "step": 15154 }, { "epoch": 0.4424688330267729, "grad_norm": 0.7011055134830714, "learning_rate": 3.0974858069748582e-06, "loss": 0.6484, "step": 15155 }, { "epoch": 0.4424980292546203, "grad_norm": 0.6982740474171956, "learning_rate": 3.0973236009732362e-06, "loss": 0.6303, "step": 15156 }, { "epoch": 0.44252722548246765, "grad_norm": 0.7345496529061138, "learning_rate": 3.097161394971614e-06, "loss": 0.6418, "step": 15157 }, { "epoch": 0.442556421710315, "grad_norm": 0.7370901817089386, "learning_rate": 3.0969991889699922e-06, "loss": 0.6214, "step": 15158 }, { "epoch": 0.44258561793816237, "grad_norm": 0.7297353362715594, "learning_rate": 3.0968369829683703e-06, "loss": 0.6152, "step": 15159 }, { "epoch": 0.44261481416600973, "grad_norm": 0.834042317716397, "learning_rate": 3.0966747769667483e-06, "loss": 0.613, "step": 15160 }, { "epoch": 0.4426440103938571, "grad_norm": 0.782826802957982, "learning_rate": 3.0965125709651263e-06, "loss": 0.7652, "step": 15161 }, { "epoch": 0.44267320662170445, "grad_norm": 0.7292132232764837, "learning_rate": 3.096350364963504e-06, "loss": 0.6479, "step": 15162 }, { "epoch": 0.4427024028495518, "grad_norm": 0.7068920411321852, "learning_rate": 3.096188158961882e-06, "loss": 0.6397, "step": 15163 }, { "epoch": 0.4427315990773992, "grad_norm": 0.7708389988765578, "learning_rate": 3.09602595296026e-06, "loss": 0.7252, "step": 15164 }, { "epoch": 0.44276079530524654, "grad_norm": 0.7502258192490742, "learning_rate": 3.095863746958638e-06, "loss": 0.7202, "step": 15165 }, { "epoch": 0.44278999153309395, "grad_norm": 1.429898921617408, "learning_rate": 3.095701540957016e-06, "loss": 0.696, "step": 15166 }, { "epoch": 0.4428191877609413, "grad_norm": 0.7116378139022573, "learning_rate": 3.0955393349553935e-06, "loss": 0.6378, "step": 15167 }, { "epoch": 0.4428483839887887, "grad_norm": 0.785115361922866, "learning_rate": 3.0953771289537715e-06, "loss": 0.7419, "step": 15168 }, { "epoch": 0.44287758021663604, "grad_norm": 0.7661449051155111, "learning_rate": 3.0952149229521495e-06, "loss": 0.6465, "step": 15169 }, { "epoch": 0.4429067764444834, "grad_norm": 0.7261070686428936, "learning_rate": 3.0950527169505275e-06, "loss": 0.6169, "step": 15170 }, { "epoch": 0.44293597267233076, "grad_norm": 0.7182555258986209, "learning_rate": 3.094890510948905e-06, "loss": 0.6171, "step": 15171 }, { "epoch": 0.4429651689001781, "grad_norm": 0.7132235751172914, "learning_rate": 3.094728304947283e-06, "loss": 0.6359, "step": 15172 }, { "epoch": 0.4429943651280255, "grad_norm": 0.6953775968918249, "learning_rate": 3.094566098945661e-06, "loss": 0.5988, "step": 15173 }, { "epoch": 0.44302356135587284, "grad_norm": 0.7236328561713606, "learning_rate": 3.094403892944039e-06, "loss": 0.6685, "step": 15174 }, { "epoch": 0.4430527575837202, "grad_norm": 0.7225823611747153, "learning_rate": 3.094241686942417e-06, "loss": 0.6361, "step": 15175 }, { "epoch": 0.44308195381156756, "grad_norm": 0.7204031054295363, "learning_rate": 3.0940794809407947e-06, "loss": 0.5554, "step": 15176 }, { "epoch": 0.4431111500394149, "grad_norm": 0.7367954863154238, "learning_rate": 3.093917274939173e-06, "loss": 0.7382, "step": 15177 }, { "epoch": 0.4431403462672623, "grad_norm": 0.7804591403170257, "learning_rate": 3.093755068937551e-06, "loss": 0.6827, "step": 15178 }, { "epoch": 0.44316954249510965, "grad_norm": 0.7806917532271974, "learning_rate": 3.093592862935929e-06, "loss": 0.6531, "step": 15179 }, { "epoch": 0.443198738722957, "grad_norm": 0.6617226848426891, "learning_rate": 3.093430656934307e-06, "loss": 0.4959, "step": 15180 }, { "epoch": 0.44322793495080437, "grad_norm": 0.7483073993326409, "learning_rate": 3.0932684509326847e-06, "loss": 0.6518, "step": 15181 }, { "epoch": 0.44325713117865173, "grad_norm": 0.6960458294220876, "learning_rate": 3.0931062449310627e-06, "loss": 0.6152, "step": 15182 }, { "epoch": 0.4432863274064991, "grad_norm": 0.762839283368231, "learning_rate": 3.0929440389294407e-06, "loss": 0.6296, "step": 15183 }, { "epoch": 0.44331552363434645, "grad_norm": 0.757040085040882, "learning_rate": 3.0927818329278187e-06, "loss": 0.6966, "step": 15184 }, { "epoch": 0.4433447198621938, "grad_norm": 0.7562438210367883, "learning_rate": 3.0926196269261967e-06, "loss": 0.6712, "step": 15185 }, { "epoch": 0.4433739160900412, "grad_norm": 0.7177886223116552, "learning_rate": 3.0924574209245743e-06, "loss": 0.6685, "step": 15186 }, { "epoch": 0.44340311231788854, "grad_norm": 0.8653334573915289, "learning_rate": 3.0922952149229523e-06, "loss": 0.6847, "step": 15187 }, { "epoch": 0.4434323085457359, "grad_norm": 0.8243091217594976, "learning_rate": 3.0921330089213303e-06, "loss": 0.7902, "step": 15188 }, { "epoch": 0.44346150477358326, "grad_norm": 0.7204275745935248, "learning_rate": 3.0919708029197083e-06, "loss": 0.6645, "step": 15189 }, { "epoch": 0.4434907010014306, "grad_norm": 0.7362067922740335, "learning_rate": 3.091808596918086e-06, "loss": 0.6718, "step": 15190 }, { "epoch": 0.443519897229278, "grad_norm": 0.7671532377497706, "learning_rate": 3.091646390916464e-06, "loss": 0.6751, "step": 15191 }, { "epoch": 0.44354909345712534, "grad_norm": 0.7213072342389825, "learning_rate": 3.091484184914842e-06, "loss": 0.7019, "step": 15192 }, { "epoch": 0.4435782896849727, "grad_norm": 0.7444637253906361, "learning_rate": 3.09132197891322e-06, "loss": 0.6469, "step": 15193 }, { "epoch": 0.44360748591282007, "grad_norm": 0.7885309381045494, "learning_rate": 3.091159772911598e-06, "loss": 0.6774, "step": 15194 }, { "epoch": 0.4436366821406674, "grad_norm": 0.7166158771007306, "learning_rate": 3.0909975669099755e-06, "loss": 0.6212, "step": 15195 }, { "epoch": 0.4436658783685148, "grad_norm": 0.7580928932073691, "learning_rate": 3.090835360908354e-06, "loss": 0.6299, "step": 15196 }, { "epoch": 0.44369507459636215, "grad_norm": 0.6697686328062705, "learning_rate": 3.090673154906732e-06, "loss": 0.6049, "step": 15197 }, { "epoch": 0.4437242708242095, "grad_norm": 0.7019553885051011, "learning_rate": 3.09051094890511e-06, "loss": 0.6378, "step": 15198 }, { "epoch": 0.44375346705205687, "grad_norm": 0.6937106328448954, "learning_rate": 3.090348742903488e-06, "loss": 0.625, "step": 15199 }, { "epoch": 0.44378266327990423, "grad_norm": 0.7570414028778736, "learning_rate": 3.0901865369018656e-06, "loss": 0.6396, "step": 15200 }, { "epoch": 0.4438118595077516, "grad_norm": 0.6988336188802646, "learning_rate": 3.0900243309002436e-06, "loss": 0.5751, "step": 15201 }, { "epoch": 0.44384105573559895, "grad_norm": 0.7635611650847225, "learning_rate": 3.0898621248986216e-06, "loss": 0.7275, "step": 15202 }, { "epoch": 0.4438702519634463, "grad_norm": 0.7820354509047152, "learning_rate": 3.0896999188969996e-06, "loss": 0.692, "step": 15203 }, { "epoch": 0.4438994481912937, "grad_norm": 0.684494476680669, "learning_rate": 3.0895377128953776e-06, "loss": 0.5699, "step": 15204 }, { "epoch": 0.44392864441914104, "grad_norm": 0.7040687477702977, "learning_rate": 3.089375506893755e-06, "loss": 0.6219, "step": 15205 }, { "epoch": 0.4439578406469884, "grad_norm": 0.7606213262915161, "learning_rate": 3.089213300892133e-06, "loss": 0.7172, "step": 15206 }, { "epoch": 0.44398703687483576, "grad_norm": 0.7012696601161362, "learning_rate": 3.089051094890511e-06, "loss": 0.6176, "step": 15207 }, { "epoch": 0.4440162331026831, "grad_norm": 0.6947210041574353, "learning_rate": 3.088888888888889e-06, "loss": 0.6038, "step": 15208 }, { "epoch": 0.4440454293305305, "grad_norm": 0.7081888108879362, "learning_rate": 3.0887266828872668e-06, "loss": 0.631, "step": 15209 }, { "epoch": 0.44407462555837784, "grad_norm": 0.8333402329701556, "learning_rate": 3.0885644768856448e-06, "loss": 0.5692, "step": 15210 }, { "epoch": 0.4441038217862252, "grad_norm": 0.7681443547304703, "learning_rate": 3.088402270884023e-06, "loss": 0.7151, "step": 15211 }, { "epoch": 0.44413301801407257, "grad_norm": 0.7267812261280072, "learning_rate": 3.088240064882401e-06, "loss": 0.6318, "step": 15212 }, { "epoch": 0.44416221424191993, "grad_norm": 0.6788685775490794, "learning_rate": 3.088077858880779e-06, "loss": 0.614, "step": 15213 }, { "epoch": 0.4441914104697673, "grad_norm": 0.7899908637198667, "learning_rate": 3.0879156528791564e-06, "loss": 0.7759, "step": 15214 }, { "epoch": 0.44422060669761465, "grad_norm": 0.7309784250095667, "learning_rate": 3.087753446877535e-06, "loss": 0.6763, "step": 15215 }, { "epoch": 0.444249802925462, "grad_norm": 0.7025203391794468, "learning_rate": 3.087591240875913e-06, "loss": 0.6141, "step": 15216 }, { "epoch": 0.4442789991533094, "grad_norm": 0.7570302252676092, "learning_rate": 3.087429034874291e-06, "loss": 0.6817, "step": 15217 }, { "epoch": 0.44430819538115673, "grad_norm": 0.676275943746932, "learning_rate": 3.087266828872669e-06, "loss": 0.5649, "step": 15218 }, { "epoch": 0.4443373916090041, "grad_norm": 0.769343398746875, "learning_rate": 3.0871046228710464e-06, "loss": 0.7093, "step": 15219 }, { "epoch": 0.44436658783685146, "grad_norm": 0.8072179038632719, "learning_rate": 3.0869424168694244e-06, "loss": 0.6133, "step": 15220 }, { "epoch": 0.4443957840646988, "grad_norm": 0.7025912630847547, "learning_rate": 3.0867802108678024e-06, "loss": 0.6176, "step": 15221 }, { "epoch": 0.4444249802925462, "grad_norm": 0.6869818857879377, "learning_rate": 3.0866180048661804e-06, "loss": 0.5931, "step": 15222 }, { "epoch": 0.44445417652039354, "grad_norm": 0.6371559769590442, "learning_rate": 3.0864557988645585e-06, "loss": 0.4948, "step": 15223 }, { "epoch": 0.4444833727482409, "grad_norm": 0.7777867190086399, "learning_rate": 3.086293592862936e-06, "loss": 0.6911, "step": 15224 }, { "epoch": 0.44451256897608826, "grad_norm": 0.8581718503171197, "learning_rate": 3.086131386861314e-06, "loss": 0.7074, "step": 15225 }, { "epoch": 0.4445417652039356, "grad_norm": 0.6918198482016968, "learning_rate": 3.085969180859692e-06, "loss": 0.6259, "step": 15226 }, { "epoch": 0.44457096143178304, "grad_norm": 0.7537010624421422, "learning_rate": 3.08580697485807e-06, "loss": 0.7091, "step": 15227 }, { "epoch": 0.4446001576596304, "grad_norm": 0.7446854724291821, "learning_rate": 3.0856447688564476e-06, "loss": 0.6598, "step": 15228 }, { "epoch": 0.44462935388747776, "grad_norm": 0.7121498449919968, "learning_rate": 3.0854825628548256e-06, "loss": 0.6606, "step": 15229 }, { "epoch": 0.4446585501153251, "grad_norm": 0.8181400567125776, "learning_rate": 3.0853203568532037e-06, "loss": 0.8161, "step": 15230 }, { "epoch": 0.4446877463431725, "grad_norm": 0.6687326264805955, "learning_rate": 3.0851581508515817e-06, "loss": 0.5619, "step": 15231 }, { "epoch": 0.44471694257101985, "grad_norm": 0.735323171066773, "learning_rate": 3.0849959448499597e-06, "loss": 0.6312, "step": 15232 }, { "epoch": 0.4447461387988672, "grad_norm": 0.715379487381299, "learning_rate": 3.0848337388483372e-06, "loss": 0.6492, "step": 15233 }, { "epoch": 0.44477533502671457, "grad_norm": 0.6826968080102689, "learning_rate": 3.0846715328467157e-06, "loss": 0.5633, "step": 15234 }, { "epoch": 0.44480453125456193, "grad_norm": 0.7001157350599782, "learning_rate": 3.0845093268450937e-06, "loss": 0.5907, "step": 15235 }, { "epoch": 0.4448337274824093, "grad_norm": 0.7541887026794545, "learning_rate": 3.0843471208434717e-06, "loss": 0.6167, "step": 15236 }, { "epoch": 0.44486292371025665, "grad_norm": 0.6728600244347108, "learning_rate": 3.0841849148418497e-06, "loss": 0.6046, "step": 15237 }, { "epoch": 0.444892119938104, "grad_norm": 0.7336649386437565, "learning_rate": 3.0840227088402273e-06, "loss": 0.6064, "step": 15238 }, { "epoch": 0.4449213161659514, "grad_norm": 0.797922308523756, "learning_rate": 3.0838605028386053e-06, "loss": 0.6104, "step": 15239 }, { "epoch": 0.44495051239379874, "grad_norm": 0.7488298721970003, "learning_rate": 3.0836982968369833e-06, "loss": 0.6489, "step": 15240 }, { "epoch": 0.4449797086216461, "grad_norm": 0.7320252448847856, "learning_rate": 3.0835360908353613e-06, "loss": 0.6629, "step": 15241 }, { "epoch": 0.44500890484949346, "grad_norm": 0.781600685334972, "learning_rate": 3.0833738848337393e-06, "loss": 0.7556, "step": 15242 }, { "epoch": 0.4450381010773408, "grad_norm": 0.6956655246572002, "learning_rate": 3.083211678832117e-06, "loss": 0.63, "step": 15243 }, { "epoch": 0.4450672973051882, "grad_norm": 0.7719782956390016, "learning_rate": 3.083049472830495e-06, "loss": 0.734, "step": 15244 }, { "epoch": 0.44509649353303554, "grad_norm": 0.6969821381746553, "learning_rate": 3.082887266828873e-06, "loss": 0.623, "step": 15245 }, { "epoch": 0.4451256897608829, "grad_norm": 0.7753798547656198, "learning_rate": 3.082725060827251e-06, "loss": 0.7004, "step": 15246 }, { "epoch": 0.44515488598873026, "grad_norm": 0.7156188127265158, "learning_rate": 3.0825628548256285e-06, "loss": 0.6081, "step": 15247 }, { "epoch": 0.4451840822165776, "grad_norm": 0.7278375255134074, "learning_rate": 3.0824006488240065e-06, "loss": 0.6204, "step": 15248 }, { "epoch": 0.445213278444425, "grad_norm": 0.7608999184978683, "learning_rate": 3.0822384428223845e-06, "loss": 0.7037, "step": 15249 }, { "epoch": 0.44524247467227235, "grad_norm": 0.7274708296740702, "learning_rate": 3.0820762368207625e-06, "loss": 0.6306, "step": 15250 }, { "epoch": 0.4452716709001197, "grad_norm": 0.8108054191301969, "learning_rate": 3.0819140308191405e-06, "loss": 0.6918, "step": 15251 }, { "epoch": 0.44530086712796707, "grad_norm": 0.7489382630155982, "learning_rate": 3.081751824817518e-06, "loss": 0.6494, "step": 15252 }, { "epoch": 0.44533006335581443, "grad_norm": 0.7380813603284208, "learning_rate": 3.0815896188158965e-06, "loss": 0.6848, "step": 15253 }, { "epoch": 0.4453592595836618, "grad_norm": 0.6877675559487818, "learning_rate": 3.0814274128142745e-06, "loss": 0.6064, "step": 15254 }, { "epoch": 0.44538845581150915, "grad_norm": 0.7285317312214539, "learning_rate": 3.0812652068126526e-06, "loss": 0.6051, "step": 15255 }, { "epoch": 0.4454176520393565, "grad_norm": 0.7353747068911176, "learning_rate": 3.0811030008110306e-06, "loss": 0.6264, "step": 15256 }, { "epoch": 0.4454468482672039, "grad_norm": 0.7535144060622854, "learning_rate": 3.080940794809408e-06, "loss": 0.653, "step": 15257 }, { "epoch": 0.44547604449505124, "grad_norm": 0.7363744848432001, "learning_rate": 3.080778588807786e-06, "loss": 0.6539, "step": 15258 }, { "epoch": 0.4455052407228986, "grad_norm": 0.7918842618385091, "learning_rate": 3.080616382806164e-06, "loss": 0.7518, "step": 15259 }, { "epoch": 0.44553443695074596, "grad_norm": 0.72496332776445, "learning_rate": 3.080454176804542e-06, "loss": 0.663, "step": 15260 }, { "epoch": 0.4455636331785933, "grad_norm": 0.7430631244622125, "learning_rate": 3.08029197080292e-06, "loss": 0.6523, "step": 15261 }, { "epoch": 0.4455928294064407, "grad_norm": 0.7594347129848357, "learning_rate": 3.0801297648012978e-06, "loss": 0.666, "step": 15262 }, { "epoch": 0.44562202563428804, "grad_norm": 0.7651088316211677, "learning_rate": 3.0799675587996758e-06, "loss": 0.7188, "step": 15263 }, { "epoch": 0.4456512218621354, "grad_norm": 0.74946109331289, "learning_rate": 3.0798053527980538e-06, "loss": 0.6318, "step": 15264 }, { "epoch": 0.44568041808998277, "grad_norm": 0.675999417015553, "learning_rate": 3.0796431467964318e-06, "loss": 0.5289, "step": 15265 }, { "epoch": 0.4457096143178301, "grad_norm": 0.7116259419430563, "learning_rate": 3.0794809407948094e-06, "loss": 0.6042, "step": 15266 }, { "epoch": 0.4457388105456775, "grad_norm": 0.7285569250908127, "learning_rate": 3.0793187347931874e-06, "loss": 0.6133, "step": 15267 }, { "epoch": 0.44576800677352485, "grad_norm": 0.7323602220543043, "learning_rate": 3.0791565287915654e-06, "loss": 0.6258, "step": 15268 }, { "epoch": 0.4457972030013722, "grad_norm": 0.7059295465308353, "learning_rate": 3.0789943227899434e-06, "loss": 0.6444, "step": 15269 }, { "epoch": 0.44582639922921957, "grad_norm": 0.734412158506446, "learning_rate": 3.078832116788321e-06, "loss": 0.6788, "step": 15270 }, { "epoch": 0.44585559545706693, "grad_norm": 0.7324400669474642, "learning_rate": 3.0786699107867e-06, "loss": 0.6807, "step": 15271 }, { "epoch": 0.4458847916849143, "grad_norm": 0.7016990460218817, "learning_rate": 3.0785077047850774e-06, "loss": 0.6036, "step": 15272 }, { "epoch": 0.44591398791276166, "grad_norm": 0.7093722126010152, "learning_rate": 3.0783454987834554e-06, "loss": 0.6517, "step": 15273 }, { "epoch": 0.445943184140609, "grad_norm": 0.7227064876757596, "learning_rate": 3.0781832927818334e-06, "loss": 0.612, "step": 15274 }, { "epoch": 0.4459723803684564, "grad_norm": 0.6851740870128977, "learning_rate": 3.0780210867802114e-06, "loss": 0.5991, "step": 15275 }, { "epoch": 0.44600157659630374, "grad_norm": 0.7864712307462818, "learning_rate": 3.077858880778589e-06, "loss": 0.6481, "step": 15276 }, { "epoch": 0.4460307728241511, "grad_norm": 0.7384536168956555, "learning_rate": 3.077696674776967e-06, "loss": 0.6916, "step": 15277 }, { "epoch": 0.44605996905199846, "grad_norm": 0.7097938586832706, "learning_rate": 3.077534468775345e-06, "loss": 0.6112, "step": 15278 }, { "epoch": 0.4460891652798458, "grad_norm": 0.773431699152209, "learning_rate": 3.077372262773723e-06, "loss": 0.6607, "step": 15279 }, { "epoch": 0.4461183615076932, "grad_norm": 0.9050098677287851, "learning_rate": 3.077210056772101e-06, "loss": 0.6309, "step": 15280 }, { "epoch": 0.44614755773554055, "grad_norm": 0.8301365678156114, "learning_rate": 3.0770478507704786e-06, "loss": 0.6515, "step": 15281 }, { "epoch": 0.4461767539633879, "grad_norm": 0.8435941833369711, "learning_rate": 3.0768856447688566e-06, "loss": 0.7978, "step": 15282 }, { "epoch": 0.44620595019123527, "grad_norm": 0.7111158368728172, "learning_rate": 3.0767234387672346e-06, "loss": 0.6413, "step": 15283 }, { "epoch": 0.44623514641908263, "grad_norm": 0.7483905598784965, "learning_rate": 3.0765612327656126e-06, "loss": 0.7472, "step": 15284 }, { "epoch": 0.44626434264693, "grad_norm": 0.7324231376599334, "learning_rate": 3.0763990267639902e-06, "loss": 0.6662, "step": 15285 }, { "epoch": 0.44629353887477735, "grad_norm": 0.7285575087052589, "learning_rate": 3.0762368207623682e-06, "loss": 0.6173, "step": 15286 }, { "epoch": 0.44632273510262477, "grad_norm": 0.7220744258477971, "learning_rate": 3.0760746147607462e-06, "loss": 0.7006, "step": 15287 }, { "epoch": 0.44635193133047213, "grad_norm": 0.7225789050004785, "learning_rate": 3.0759124087591242e-06, "loss": 0.6465, "step": 15288 }, { "epoch": 0.4463811275583195, "grad_norm": 0.7390943583711892, "learning_rate": 3.075750202757502e-06, "loss": 0.6608, "step": 15289 }, { "epoch": 0.44641032378616685, "grad_norm": 0.6889838716821431, "learning_rate": 3.0755879967558807e-06, "loss": 0.6137, "step": 15290 }, { "epoch": 0.4464395200140142, "grad_norm": 0.715771844298602, "learning_rate": 3.0754257907542583e-06, "loss": 0.5949, "step": 15291 }, { "epoch": 0.4464687162418616, "grad_norm": 0.6921746371253907, "learning_rate": 3.0752635847526363e-06, "loss": 0.6098, "step": 15292 }, { "epoch": 0.44649791246970894, "grad_norm": 0.738964943677618, "learning_rate": 3.0751013787510143e-06, "loss": 0.7045, "step": 15293 }, { "epoch": 0.4465271086975563, "grad_norm": 0.7710518369495533, "learning_rate": 3.0749391727493923e-06, "loss": 0.7278, "step": 15294 }, { "epoch": 0.44655630492540366, "grad_norm": 0.752953839800667, "learning_rate": 3.07477696674777e-06, "loss": 0.6708, "step": 15295 }, { "epoch": 0.446585501153251, "grad_norm": 0.7456376462758548, "learning_rate": 3.074614760746148e-06, "loss": 0.669, "step": 15296 }, { "epoch": 0.4466146973810984, "grad_norm": 0.8083341661403894, "learning_rate": 3.074452554744526e-06, "loss": 0.7196, "step": 15297 }, { "epoch": 0.44664389360894574, "grad_norm": 0.7003229648420747, "learning_rate": 3.074290348742904e-06, "loss": 0.6338, "step": 15298 }, { "epoch": 0.4466730898367931, "grad_norm": 0.7036787880956737, "learning_rate": 3.074128142741282e-06, "loss": 0.6339, "step": 15299 }, { "epoch": 0.44670228606464046, "grad_norm": 0.6952956739700684, "learning_rate": 3.0739659367396595e-06, "loss": 0.5914, "step": 15300 }, { "epoch": 0.4467314822924878, "grad_norm": 0.6863129527120032, "learning_rate": 3.0738037307380375e-06, "loss": 0.6056, "step": 15301 }, { "epoch": 0.4467606785203352, "grad_norm": 0.6849714596638684, "learning_rate": 3.0736415247364155e-06, "loss": 0.551, "step": 15302 }, { "epoch": 0.44678987474818255, "grad_norm": 0.7401547473979254, "learning_rate": 3.0734793187347935e-06, "loss": 0.6531, "step": 15303 }, { "epoch": 0.4468190709760299, "grad_norm": 0.6911214833187572, "learning_rate": 3.073317112733171e-06, "loss": 0.6176, "step": 15304 }, { "epoch": 0.44684826720387727, "grad_norm": 0.7806870362114414, "learning_rate": 3.073154906731549e-06, "loss": 0.7475, "step": 15305 }, { "epoch": 0.44687746343172463, "grad_norm": 0.7785528594434705, "learning_rate": 3.072992700729927e-06, "loss": 0.6288, "step": 15306 }, { "epoch": 0.446906659659572, "grad_norm": 0.6998939695508071, "learning_rate": 3.072830494728305e-06, "loss": 0.6124, "step": 15307 }, { "epoch": 0.44693585588741935, "grad_norm": 0.9171186852446489, "learning_rate": 3.0726682887266827e-06, "loss": 0.6744, "step": 15308 }, { "epoch": 0.4469650521152667, "grad_norm": 0.7829488395996054, "learning_rate": 3.0725060827250615e-06, "loss": 0.7177, "step": 15309 }, { "epoch": 0.4469942483431141, "grad_norm": 0.8982887703348599, "learning_rate": 3.072343876723439e-06, "loss": 0.6931, "step": 15310 }, { "epoch": 0.44702344457096144, "grad_norm": 0.6996782855393687, "learning_rate": 3.072181670721817e-06, "loss": 0.5988, "step": 15311 }, { "epoch": 0.4470526407988088, "grad_norm": 0.7727271163144345, "learning_rate": 3.072019464720195e-06, "loss": 0.7516, "step": 15312 }, { "epoch": 0.44708183702665616, "grad_norm": 0.7354879133568565, "learning_rate": 3.071857258718573e-06, "loss": 0.6515, "step": 15313 }, { "epoch": 0.4471110332545035, "grad_norm": 0.7405404377083183, "learning_rate": 3.0716950527169507e-06, "loss": 0.6754, "step": 15314 }, { "epoch": 0.4471402294823509, "grad_norm": 0.6760720617396466, "learning_rate": 3.0715328467153287e-06, "loss": 0.5564, "step": 15315 }, { "epoch": 0.44716942571019824, "grad_norm": 0.7162010801787178, "learning_rate": 3.0713706407137067e-06, "loss": 0.634, "step": 15316 }, { "epoch": 0.4471986219380456, "grad_norm": 0.7632597028619387, "learning_rate": 3.0712084347120847e-06, "loss": 0.7058, "step": 15317 }, { "epoch": 0.44722781816589297, "grad_norm": 0.6858662190523616, "learning_rate": 3.0710462287104627e-06, "loss": 0.5994, "step": 15318 }, { "epoch": 0.4472570143937403, "grad_norm": 0.6951834153362197, "learning_rate": 3.0708840227088403e-06, "loss": 0.6005, "step": 15319 }, { "epoch": 0.4472862106215877, "grad_norm": 0.7283659328705631, "learning_rate": 3.0707218167072183e-06, "loss": 0.6169, "step": 15320 }, { "epoch": 0.44731540684943505, "grad_norm": 0.7171682284862947, "learning_rate": 3.0705596107055963e-06, "loss": 0.5611, "step": 15321 }, { "epoch": 0.4473446030772824, "grad_norm": 0.6678144402835261, "learning_rate": 3.0703974047039743e-06, "loss": 0.5316, "step": 15322 }, { "epoch": 0.44737379930512977, "grad_norm": 0.7425052957417955, "learning_rate": 3.070235198702352e-06, "loss": 0.6685, "step": 15323 }, { "epoch": 0.44740299553297713, "grad_norm": 0.6900563917644219, "learning_rate": 3.07007299270073e-06, "loss": 0.5866, "step": 15324 }, { "epoch": 0.4474321917608245, "grad_norm": 0.7375633679299308, "learning_rate": 3.069910786699108e-06, "loss": 0.6689, "step": 15325 }, { "epoch": 0.44746138798867185, "grad_norm": 0.7089732398528908, "learning_rate": 3.069748580697486e-06, "loss": 0.6015, "step": 15326 }, { "epoch": 0.4474905842165192, "grad_norm": 0.714984044332967, "learning_rate": 3.0695863746958635e-06, "loss": 0.6133, "step": 15327 }, { "epoch": 0.4475197804443666, "grad_norm": 0.7707296504996365, "learning_rate": 3.0694241686942424e-06, "loss": 0.6869, "step": 15328 }, { "epoch": 0.44754897667221394, "grad_norm": 0.7074651186679077, "learning_rate": 3.06926196269262e-06, "loss": 0.6846, "step": 15329 }, { "epoch": 0.4475781729000613, "grad_norm": 0.7824877776569176, "learning_rate": 3.069099756690998e-06, "loss": 0.7081, "step": 15330 }, { "epoch": 0.44760736912790866, "grad_norm": 0.7825967320118566, "learning_rate": 3.068937550689376e-06, "loss": 0.6968, "step": 15331 }, { "epoch": 0.447636565355756, "grad_norm": 0.6702904959930708, "learning_rate": 3.068775344687754e-06, "loss": 0.5971, "step": 15332 }, { "epoch": 0.4476657615836034, "grad_norm": 0.7308506902549654, "learning_rate": 3.0686131386861316e-06, "loss": 0.6528, "step": 15333 }, { "epoch": 0.44769495781145074, "grad_norm": 0.7262629979749822, "learning_rate": 3.0684509326845096e-06, "loss": 0.6776, "step": 15334 }, { "epoch": 0.4477241540392981, "grad_norm": 0.7058183422271975, "learning_rate": 3.0682887266828876e-06, "loss": 0.6543, "step": 15335 }, { "epoch": 0.44775335026714547, "grad_norm": 0.7489616807104953, "learning_rate": 3.0681265206812656e-06, "loss": 0.6705, "step": 15336 }, { "epoch": 0.44778254649499283, "grad_norm": 0.8149528209613058, "learning_rate": 3.0679643146796436e-06, "loss": 0.6781, "step": 15337 }, { "epoch": 0.4478117427228402, "grad_norm": 0.6959620987457006, "learning_rate": 3.067802108678021e-06, "loss": 0.6209, "step": 15338 }, { "epoch": 0.44784093895068755, "grad_norm": 0.7361707068398409, "learning_rate": 3.067639902676399e-06, "loss": 0.7072, "step": 15339 }, { "epoch": 0.4478701351785349, "grad_norm": 0.7307782987763113, "learning_rate": 3.067477696674777e-06, "loss": 0.6417, "step": 15340 }, { "epoch": 0.4478993314063823, "grad_norm": 0.7878963467751398, "learning_rate": 3.067315490673155e-06, "loss": 0.7242, "step": 15341 }, { "epoch": 0.44792852763422963, "grad_norm": 0.6201922681644533, "learning_rate": 3.067153284671533e-06, "loss": 0.5086, "step": 15342 }, { "epoch": 0.447957723862077, "grad_norm": 0.7568159279166845, "learning_rate": 3.066991078669911e-06, "loss": 0.6662, "step": 15343 }, { "epoch": 0.44798692008992436, "grad_norm": 0.7408308912437004, "learning_rate": 3.066828872668289e-06, "loss": 0.6927, "step": 15344 }, { "epoch": 0.4480161163177717, "grad_norm": 0.7437901823537116, "learning_rate": 3.066666666666667e-06, "loss": 0.6781, "step": 15345 }, { "epoch": 0.4480453125456191, "grad_norm": 0.7705113249738049, "learning_rate": 3.0665044606650444e-06, "loss": 0.7125, "step": 15346 }, { "epoch": 0.4480745087734665, "grad_norm": 0.697040910708647, "learning_rate": 3.0663422546634232e-06, "loss": 0.6011, "step": 15347 }, { "epoch": 0.44810370500131386, "grad_norm": 0.7533943184733981, "learning_rate": 3.066180048661801e-06, "loss": 0.6939, "step": 15348 }, { "epoch": 0.4481329012291612, "grad_norm": 0.7362273572078198, "learning_rate": 3.066017842660179e-06, "loss": 0.6318, "step": 15349 }, { "epoch": 0.4481620974570086, "grad_norm": 0.721298056236963, "learning_rate": 3.065855636658557e-06, "loss": 0.6127, "step": 15350 }, { "epoch": 0.44819129368485594, "grad_norm": 0.830723336147422, "learning_rate": 3.065693430656935e-06, "loss": 0.74, "step": 15351 }, { "epoch": 0.4482204899127033, "grad_norm": 0.7861735113315151, "learning_rate": 3.0655312246553124e-06, "loss": 0.7422, "step": 15352 }, { "epoch": 0.44824968614055066, "grad_norm": 0.7933392699267403, "learning_rate": 3.0653690186536904e-06, "loss": 0.6581, "step": 15353 }, { "epoch": 0.448278882368398, "grad_norm": 0.7719312139052773, "learning_rate": 3.0652068126520684e-06, "loss": 0.5894, "step": 15354 }, { "epoch": 0.4483080785962454, "grad_norm": 0.7283458624669769, "learning_rate": 3.0650446066504465e-06, "loss": 0.647, "step": 15355 }, { "epoch": 0.44833727482409275, "grad_norm": 0.7266853900816772, "learning_rate": 3.0648824006488245e-06, "loss": 0.6098, "step": 15356 }, { "epoch": 0.4483664710519401, "grad_norm": 0.9919181248958404, "learning_rate": 3.064720194647202e-06, "loss": 0.759, "step": 15357 }, { "epoch": 0.44839566727978747, "grad_norm": 0.6742468467891742, "learning_rate": 3.06455798864558e-06, "loss": 0.5599, "step": 15358 }, { "epoch": 0.44842486350763483, "grad_norm": 0.7149053538642135, "learning_rate": 3.064395782643958e-06, "loss": 0.6056, "step": 15359 }, { "epoch": 0.4484540597354822, "grad_norm": 0.7666697719889626, "learning_rate": 3.064233576642336e-06, "loss": 0.7406, "step": 15360 }, { "epoch": 0.44848325596332955, "grad_norm": 0.7806399918708935, "learning_rate": 3.0640713706407136e-06, "loss": 0.6372, "step": 15361 }, { "epoch": 0.4485124521911769, "grad_norm": 0.7253037544104529, "learning_rate": 3.0639091646390917e-06, "loss": 0.6628, "step": 15362 }, { "epoch": 0.4485416484190243, "grad_norm": 0.7626090287195654, "learning_rate": 3.0637469586374697e-06, "loss": 0.6892, "step": 15363 }, { "epoch": 0.44857084464687164, "grad_norm": 0.7069899848729597, "learning_rate": 3.0635847526358477e-06, "loss": 0.6582, "step": 15364 }, { "epoch": 0.448600040874719, "grad_norm": 0.684303013000437, "learning_rate": 3.0634225466342253e-06, "loss": 0.5519, "step": 15365 }, { "epoch": 0.44862923710256636, "grad_norm": 0.7608264256312253, "learning_rate": 3.063260340632604e-06, "loss": 0.6541, "step": 15366 }, { "epoch": 0.4486584333304137, "grad_norm": 0.7267713454484248, "learning_rate": 3.0630981346309817e-06, "loss": 0.6829, "step": 15367 }, { "epoch": 0.4486876295582611, "grad_norm": 0.7329883909710676, "learning_rate": 3.0629359286293597e-06, "loss": 0.5947, "step": 15368 }, { "epoch": 0.44871682578610844, "grad_norm": 0.7877625765662065, "learning_rate": 3.0627737226277377e-06, "loss": 0.6848, "step": 15369 }, { "epoch": 0.4487460220139558, "grad_norm": 0.7345528069468894, "learning_rate": 3.0626115166261157e-06, "loss": 0.654, "step": 15370 }, { "epoch": 0.44877521824180316, "grad_norm": 0.7875707742886837, "learning_rate": 3.0624493106244933e-06, "loss": 0.6733, "step": 15371 }, { "epoch": 0.4488044144696505, "grad_norm": 0.7375489297094617, "learning_rate": 3.0622871046228713e-06, "loss": 0.6617, "step": 15372 }, { "epoch": 0.4488336106974979, "grad_norm": 0.7016372157315232, "learning_rate": 3.0621248986212493e-06, "loss": 0.627, "step": 15373 }, { "epoch": 0.44886280692534525, "grad_norm": 0.7343597441537315, "learning_rate": 3.0619626926196273e-06, "loss": 0.6347, "step": 15374 }, { "epoch": 0.4488920031531926, "grad_norm": 0.9716045620741663, "learning_rate": 3.061800486618005e-06, "loss": 0.7062, "step": 15375 }, { "epoch": 0.44892119938103997, "grad_norm": 0.7630983090423148, "learning_rate": 3.061638280616383e-06, "loss": 0.6917, "step": 15376 }, { "epoch": 0.44895039560888733, "grad_norm": 0.768907740407164, "learning_rate": 3.061476074614761e-06, "loss": 0.6793, "step": 15377 }, { "epoch": 0.4489795918367347, "grad_norm": 0.7170824690549772, "learning_rate": 3.061313868613139e-06, "loss": 0.5938, "step": 15378 }, { "epoch": 0.44900878806458205, "grad_norm": 0.7448322080171789, "learning_rate": 3.061151662611517e-06, "loss": 0.5817, "step": 15379 }, { "epoch": 0.4490379842924294, "grad_norm": 0.7385695209215343, "learning_rate": 3.0609894566098945e-06, "loss": 0.6628, "step": 15380 }, { "epoch": 0.4490671805202768, "grad_norm": 0.7172879815611112, "learning_rate": 3.0608272506082725e-06, "loss": 0.6315, "step": 15381 }, { "epoch": 0.44909637674812414, "grad_norm": 0.772804345664138, "learning_rate": 3.0606650446066505e-06, "loss": 0.726, "step": 15382 }, { "epoch": 0.4491255729759715, "grad_norm": 0.6992433221176089, "learning_rate": 3.0605028386050285e-06, "loss": 0.6135, "step": 15383 }, { "epoch": 0.44915476920381886, "grad_norm": 0.7139101711447853, "learning_rate": 3.060340632603406e-06, "loss": 0.6241, "step": 15384 }, { "epoch": 0.4491839654316662, "grad_norm": 0.7310365005437732, "learning_rate": 3.060178426601785e-06, "loss": 0.65, "step": 15385 }, { "epoch": 0.4492131616595136, "grad_norm": 0.736725607270792, "learning_rate": 3.0600162206001625e-06, "loss": 0.6618, "step": 15386 }, { "epoch": 0.44924235788736094, "grad_norm": 0.7424625710437533, "learning_rate": 3.0598540145985406e-06, "loss": 0.6364, "step": 15387 }, { "epoch": 0.4492715541152083, "grad_norm": 0.7567007824523584, "learning_rate": 3.0596918085969186e-06, "loss": 0.7067, "step": 15388 }, { "epoch": 0.44930075034305567, "grad_norm": 0.6968779172671435, "learning_rate": 3.0595296025952966e-06, "loss": 0.6173, "step": 15389 }, { "epoch": 0.449329946570903, "grad_norm": 0.7231964495353058, "learning_rate": 3.059367396593674e-06, "loss": 0.6582, "step": 15390 }, { "epoch": 0.4493591427987504, "grad_norm": 0.8102888598621305, "learning_rate": 3.059205190592052e-06, "loss": 0.7393, "step": 15391 }, { "epoch": 0.44938833902659775, "grad_norm": 0.7861279181543562, "learning_rate": 3.05904298459043e-06, "loss": 0.6585, "step": 15392 }, { "epoch": 0.4494175352544451, "grad_norm": 0.7382713592136708, "learning_rate": 3.058880778588808e-06, "loss": 0.6166, "step": 15393 }, { "epoch": 0.44944673148229247, "grad_norm": 0.8081079682395095, "learning_rate": 3.0587185725871858e-06, "loss": 0.6507, "step": 15394 }, { "epoch": 0.44947592771013983, "grad_norm": 0.7347100958544346, "learning_rate": 3.0585563665855638e-06, "loss": 0.6597, "step": 15395 }, { "epoch": 0.4495051239379872, "grad_norm": 0.743197800320193, "learning_rate": 3.0583941605839418e-06, "loss": 0.6733, "step": 15396 }, { "epoch": 0.44953432016583456, "grad_norm": 0.7755005597086833, "learning_rate": 3.0582319545823198e-06, "loss": 0.7112, "step": 15397 }, { "epoch": 0.4495635163936819, "grad_norm": 0.8502690535853237, "learning_rate": 3.0580697485806978e-06, "loss": 0.6356, "step": 15398 }, { "epoch": 0.4495927126215293, "grad_norm": 0.7013555946722011, "learning_rate": 3.0579075425790754e-06, "loss": 0.6051, "step": 15399 }, { "epoch": 0.44962190884937664, "grad_norm": 0.678574181342333, "learning_rate": 3.0577453365774534e-06, "loss": 0.5954, "step": 15400 }, { "epoch": 0.449651105077224, "grad_norm": 0.7099964925971726, "learning_rate": 3.0575831305758314e-06, "loss": 0.6228, "step": 15401 }, { "epoch": 0.44968030130507136, "grad_norm": 0.6944422584641179, "learning_rate": 3.0574209245742094e-06, "loss": 0.6082, "step": 15402 }, { "epoch": 0.4497094975329187, "grad_norm": 0.7474490958186777, "learning_rate": 3.057258718572587e-06, "loss": 0.6921, "step": 15403 }, { "epoch": 0.4497386937607661, "grad_norm": 0.7925430215221732, "learning_rate": 3.057096512570966e-06, "loss": 0.6791, "step": 15404 }, { "epoch": 0.44976788998861345, "grad_norm": 0.7068700676858556, "learning_rate": 3.0569343065693434e-06, "loss": 0.6084, "step": 15405 }, { "epoch": 0.4497970862164608, "grad_norm": 0.7097736728559179, "learning_rate": 3.0567721005677214e-06, "loss": 0.649, "step": 15406 }, { "epoch": 0.4498262824443082, "grad_norm": 0.734699875635188, "learning_rate": 3.0566098945660994e-06, "loss": 0.6448, "step": 15407 }, { "epoch": 0.4498554786721556, "grad_norm": 0.8196009763850853, "learning_rate": 3.0564476885644774e-06, "loss": 0.7354, "step": 15408 }, { "epoch": 0.44988467490000295, "grad_norm": 0.7478225294560703, "learning_rate": 3.056285482562855e-06, "loss": 0.634, "step": 15409 }, { "epoch": 0.4499138711278503, "grad_norm": 0.7741175684506406, "learning_rate": 3.056123276561233e-06, "loss": 0.6994, "step": 15410 }, { "epoch": 0.44994306735569767, "grad_norm": 0.7038000267373498, "learning_rate": 3.055961070559611e-06, "loss": 0.6619, "step": 15411 }, { "epoch": 0.44997226358354503, "grad_norm": 0.7282924985613688, "learning_rate": 3.055798864557989e-06, "loss": 0.6647, "step": 15412 }, { "epoch": 0.4500014598113924, "grad_norm": 0.6591726158991319, "learning_rate": 3.0556366585563666e-06, "loss": 0.5475, "step": 15413 }, { "epoch": 0.45003065603923975, "grad_norm": 0.7417069604697231, "learning_rate": 3.0554744525547446e-06, "loss": 0.7091, "step": 15414 }, { "epoch": 0.4500598522670871, "grad_norm": 0.7414549093237436, "learning_rate": 3.0553122465531226e-06, "loss": 0.7196, "step": 15415 }, { "epoch": 0.4500890484949345, "grad_norm": 0.7715257779937724, "learning_rate": 3.0551500405515006e-06, "loss": 0.7136, "step": 15416 }, { "epoch": 0.45011824472278184, "grad_norm": 0.69516751251194, "learning_rate": 3.0549878345498786e-06, "loss": 0.5992, "step": 15417 }, { "epoch": 0.4501474409506292, "grad_norm": 0.7231451387486173, "learning_rate": 3.0548256285482562e-06, "loss": 0.6882, "step": 15418 }, { "epoch": 0.45017663717847656, "grad_norm": 0.7460689899220769, "learning_rate": 3.0546634225466342e-06, "loss": 0.6575, "step": 15419 }, { "epoch": 0.4502058334063239, "grad_norm": 0.7034568424046959, "learning_rate": 3.0545012165450122e-06, "loss": 0.6123, "step": 15420 }, { "epoch": 0.4502350296341713, "grad_norm": 0.7307637885309735, "learning_rate": 3.0543390105433902e-06, "loss": 0.6815, "step": 15421 }, { "epoch": 0.45026422586201864, "grad_norm": 0.7301536500645169, "learning_rate": 3.0541768045417687e-06, "loss": 0.6642, "step": 15422 }, { "epoch": 0.450293422089866, "grad_norm": 0.8029411608383432, "learning_rate": 3.0540145985401467e-06, "loss": 0.8057, "step": 15423 }, { "epoch": 0.45032261831771336, "grad_norm": 0.7252069608510625, "learning_rate": 3.0538523925385243e-06, "loss": 0.6661, "step": 15424 }, { "epoch": 0.4503518145455607, "grad_norm": 0.7301843682458763, "learning_rate": 3.0536901865369023e-06, "loss": 0.6347, "step": 15425 }, { "epoch": 0.4503810107734081, "grad_norm": 0.7220953631955517, "learning_rate": 3.0535279805352803e-06, "loss": 0.6356, "step": 15426 }, { "epoch": 0.45041020700125545, "grad_norm": 0.817929191177949, "learning_rate": 3.0533657745336583e-06, "loss": 0.8039, "step": 15427 }, { "epoch": 0.4504394032291028, "grad_norm": 0.7357789100075964, "learning_rate": 3.053203568532036e-06, "loss": 0.6002, "step": 15428 }, { "epoch": 0.45046859945695017, "grad_norm": 0.7318724367509276, "learning_rate": 3.053041362530414e-06, "loss": 0.6017, "step": 15429 }, { "epoch": 0.45049779568479753, "grad_norm": 0.7719296513430193, "learning_rate": 3.052879156528792e-06, "loss": 0.7114, "step": 15430 }, { "epoch": 0.4505269919126449, "grad_norm": 0.7092886867394287, "learning_rate": 3.05271695052717e-06, "loss": 0.6448, "step": 15431 }, { "epoch": 0.45055618814049225, "grad_norm": 0.6828371808545493, "learning_rate": 3.0525547445255475e-06, "loss": 0.592, "step": 15432 }, { "epoch": 0.4505853843683396, "grad_norm": 0.7194512217805481, "learning_rate": 3.0523925385239255e-06, "loss": 0.6641, "step": 15433 }, { "epoch": 0.450614580596187, "grad_norm": 0.8194785364769834, "learning_rate": 3.0522303325223035e-06, "loss": 0.7535, "step": 15434 }, { "epoch": 0.45064377682403434, "grad_norm": 0.7115935031584087, "learning_rate": 3.0520681265206815e-06, "loss": 0.5999, "step": 15435 }, { "epoch": 0.4506729730518817, "grad_norm": 0.6992493082172478, "learning_rate": 3.0519059205190595e-06, "loss": 0.5988, "step": 15436 }, { "epoch": 0.45070216927972906, "grad_norm": 0.6837217229323328, "learning_rate": 3.051743714517437e-06, "loss": 0.6086, "step": 15437 }, { "epoch": 0.4507313655075764, "grad_norm": 0.7797107616065454, "learning_rate": 3.051581508515815e-06, "loss": 0.6801, "step": 15438 }, { "epoch": 0.4507605617354238, "grad_norm": 0.6762287273155653, "learning_rate": 3.051419302514193e-06, "loss": 0.5653, "step": 15439 }, { "epoch": 0.45078975796327114, "grad_norm": 0.6766753645889423, "learning_rate": 3.051257096512571e-06, "loss": 0.5747, "step": 15440 }, { "epoch": 0.4508189541911185, "grad_norm": 0.7277543358538082, "learning_rate": 3.0510948905109495e-06, "loss": 0.5808, "step": 15441 }, { "epoch": 0.45084815041896587, "grad_norm": 0.7455147484960247, "learning_rate": 3.0509326845093275e-06, "loss": 0.6988, "step": 15442 }, { "epoch": 0.4508773466468132, "grad_norm": 0.6948503787787613, "learning_rate": 3.050770478507705e-06, "loss": 0.5845, "step": 15443 }, { "epoch": 0.4509065428746606, "grad_norm": 0.7035682121705329, "learning_rate": 3.050608272506083e-06, "loss": 0.6032, "step": 15444 }, { "epoch": 0.45093573910250795, "grad_norm": 0.6895936944886372, "learning_rate": 3.050446066504461e-06, "loss": 0.6013, "step": 15445 }, { "epoch": 0.4509649353303553, "grad_norm": 0.7124265065481673, "learning_rate": 3.050283860502839e-06, "loss": 0.6352, "step": 15446 }, { "epoch": 0.45099413155820267, "grad_norm": 0.8042712248482325, "learning_rate": 3.0501216545012167e-06, "loss": 0.6053, "step": 15447 }, { "epoch": 0.45102332778605003, "grad_norm": 0.7504716050740498, "learning_rate": 3.0499594484995947e-06, "loss": 0.6891, "step": 15448 }, { "epoch": 0.4510525240138974, "grad_norm": 0.7247999305296907, "learning_rate": 3.0497972424979727e-06, "loss": 0.6618, "step": 15449 }, { "epoch": 0.45108172024174475, "grad_norm": 0.7042173692117988, "learning_rate": 3.0496350364963507e-06, "loss": 0.6616, "step": 15450 }, { "epoch": 0.4511109164695921, "grad_norm": 0.7068443909220358, "learning_rate": 3.0494728304947283e-06, "loss": 0.5765, "step": 15451 }, { "epoch": 0.4511401126974395, "grad_norm": 0.707469517422104, "learning_rate": 3.0493106244931063e-06, "loss": 0.6558, "step": 15452 }, { "epoch": 0.45116930892528684, "grad_norm": 0.7998009993453479, "learning_rate": 3.0491484184914843e-06, "loss": 0.7127, "step": 15453 }, { "epoch": 0.4511985051531342, "grad_norm": 0.6812864824864043, "learning_rate": 3.0489862124898624e-06, "loss": 0.5575, "step": 15454 }, { "epoch": 0.45122770138098156, "grad_norm": 0.8063077191251541, "learning_rate": 3.0488240064882404e-06, "loss": 0.6454, "step": 15455 }, { "epoch": 0.4512568976088289, "grad_norm": 0.7172445470861936, "learning_rate": 3.048661800486618e-06, "loss": 0.6532, "step": 15456 }, { "epoch": 0.4512860938366763, "grad_norm": 0.7459239294432755, "learning_rate": 3.048499594484996e-06, "loss": 0.6965, "step": 15457 }, { "epoch": 0.45131529006452364, "grad_norm": 0.7266329449362463, "learning_rate": 3.048337388483374e-06, "loss": 0.6659, "step": 15458 }, { "epoch": 0.451344486292371, "grad_norm": 0.8891156376558393, "learning_rate": 3.048175182481752e-06, "loss": 0.7395, "step": 15459 }, { "epoch": 0.45137368252021837, "grad_norm": 0.6954297745207143, "learning_rate": 3.0480129764801304e-06, "loss": 0.6081, "step": 15460 }, { "epoch": 0.45140287874806573, "grad_norm": 0.7502684589814057, "learning_rate": 3.0478507704785084e-06, "loss": 0.6592, "step": 15461 }, { "epoch": 0.4514320749759131, "grad_norm": 0.7721221442987967, "learning_rate": 3.047688564476886e-06, "loss": 0.594, "step": 15462 }, { "epoch": 0.45146127120376045, "grad_norm": 0.6638091270115424, "learning_rate": 3.047526358475264e-06, "loss": 0.5236, "step": 15463 }, { "epoch": 0.4514904674316078, "grad_norm": 0.7864654833617897, "learning_rate": 3.047364152473642e-06, "loss": 0.7755, "step": 15464 }, { "epoch": 0.4515196636594552, "grad_norm": 0.7246155695016069, "learning_rate": 3.04720194647202e-06, "loss": 0.6397, "step": 15465 }, { "epoch": 0.45154885988730253, "grad_norm": 0.8005878689055194, "learning_rate": 3.0470397404703976e-06, "loss": 0.6257, "step": 15466 }, { "epoch": 0.4515780561151499, "grad_norm": 0.9594748806882334, "learning_rate": 3.0468775344687756e-06, "loss": 0.649, "step": 15467 }, { "epoch": 0.4516072523429973, "grad_norm": 0.6959459946199145, "learning_rate": 3.0467153284671536e-06, "loss": 0.5608, "step": 15468 }, { "epoch": 0.4516364485708447, "grad_norm": 0.7393047699017707, "learning_rate": 3.0465531224655316e-06, "loss": 0.6655, "step": 15469 }, { "epoch": 0.45166564479869203, "grad_norm": 0.6896716456028386, "learning_rate": 3.046390916463909e-06, "loss": 0.5437, "step": 15470 }, { "epoch": 0.4516948410265394, "grad_norm": 0.7236316779366341, "learning_rate": 3.046228710462287e-06, "loss": 0.6613, "step": 15471 }, { "epoch": 0.45172403725438676, "grad_norm": 0.8957700511075237, "learning_rate": 3.046066504460665e-06, "loss": 0.5216, "step": 15472 }, { "epoch": 0.4517532334822341, "grad_norm": 0.7634324911300165, "learning_rate": 3.045904298459043e-06, "loss": 0.6153, "step": 15473 }, { "epoch": 0.4517824297100815, "grad_norm": 0.7176718719960757, "learning_rate": 3.0457420924574212e-06, "loss": 0.6132, "step": 15474 }, { "epoch": 0.45181162593792884, "grad_norm": 1.0649407067591035, "learning_rate": 3.045579886455799e-06, "loss": 0.7444, "step": 15475 }, { "epoch": 0.4518408221657762, "grad_norm": 0.7006770504600166, "learning_rate": 3.045417680454177e-06, "loss": 0.667, "step": 15476 }, { "epoch": 0.45187001839362356, "grad_norm": 0.7624610874880876, "learning_rate": 3.045255474452555e-06, "loss": 0.752, "step": 15477 }, { "epoch": 0.4518992146214709, "grad_norm": 0.7925527431010051, "learning_rate": 3.045093268450933e-06, "loss": 0.7031, "step": 15478 }, { "epoch": 0.4519284108493183, "grad_norm": 0.6789008998144356, "learning_rate": 3.0449310624493113e-06, "loss": 0.5347, "step": 15479 }, { "epoch": 0.45195760707716565, "grad_norm": 0.7588243507888616, "learning_rate": 3.0447688564476893e-06, "loss": 0.6913, "step": 15480 }, { "epoch": 0.451986803305013, "grad_norm": 0.6543089281045573, "learning_rate": 3.044606650446067e-06, "loss": 0.5771, "step": 15481 }, { "epoch": 0.45201599953286037, "grad_norm": 0.7332260710565112, "learning_rate": 3.044444444444445e-06, "loss": 0.6771, "step": 15482 }, { "epoch": 0.45204519576070773, "grad_norm": 0.7788316633891236, "learning_rate": 3.044282238442823e-06, "loss": 0.7005, "step": 15483 }, { "epoch": 0.4520743919885551, "grad_norm": 0.692694290037318, "learning_rate": 3.044120032441201e-06, "loss": 0.629, "step": 15484 }, { "epoch": 0.45210358821640245, "grad_norm": 0.7456379012887983, "learning_rate": 3.0439578264395784e-06, "loss": 0.6624, "step": 15485 }, { "epoch": 0.4521327844442498, "grad_norm": 0.6585728257674195, "learning_rate": 3.0437956204379565e-06, "loss": 0.5619, "step": 15486 }, { "epoch": 0.4521619806720972, "grad_norm": 0.7022588106500744, "learning_rate": 3.0436334144363345e-06, "loss": 0.6099, "step": 15487 }, { "epoch": 0.45219117689994454, "grad_norm": 0.7268442040746653, "learning_rate": 3.0434712084347125e-06, "loss": 0.632, "step": 15488 }, { "epoch": 0.4522203731277919, "grad_norm": 0.7416741262738699, "learning_rate": 3.04330900243309e-06, "loss": 0.6625, "step": 15489 }, { "epoch": 0.45224956935563926, "grad_norm": 0.6770086724946637, "learning_rate": 3.043146796431468e-06, "loss": 0.5893, "step": 15490 }, { "epoch": 0.4522787655834866, "grad_norm": 0.7149845443043391, "learning_rate": 3.042984590429846e-06, "loss": 0.5596, "step": 15491 }, { "epoch": 0.452307961811334, "grad_norm": 0.7554732940850155, "learning_rate": 3.042822384428224e-06, "loss": 0.63, "step": 15492 }, { "epoch": 0.45233715803918134, "grad_norm": 0.7344129491358364, "learning_rate": 3.042660178426602e-06, "loss": 0.6736, "step": 15493 }, { "epoch": 0.4523663542670287, "grad_norm": 0.7276416019465336, "learning_rate": 3.0424979724249797e-06, "loss": 0.6367, "step": 15494 }, { "epoch": 0.45239555049487606, "grad_norm": 0.7867052507718677, "learning_rate": 3.0423357664233577e-06, "loss": 0.6359, "step": 15495 }, { "epoch": 0.4524247467227234, "grad_norm": 0.7088396532603208, "learning_rate": 3.0421735604217357e-06, "loss": 0.613, "step": 15496 }, { "epoch": 0.4524539429505708, "grad_norm": 0.7234301305065327, "learning_rate": 3.0420113544201137e-06, "loss": 0.6676, "step": 15497 }, { "epoch": 0.45248313917841815, "grad_norm": 0.6750573114329979, "learning_rate": 3.041849148418492e-06, "loss": 0.5798, "step": 15498 }, { "epoch": 0.4525123354062655, "grad_norm": 0.7692525575741528, "learning_rate": 3.0416869424168697e-06, "loss": 0.705, "step": 15499 }, { "epoch": 0.45254153163411287, "grad_norm": 0.7562289920966461, "learning_rate": 3.0415247364152477e-06, "loss": 0.6905, "step": 15500 }, { "epoch": 0.45257072786196023, "grad_norm": 0.8372448290538641, "learning_rate": 3.0413625304136257e-06, "loss": 0.6998, "step": 15501 }, { "epoch": 0.4525999240898076, "grad_norm": 0.7575805217122646, "learning_rate": 3.0412003244120037e-06, "loss": 0.6849, "step": 15502 }, { "epoch": 0.45262912031765495, "grad_norm": 0.843248509198737, "learning_rate": 3.0410381184103817e-06, "loss": 0.7762, "step": 15503 }, { "epoch": 0.4526583165455023, "grad_norm": 0.7387199478060186, "learning_rate": 3.0408759124087593e-06, "loss": 0.7728, "step": 15504 }, { "epoch": 0.4526875127733497, "grad_norm": 0.7398577309666687, "learning_rate": 3.0407137064071373e-06, "loss": 0.6216, "step": 15505 }, { "epoch": 0.45271670900119704, "grad_norm": 0.7377813109308774, "learning_rate": 3.0405515004055153e-06, "loss": 0.6851, "step": 15506 }, { "epoch": 0.4527459052290444, "grad_norm": 0.7368425682194301, "learning_rate": 3.0403892944038933e-06, "loss": 0.6341, "step": 15507 }, { "epoch": 0.45277510145689176, "grad_norm": 0.70110736081085, "learning_rate": 3.040227088402271e-06, "loss": 0.5614, "step": 15508 }, { "epoch": 0.4528042976847391, "grad_norm": 0.7799852974157917, "learning_rate": 3.040064882400649e-06, "loss": 0.74, "step": 15509 }, { "epoch": 0.4528334939125865, "grad_norm": 0.7719499401302806, "learning_rate": 3.039902676399027e-06, "loss": 0.6991, "step": 15510 }, { "epoch": 0.45286269014043384, "grad_norm": 0.7122675936314662, "learning_rate": 3.039740470397405e-06, "loss": 0.644, "step": 15511 }, { "epoch": 0.4528918863682812, "grad_norm": 0.749203585198829, "learning_rate": 3.039578264395783e-06, "loss": 0.674, "step": 15512 }, { "epoch": 0.45292108259612857, "grad_norm": 0.738569126900971, "learning_rate": 3.0394160583941605e-06, "loss": 0.6581, "step": 15513 }, { "epoch": 0.4529502788239759, "grad_norm": 0.6569065074269235, "learning_rate": 3.0392538523925385e-06, "loss": 0.557, "step": 15514 }, { "epoch": 0.4529794750518233, "grad_norm": 0.790322932715947, "learning_rate": 3.0390916463909165e-06, "loss": 0.592, "step": 15515 }, { "epoch": 0.45300867127967065, "grad_norm": 0.7224468590641608, "learning_rate": 3.0389294403892945e-06, "loss": 0.6811, "step": 15516 }, { "epoch": 0.453037867507518, "grad_norm": 0.8504153031007884, "learning_rate": 3.038767234387673e-06, "loss": 0.6498, "step": 15517 }, { "epoch": 0.45306706373536537, "grad_norm": 0.7074545556014322, "learning_rate": 3.0386050283860506e-06, "loss": 0.6384, "step": 15518 }, { "epoch": 0.45309625996321273, "grad_norm": 0.709732743485081, "learning_rate": 3.0384428223844286e-06, "loss": 0.6651, "step": 15519 }, { "epoch": 0.4531254561910601, "grad_norm": 0.741047260065882, "learning_rate": 3.0382806163828066e-06, "loss": 0.6589, "step": 15520 }, { "epoch": 0.45315465241890746, "grad_norm": 0.7592040563237298, "learning_rate": 3.0381184103811846e-06, "loss": 0.6477, "step": 15521 }, { "epoch": 0.4531838486467548, "grad_norm": 0.7368951795104612, "learning_rate": 3.0379562043795626e-06, "loss": 0.6109, "step": 15522 }, { "epoch": 0.4532130448746022, "grad_norm": 0.8733227976575048, "learning_rate": 3.03779399837794e-06, "loss": 0.6438, "step": 15523 }, { "epoch": 0.45324224110244954, "grad_norm": 0.7589623978174234, "learning_rate": 3.037631792376318e-06, "loss": 0.6317, "step": 15524 }, { "epoch": 0.4532714373302969, "grad_norm": 0.8011152347074697, "learning_rate": 3.037469586374696e-06, "loss": 0.7229, "step": 15525 }, { "epoch": 0.45330063355814426, "grad_norm": 0.7053840241794662, "learning_rate": 3.037307380373074e-06, "loss": 0.6098, "step": 15526 }, { "epoch": 0.4533298297859916, "grad_norm": 0.7217688732238697, "learning_rate": 3.0371451743714518e-06, "loss": 0.629, "step": 15527 }, { "epoch": 0.45335902601383904, "grad_norm": 0.7482755947581049, "learning_rate": 3.0369829683698298e-06, "loss": 0.6582, "step": 15528 }, { "epoch": 0.4533882222416864, "grad_norm": 0.7289290327028997, "learning_rate": 3.0368207623682078e-06, "loss": 0.6889, "step": 15529 }, { "epoch": 0.45341741846953376, "grad_norm": 0.7195924671936497, "learning_rate": 3.0366585563665858e-06, "loss": 0.6332, "step": 15530 }, { "epoch": 0.4534466146973811, "grad_norm": 0.7331843848876962, "learning_rate": 3.036496350364964e-06, "loss": 0.6374, "step": 15531 }, { "epoch": 0.4534758109252285, "grad_norm": 0.7437935813102751, "learning_rate": 3.0363341443633414e-06, "loss": 0.6373, "step": 15532 }, { "epoch": 0.45350500715307585, "grad_norm": 0.7811816985509559, "learning_rate": 3.0361719383617194e-06, "loss": 0.7505, "step": 15533 }, { "epoch": 0.4535342033809232, "grad_norm": 0.7334479550237928, "learning_rate": 3.0360097323600974e-06, "loss": 0.6626, "step": 15534 }, { "epoch": 0.45356339960877057, "grad_norm": 0.7257060965833397, "learning_rate": 3.0358475263584754e-06, "loss": 0.6139, "step": 15535 }, { "epoch": 0.45359259583661793, "grad_norm": 0.6684504691473573, "learning_rate": 3.035685320356854e-06, "loss": 0.5767, "step": 15536 }, { "epoch": 0.4536217920644653, "grad_norm": 0.6945007346328003, "learning_rate": 3.0355231143552314e-06, "loss": 0.5703, "step": 15537 }, { "epoch": 0.45365098829231265, "grad_norm": 0.8652902638603874, "learning_rate": 3.0353609083536094e-06, "loss": 0.7759, "step": 15538 }, { "epoch": 0.45368018452016, "grad_norm": 0.7503700727183594, "learning_rate": 3.0351987023519874e-06, "loss": 0.7483, "step": 15539 }, { "epoch": 0.4537093807480074, "grad_norm": 0.7540456195974922, "learning_rate": 3.0350364963503654e-06, "loss": 0.7226, "step": 15540 }, { "epoch": 0.45373857697585473, "grad_norm": 0.7231582006722569, "learning_rate": 3.0348742903487434e-06, "loss": 0.6073, "step": 15541 }, { "epoch": 0.4537677732037021, "grad_norm": 0.7517724889969177, "learning_rate": 3.034712084347121e-06, "loss": 0.6891, "step": 15542 }, { "epoch": 0.45379696943154946, "grad_norm": 0.7433217703373023, "learning_rate": 3.034549878345499e-06, "loss": 0.6256, "step": 15543 }, { "epoch": 0.4538261656593968, "grad_norm": 0.7173741390967376, "learning_rate": 3.034387672343877e-06, "loss": 0.6287, "step": 15544 }, { "epoch": 0.4538553618872442, "grad_norm": 0.7133096266682247, "learning_rate": 3.034225466342255e-06, "loss": 0.6428, "step": 15545 }, { "epoch": 0.45388455811509154, "grad_norm": 0.6784792448471384, "learning_rate": 3.0340632603406326e-06, "loss": 0.5614, "step": 15546 }, { "epoch": 0.4539137543429389, "grad_norm": 0.7742619451206496, "learning_rate": 3.0339010543390106e-06, "loss": 0.6973, "step": 15547 }, { "epoch": 0.45394295057078626, "grad_norm": 0.7593936686348469, "learning_rate": 3.0337388483373886e-06, "loss": 0.6883, "step": 15548 }, { "epoch": 0.4539721467986336, "grad_norm": 0.7463484466954093, "learning_rate": 3.0335766423357666e-06, "loss": 0.6333, "step": 15549 }, { "epoch": 0.454001343026481, "grad_norm": 0.7248801138674695, "learning_rate": 3.0334144363341447e-06, "loss": 0.6638, "step": 15550 }, { "epoch": 0.45403053925432835, "grad_norm": 0.7301329513949314, "learning_rate": 3.0332522303325222e-06, "loss": 0.6535, "step": 15551 }, { "epoch": 0.4540597354821757, "grad_norm": 0.7485221128895546, "learning_rate": 3.0330900243309002e-06, "loss": 0.6632, "step": 15552 }, { "epoch": 0.45408893171002307, "grad_norm": 0.6835106057327167, "learning_rate": 3.0329278183292782e-06, "loss": 0.5715, "step": 15553 }, { "epoch": 0.45411812793787043, "grad_norm": 0.8053941650860938, "learning_rate": 3.0327656123276563e-06, "loss": 0.6939, "step": 15554 }, { "epoch": 0.4541473241657178, "grad_norm": 0.7552914643770857, "learning_rate": 3.0326034063260347e-06, "loss": 0.6671, "step": 15555 }, { "epoch": 0.45417652039356515, "grad_norm": 0.7255579579658269, "learning_rate": 3.0324412003244123e-06, "loss": 0.6413, "step": 15556 }, { "epoch": 0.4542057166214125, "grad_norm": 0.7645792621433267, "learning_rate": 3.0322789943227903e-06, "loss": 0.7084, "step": 15557 }, { "epoch": 0.4542349128492599, "grad_norm": 0.8911075074987668, "learning_rate": 3.0321167883211683e-06, "loss": 0.734, "step": 15558 }, { "epoch": 0.45426410907710724, "grad_norm": 0.7418693317418615, "learning_rate": 3.0319545823195463e-06, "loss": 0.6606, "step": 15559 }, { "epoch": 0.4542933053049546, "grad_norm": 0.6884097999894354, "learning_rate": 3.0317923763179243e-06, "loss": 0.5998, "step": 15560 }, { "epoch": 0.45432250153280196, "grad_norm": 0.6815343769780039, "learning_rate": 3.031630170316302e-06, "loss": 0.5441, "step": 15561 }, { "epoch": 0.4543516977606493, "grad_norm": 0.7309399552841135, "learning_rate": 3.03146796431468e-06, "loss": 0.7348, "step": 15562 }, { "epoch": 0.4543808939884967, "grad_norm": 0.6842824416200726, "learning_rate": 3.031305758313058e-06, "loss": 0.6055, "step": 15563 }, { "epoch": 0.45441009021634404, "grad_norm": 0.8052146572780045, "learning_rate": 3.031143552311436e-06, "loss": 0.7097, "step": 15564 }, { "epoch": 0.4544392864441914, "grad_norm": 0.7567890407431738, "learning_rate": 3.0309813463098135e-06, "loss": 0.6108, "step": 15565 }, { "epoch": 0.45446848267203876, "grad_norm": 0.7884907533970591, "learning_rate": 3.0308191403081915e-06, "loss": 0.7177, "step": 15566 }, { "epoch": 0.4544976788998861, "grad_norm": 0.792204891760033, "learning_rate": 3.0306569343065695e-06, "loss": 0.7315, "step": 15567 }, { "epoch": 0.4545268751277335, "grad_norm": 0.7065845105690837, "learning_rate": 3.0304947283049475e-06, "loss": 0.6118, "step": 15568 }, { "epoch": 0.45455607135558085, "grad_norm": 0.7374956274453206, "learning_rate": 3.0303325223033255e-06, "loss": 0.6746, "step": 15569 }, { "epoch": 0.4545852675834282, "grad_norm": 0.6828849112670979, "learning_rate": 3.030170316301703e-06, "loss": 0.6165, "step": 15570 }, { "epoch": 0.45461446381127557, "grad_norm": 0.7170261476983827, "learning_rate": 3.030008110300081e-06, "loss": 0.6438, "step": 15571 }, { "epoch": 0.45464366003912293, "grad_norm": 0.6780143485118957, "learning_rate": 3.029845904298459e-06, "loss": 0.5602, "step": 15572 }, { "epoch": 0.4546728562669703, "grad_norm": 0.7216496930159539, "learning_rate": 3.0296836982968375e-06, "loss": 0.6503, "step": 15573 }, { "epoch": 0.45470205249481765, "grad_norm": 0.6932167705713751, "learning_rate": 3.0295214922952155e-06, "loss": 0.582, "step": 15574 }, { "epoch": 0.454731248722665, "grad_norm": 0.7714767673538468, "learning_rate": 3.029359286293593e-06, "loss": 0.7125, "step": 15575 }, { "epoch": 0.4547604449505124, "grad_norm": 0.6930456716340803, "learning_rate": 3.029197080291971e-06, "loss": 0.5619, "step": 15576 }, { "epoch": 0.45478964117835974, "grad_norm": 0.7109770770571134, "learning_rate": 3.029034874290349e-06, "loss": 0.6206, "step": 15577 }, { "epoch": 0.4548188374062071, "grad_norm": 0.7918202437916495, "learning_rate": 3.028872668288727e-06, "loss": 0.7031, "step": 15578 }, { "epoch": 0.45484803363405446, "grad_norm": 1.0438666958914302, "learning_rate": 3.028710462287105e-06, "loss": 0.8509, "step": 15579 }, { "epoch": 0.4548772298619018, "grad_norm": 0.6786378020122454, "learning_rate": 3.0285482562854827e-06, "loss": 0.559, "step": 15580 }, { "epoch": 0.4549064260897492, "grad_norm": 0.8080219706672855, "learning_rate": 3.0283860502838607e-06, "loss": 0.7481, "step": 15581 }, { "epoch": 0.45493562231759654, "grad_norm": 0.695074213574171, "learning_rate": 3.0282238442822388e-06, "loss": 0.6095, "step": 15582 }, { "epoch": 0.4549648185454439, "grad_norm": 0.7810337183313658, "learning_rate": 3.0280616382806168e-06, "loss": 0.6721, "step": 15583 }, { "epoch": 0.45499401477329127, "grad_norm": 0.6994007993419656, "learning_rate": 3.0278994322789943e-06, "loss": 0.6742, "step": 15584 }, { "epoch": 0.4550232110011386, "grad_norm": 0.6919313793002717, "learning_rate": 3.0277372262773723e-06, "loss": 0.6218, "step": 15585 }, { "epoch": 0.455052407228986, "grad_norm": 0.7735878264263111, "learning_rate": 3.0275750202757504e-06, "loss": 0.8005, "step": 15586 }, { "epoch": 0.45508160345683335, "grad_norm": 0.7069747721611533, "learning_rate": 3.0274128142741284e-06, "loss": 0.5896, "step": 15587 }, { "epoch": 0.45511079968468077, "grad_norm": 0.7264607288731895, "learning_rate": 3.0272506082725064e-06, "loss": 0.6173, "step": 15588 }, { "epoch": 0.45513999591252813, "grad_norm": 0.727750538966595, "learning_rate": 3.027088402270884e-06, "loss": 0.6378, "step": 15589 }, { "epoch": 0.4551691921403755, "grad_norm": 0.7648171819457873, "learning_rate": 3.026926196269262e-06, "loss": 0.6678, "step": 15590 }, { "epoch": 0.45519838836822285, "grad_norm": 0.6915973854880642, "learning_rate": 3.02676399026764e-06, "loss": 0.6303, "step": 15591 }, { "epoch": 0.4552275845960702, "grad_norm": 0.7791895264614747, "learning_rate": 3.0266017842660184e-06, "loss": 0.6466, "step": 15592 }, { "epoch": 0.4552567808239176, "grad_norm": 0.7743713896374661, "learning_rate": 3.0264395782643964e-06, "loss": 0.6958, "step": 15593 }, { "epoch": 0.45528597705176493, "grad_norm": 0.7179603129925299, "learning_rate": 3.026277372262774e-06, "loss": 0.5583, "step": 15594 }, { "epoch": 0.4553151732796123, "grad_norm": 0.7631668567606331, "learning_rate": 3.026115166261152e-06, "loss": 0.692, "step": 15595 }, { "epoch": 0.45534436950745966, "grad_norm": 0.6629148977583719, "learning_rate": 3.02595296025953e-06, "loss": 0.5588, "step": 15596 }, { "epoch": 0.455373565735307, "grad_norm": 0.7508800957118585, "learning_rate": 3.025790754257908e-06, "loss": 0.7037, "step": 15597 }, { "epoch": 0.4554027619631544, "grad_norm": 0.7159517935112202, "learning_rate": 3.025628548256286e-06, "loss": 0.6358, "step": 15598 }, { "epoch": 0.45543195819100174, "grad_norm": 0.7198700069234408, "learning_rate": 3.0254663422546636e-06, "loss": 0.6258, "step": 15599 }, { "epoch": 0.4554611544188491, "grad_norm": 0.7560035711446158, "learning_rate": 3.0253041362530416e-06, "loss": 0.728, "step": 15600 }, { "epoch": 0.45549035064669646, "grad_norm": 0.7376147602855371, "learning_rate": 3.0251419302514196e-06, "loss": 0.6732, "step": 15601 }, { "epoch": 0.4555195468745438, "grad_norm": 0.6941999910160985, "learning_rate": 3.0249797242497976e-06, "loss": 0.6197, "step": 15602 }, { "epoch": 0.4555487431023912, "grad_norm": 0.708767738788841, "learning_rate": 3.024817518248175e-06, "loss": 0.6281, "step": 15603 }, { "epoch": 0.45557793933023855, "grad_norm": 0.8007605759608496, "learning_rate": 3.024655312246553e-06, "loss": 0.6645, "step": 15604 }, { "epoch": 0.4556071355580859, "grad_norm": 0.7962724005989598, "learning_rate": 3.0244931062449312e-06, "loss": 0.6429, "step": 15605 }, { "epoch": 0.45563633178593327, "grad_norm": 0.7246182177059289, "learning_rate": 3.0243309002433092e-06, "loss": 0.6889, "step": 15606 }, { "epoch": 0.45566552801378063, "grad_norm": 0.7573356246846514, "learning_rate": 3.0241686942416872e-06, "loss": 0.7123, "step": 15607 }, { "epoch": 0.455694724241628, "grad_norm": 0.8270139805640331, "learning_rate": 3.024006488240065e-06, "loss": 0.7004, "step": 15608 }, { "epoch": 0.45572392046947535, "grad_norm": 0.7515511041333293, "learning_rate": 3.023844282238443e-06, "loss": 0.6461, "step": 15609 }, { "epoch": 0.4557531166973227, "grad_norm": 0.6716617291117694, "learning_rate": 3.023682076236821e-06, "loss": 0.5908, "step": 15610 }, { "epoch": 0.4557823129251701, "grad_norm": 0.7501618183068305, "learning_rate": 3.0235198702351993e-06, "loss": 0.6796, "step": 15611 }, { "epoch": 0.45581150915301744, "grad_norm": 0.7582865435008234, "learning_rate": 3.0233576642335773e-06, "loss": 0.6375, "step": 15612 }, { "epoch": 0.4558407053808648, "grad_norm": 0.7748428453188191, "learning_rate": 3.023195458231955e-06, "loss": 0.6663, "step": 15613 }, { "epoch": 0.45586990160871216, "grad_norm": 0.7257284256446487, "learning_rate": 3.023033252230333e-06, "loss": 0.6088, "step": 15614 }, { "epoch": 0.4558990978365595, "grad_norm": 0.7846861788588088, "learning_rate": 3.022871046228711e-06, "loss": 0.737, "step": 15615 }, { "epoch": 0.4559282940644069, "grad_norm": 0.7367896945273545, "learning_rate": 3.022708840227089e-06, "loss": 0.6561, "step": 15616 }, { "epoch": 0.45595749029225424, "grad_norm": 0.8619328569901475, "learning_rate": 3.022546634225467e-06, "loss": 0.7007, "step": 15617 }, { "epoch": 0.4559866865201016, "grad_norm": 0.730789055923268, "learning_rate": 3.0223844282238445e-06, "loss": 0.6355, "step": 15618 }, { "epoch": 0.45601588274794896, "grad_norm": 0.7557031466431167, "learning_rate": 3.0222222222222225e-06, "loss": 0.7518, "step": 15619 }, { "epoch": 0.4560450789757963, "grad_norm": 0.7353680727357408, "learning_rate": 3.0220600162206005e-06, "loss": 0.6556, "step": 15620 }, { "epoch": 0.4560742752036437, "grad_norm": 0.6975324841469742, "learning_rate": 3.0218978102189785e-06, "loss": 0.5841, "step": 15621 }, { "epoch": 0.45610347143149105, "grad_norm": 0.7304551657069508, "learning_rate": 3.021735604217356e-06, "loss": 0.5957, "step": 15622 }, { "epoch": 0.4561326676593384, "grad_norm": 0.7200915673162137, "learning_rate": 3.021573398215734e-06, "loss": 0.653, "step": 15623 }, { "epoch": 0.45616186388718577, "grad_norm": 0.7844444359219623, "learning_rate": 3.021411192214112e-06, "loss": 0.6787, "step": 15624 }, { "epoch": 0.45619106011503313, "grad_norm": 0.7496213953901216, "learning_rate": 3.02124898621249e-06, "loss": 0.6699, "step": 15625 }, { "epoch": 0.4562202563428805, "grad_norm": 0.6713821889235084, "learning_rate": 3.021086780210868e-06, "loss": 0.5705, "step": 15626 }, { "epoch": 0.45624945257072785, "grad_norm": 0.7044810798425811, "learning_rate": 3.0209245742092457e-06, "loss": 0.6649, "step": 15627 }, { "epoch": 0.4562786487985752, "grad_norm": 0.7316634628782044, "learning_rate": 3.0207623682076237e-06, "loss": 0.6922, "step": 15628 }, { "epoch": 0.4563078450264226, "grad_norm": 0.7447451619863629, "learning_rate": 3.0206001622060017e-06, "loss": 0.6537, "step": 15629 }, { "epoch": 0.45633704125426994, "grad_norm": 0.7625502792102837, "learning_rate": 3.02043795620438e-06, "loss": 0.6505, "step": 15630 }, { "epoch": 0.4563662374821173, "grad_norm": 0.7421899102974083, "learning_rate": 3.020275750202758e-06, "loss": 0.6997, "step": 15631 }, { "epoch": 0.45639543370996466, "grad_norm": 0.7149201335486518, "learning_rate": 3.0201135442011357e-06, "loss": 0.6201, "step": 15632 }, { "epoch": 0.456424629937812, "grad_norm": 0.7211755856828762, "learning_rate": 3.0199513381995137e-06, "loss": 0.6083, "step": 15633 }, { "epoch": 0.4564538261656594, "grad_norm": 0.6930944425468984, "learning_rate": 3.0197891321978917e-06, "loss": 0.5736, "step": 15634 }, { "epoch": 0.45648302239350674, "grad_norm": 0.8081689097196664, "learning_rate": 3.0196269261962697e-06, "loss": 0.7677, "step": 15635 }, { "epoch": 0.4565122186213541, "grad_norm": 0.7808155432945006, "learning_rate": 3.0194647201946477e-06, "loss": 0.6795, "step": 15636 }, { "epoch": 0.45654141484920147, "grad_norm": 0.7318498716163149, "learning_rate": 3.0193025141930253e-06, "loss": 0.6462, "step": 15637 }, { "epoch": 0.4565706110770488, "grad_norm": 0.6982318087685773, "learning_rate": 3.0191403081914033e-06, "loss": 0.5928, "step": 15638 }, { "epoch": 0.4565998073048962, "grad_norm": 0.6689341154878642, "learning_rate": 3.0189781021897813e-06, "loss": 0.5715, "step": 15639 }, { "epoch": 0.45662900353274355, "grad_norm": 0.7699317231759656, "learning_rate": 3.0188158961881593e-06, "loss": 0.7344, "step": 15640 }, { "epoch": 0.4566581997605909, "grad_norm": 0.6933210229940702, "learning_rate": 3.018653690186537e-06, "loss": 0.6342, "step": 15641 }, { "epoch": 0.45668739598843827, "grad_norm": 0.6946364106639155, "learning_rate": 3.018491484184915e-06, "loss": 0.6318, "step": 15642 }, { "epoch": 0.45671659221628563, "grad_norm": 0.7234047600638367, "learning_rate": 3.018329278183293e-06, "loss": 0.629, "step": 15643 }, { "epoch": 0.456745788444133, "grad_norm": 0.8505886325721672, "learning_rate": 3.018167072181671e-06, "loss": 0.7531, "step": 15644 }, { "epoch": 0.45677498467198036, "grad_norm": 0.7103941498052331, "learning_rate": 3.0180048661800485e-06, "loss": 0.5828, "step": 15645 }, { "epoch": 0.4568041808998277, "grad_norm": 0.7256037272019866, "learning_rate": 3.0178426601784265e-06, "loss": 0.6051, "step": 15646 }, { "epoch": 0.4568333771276751, "grad_norm": 0.7371867191665256, "learning_rate": 3.0176804541768045e-06, "loss": 0.6535, "step": 15647 }, { "epoch": 0.45686257335552244, "grad_norm": 0.6655735054827828, "learning_rate": 3.0175182481751825e-06, "loss": 0.5579, "step": 15648 }, { "epoch": 0.45689176958336986, "grad_norm": 0.8435488223856784, "learning_rate": 3.017356042173561e-06, "loss": 0.6442, "step": 15649 }, { "epoch": 0.4569209658112172, "grad_norm": 0.7065499532695211, "learning_rate": 3.017193836171939e-06, "loss": 0.6074, "step": 15650 }, { "epoch": 0.4569501620390646, "grad_norm": 0.7266136074692071, "learning_rate": 3.0170316301703166e-06, "loss": 0.6578, "step": 15651 }, { "epoch": 0.45697935826691194, "grad_norm": 0.6988948169392288, "learning_rate": 3.0168694241686946e-06, "loss": 0.6028, "step": 15652 }, { "epoch": 0.4570085544947593, "grad_norm": 0.6991299079113079, "learning_rate": 3.0167072181670726e-06, "loss": 0.5638, "step": 15653 }, { "epoch": 0.45703775072260666, "grad_norm": 0.7321719881169125, "learning_rate": 3.0165450121654506e-06, "loss": 0.6652, "step": 15654 }, { "epoch": 0.457066946950454, "grad_norm": 0.785469305323766, "learning_rate": 3.0163828061638286e-06, "loss": 0.7045, "step": 15655 }, { "epoch": 0.4570961431783014, "grad_norm": 0.7930679656883367, "learning_rate": 3.016220600162206e-06, "loss": 0.8147, "step": 15656 }, { "epoch": 0.45712533940614875, "grad_norm": 0.7714329008947849, "learning_rate": 3.016058394160584e-06, "loss": 0.6531, "step": 15657 }, { "epoch": 0.4571545356339961, "grad_norm": 0.6977324661730743, "learning_rate": 3.015896188158962e-06, "loss": 0.5859, "step": 15658 }, { "epoch": 0.45718373186184347, "grad_norm": 0.740792941882149, "learning_rate": 3.01573398215734e-06, "loss": 0.6233, "step": 15659 }, { "epoch": 0.45721292808969083, "grad_norm": 0.7079739810298173, "learning_rate": 3.0155717761557178e-06, "loss": 0.6361, "step": 15660 }, { "epoch": 0.4572421243175382, "grad_norm": 0.6952140895361344, "learning_rate": 3.0154095701540958e-06, "loss": 0.6032, "step": 15661 }, { "epoch": 0.45727132054538555, "grad_norm": 0.7224668678261598, "learning_rate": 3.015247364152474e-06, "loss": 0.6406, "step": 15662 }, { "epoch": 0.4573005167732329, "grad_norm": 0.7203369575262532, "learning_rate": 3.015085158150852e-06, "loss": 0.6123, "step": 15663 }, { "epoch": 0.4573297130010803, "grad_norm": 0.6901111542328758, "learning_rate": 3.0149229521492294e-06, "loss": 0.5831, "step": 15664 }, { "epoch": 0.45735890922892763, "grad_norm": 0.7161930047482338, "learning_rate": 3.0147607461476074e-06, "loss": 0.6612, "step": 15665 }, { "epoch": 0.457388105456775, "grad_norm": 0.7573705050295031, "learning_rate": 3.0145985401459854e-06, "loss": 0.7127, "step": 15666 }, { "epoch": 0.45741730168462236, "grad_norm": 0.7385270141034783, "learning_rate": 3.0144363341443634e-06, "loss": 0.665, "step": 15667 }, { "epoch": 0.4574464979124697, "grad_norm": 0.7893334197478975, "learning_rate": 3.014274128142742e-06, "loss": 0.7579, "step": 15668 }, { "epoch": 0.4574756941403171, "grad_norm": 0.7267647082288766, "learning_rate": 3.01411192214112e-06, "loss": 0.633, "step": 15669 }, { "epoch": 0.45750489036816444, "grad_norm": 0.6605280962506678, "learning_rate": 3.0139497161394974e-06, "loss": 0.5582, "step": 15670 }, { "epoch": 0.4575340865960118, "grad_norm": 0.7126273123518314, "learning_rate": 3.0137875101378754e-06, "loss": 0.6639, "step": 15671 }, { "epoch": 0.45756328282385916, "grad_norm": 0.6666209753784393, "learning_rate": 3.0136253041362534e-06, "loss": 0.5552, "step": 15672 }, { "epoch": 0.4575924790517065, "grad_norm": 0.759700905908956, "learning_rate": 3.0134630981346314e-06, "loss": 0.6976, "step": 15673 }, { "epoch": 0.4576216752795539, "grad_norm": 0.6842118761165309, "learning_rate": 3.0133008921330094e-06, "loss": 0.5974, "step": 15674 }, { "epoch": 0.45765087150740125, "grad_norm": 0.7064330100020865, "learning_rate": 3.013138686131387e-06, "loss": 0.5849, "step": 15675 }, { "epoch": 0.4576800677352486, "grad_norm": 0.7144920281368006, "learning_rate": 3.012976480129765e-06, "loss": 0.5827, "step": 15676 }, { "epoch": 0.45770926396309597, "grad_norm": 0.7247415626424228, "learning_rate": 3.012814274128143e-06, "loss": 0.6497, "step": 15677 }, { "epoch": 0.45773846019094333, "grad_norm": 0.683987641765477, "learning_rate": 3.012652068126521e-06, "loss": 0.6084, "step": 15678 }, { "epoch": 0.4577676564187907, "grad_norm": 0.7368862411632522, "learning_rate": 3.0124898621248986e-06, "loss": 0.6263, "step": 15679 }, { "epoch": 0.45779685264663805, "grad_norm": 0.7437459224140153, "learning_rate": 3.0123276561232766e-06, "loss": 0.6646, "step": 15680 }, { "epoch": 0.4578260488744854, "grad_norm": 0.6653362251859746, "learning_rate": 3.0121654501216546e-06, "loss": 0.531, "step": 15681 }, { "epoch": 0.4578552451023328, "grad_norm": 0.7098639960269539, "learning_rate": 3.0120032441200327e-06, "loss": 0.6106, "step": 15682 }, { "epoch": 0.45788444133018014, "grad_norm": 0.7191683320896354, "learning_rate": 3.0118410381184102e-06, "loss": 0.6444, "step": 15683 }, { "epoch": 0.4579136375580275, "grad_norm": 0.7224249653876185, "learning_rate": 3.0116788321167882e-06, "loss": 0.6686, "step": 15684 }, { "epoch": 0.45794283378587486, "grad_norm": 0.8120149434031197, "learning_rate": 3.0115166261151663e-06, "loss": 0.6609, "step": 15685 }, { "epoch": 0.4579720300137222, "grad_norm": 0.7373569363784054, "learning_rate": 3.0113544201135443e-06, "loss": 0.6842, "step": 15686 }, { "epoch": 0.4580012262415696, "grad_norm": 0.755655165191474, "learning_rate": 3.0111922141119227e-06, "loss": 0.6912, "step": 15687 }, { "epoch": 0.45803042246941694, "grad_norm": 0.7000310947163564, "learning_rate": 3.0110300081103007e-06, "loss": 0.5795, "step": 15688 }, { "epoch": 0.4580596186972643, "grad_norm": 0.6920586871400637, "learning_rate": 3.0108678021086783e-06, "loss": 0.5999, "step": 15689 }, { "epoch": 0.45808881492511166, "grad_norm": 0.7055631815700768, "learning_rate": 3.0107055961070563e-06, "loss": 0.5973, "step": 15690 }, { "epoch": 0.458118011152959, "grad_norm": 0.6993781936909382, "learning_rate": 3.0105433901054343e-06, "loss": 0.5911, "step": 15691 }, { "epoch": 0.4581472073808064, "grad_norm": 0.7038952831525457, "learning_rate": 3.0103811841038123e-06, "loss": 0.5989, "step": 15692 }, { "epoch": 0.45817640360865375, "grad_norm": 0.6765354667632398, "learning_rate": 3.0102189781021903e-06, "loss": 0.6282, "step": 15693 }, { "epoch": 0.4582055998365011, "grad_norm": 0.7931520973268529, "learning_rate": 3.010056772100568e-06, "loss": 0.7271, "step": 15694 }, { "epoch": 0.45823479606434847, "grad_norm": 0.7806241505852429, "learning_rate": 3.009894566098946e-06, "loss": 0.7403, "step": 15695 }, { "epoch": 0.45826399229219583, "grad_norm": 0.780238240074209, "learning_rate": 3.009732360097324e-06, "loss": 0.7165, "step": 15696 }, { "epoch": 0.4582931885200432, "grad_norm": 0.819302936241503, "learning_rate": 3.009570154095702e-06, "loss": 0.7139, "step": 15697 }, { "epoch": 0.45832238474789055, "grad_norm": 0.7640529261629866, "learning_rate": 3.0094079480940795e-06, "loss": 0.6615, "step": 15698 }, { "epoch": 0.4583515809757379, "grad_norm": 0.7749852713370887, "learning_rate": 3.0092457420924575e-06, "loss": 0.716, "step": 15699 }, { "epoch": 0.4583807772035853, "grad_norm": 0.7816079876278194, "learning_rate": 3.0090835360908355e-06, "loss": 0.6701, "step": 15700 }, { "epoch": 0.45840997343143264, "grad_norm": 0.7191692570409528, "learning_rate": 3.0089213300892135e-06, "loss": 0.6563, "step": 15701 }, { "epoch": 0.45843916965928, "grad_norm": 0.6861114054432957, "learning_rate": 3.008759124087591e-06, "loss": 0.6176, "step": 15702 }, { "epoch": 0.45846836588712736, "grad_norm": 0.6991758959742578, "learning_rate": 3.008596918085969e-06, "loss": 0.6102, "step": 15703 }, { "epoch": 0.4584975621149747, "grad_norm": 0.6752625879207684, "learning_rate": 3.008434712084347e-06, "loss": 0.559, "step": 15704 }, { "epoch": 0.4585267583428221, "grad_norm": 0.7503282302188271, "learning_rate": 3.008272506082725e-06, "loss": 0.6771, "step": 15705 }, { "epoch": 0.45855595457066944, "grad_norm": 0.8366567164667185, "learning_rate": 3.0081103000811035e-06, "loss": 0.5317, "step": 15706 }, { "epoch": 0.4585851507985168, "grad_norm": 0.7668929475352183, "learning_rate": 3.0079480940794816e-06, "loss": 0.7196, "step": 15707 }, { "epoch": 0.45861434702636417, "grad_norm": 0.6815154594418715, "learning_rate": 3.007785888077859e-06, "loss": 0.5811, "step": 15708 }, { "epoch": 0.4586435432542116, "grad_norm": 0.7423225271911036, "learning_rate": 3.007623682076237e-06, "loss": 0.6363, "step": 15709 }, { "epoch": 0.45867273948205894, "grad_norm": 0.7620476036320971, "learning_rate": 3.007461476074615e-06, "loss": 0.7051, "step": 15710 }, { "epoch": 0.4587019357099063, "grad_norm": 0.6517504039559971, "learning_rate": 3.007299270072993e-06, "loss": 0.5281, "step": 15711 }, { "epoch": 0.45873113193775367, "grad_norm": 0.7755281344129832, "learning_rate": 3.007137064071371e-06, "loss": 0.7033, "step": 15712 }, { "epoch": 0.45876032816560103, "grad_norm": 0.765900289546762, "learning_rate": 3.0069748580697487e-06, "loss": 0.6813, "step": 15713 }, { "epoch": 0.4587895243934484, "grad_norm": 0.7400698380036275, "learning_rate": 3.0068126520681268e-06, "loss": 0.6585, "step": 15714 }, { "epoch": 0.45881872062129575, "grad_norm": 0.778013193234904, "learning_rate": 3.0066504460665048e-06, "loss": 0.7338, "step": 15715 }, { "epoch": 0.4588479168491431, "grad_norm": 0.8491736927912172, "learning_rate": 3.0064882400648828e-06, "loss": 0.743, "step": 15716 }, { "epoch": 0.4588771130769905, "grad_norm": 0.7346537643695898, "learning_rate": 3.0063260340632604e-06, "loss": 0.6648, "step": 15717 }, { "epoch": 0.45890630930483783, "grad_norm": 0.7426859760167771, "learning_rate": 3.0061638280616384e-06, "loss": 0.6992, "step": 15718 }, { "epoch": 0.4589355055326852, "grad_norm": 0.9744500711381429, "learning_rate": 3.0060016220600164e-06, "loss": 0.7335, "step": 15719 }, { "epoch": 0.45896470176053256, "grad_norm": 0.7266405448292136, "learning_rate": 3.0058394160583944e-06, "loss": 0.6435, "step": 15720 }, { "epoch": 0.4589938979883799, "grad_norm": 0.763160821245639, "learning_rate": 3.005677210056772e-06, "loss": 0.7027, "step": 15721 }, { "epoch": 0.4590230942162273, "grad_norm": 0.8258922246053217, "learning_rate": 3.00551500405515e-06, "loss": 0.7613, "step": 15722 }, { "epoch": 0.45905229044407464, "grad_norm": 0.7072365143726461, "learning_rate": 3.005352798053528e-06, "loss": 0.5962, "step": 15723 }, { "epoch": 0.459081486671922, "grad_norm": 0.7085050135053903, "learning_rate": 3.0051905920519064e-06, "loss": 0.648, "step": 15724 }, { "epoch": 0.45911068289976936, "grad_norm": 0.696056425322188, "learning_rate": 3.0050283860502844e-06, "loss": 0.6202, "step": 15725 }, { "epoch": 0.4591398791276167, "grad_norm": 0.6727592554789782, "learning_rate": 3.0048661800486624e-06, "loss": 0.5792, "step": 15726 }, { "epoch": 0.4591690753554641, "grad_norm": 0.7530870467453236, "learning_rate": 3.00470397404704e-06, "loss": 0.6756, "step": 15727 }, { "epoch": 0.45919827158331145, "grad_norm": 0.7156827884881027, "learning_rate": 3.004541768045418e-06, "loss": 0.6676, "step": 15728 }, { "epoch": 0.4592274678111588, "grad_norm": 0.7160441217062594, "learning_rate": 3.004379562043796e-06, "loss": 0.5997, "step": 15729 }, { "epoch": 0.45925666403900617, "grad_norm": 0.7026915574857479, "learning_rate": 3.004217356042174e-06, "loss": 0.5915, "step": 15730 }, { "epoch": 0.45928586026685353, "grad_norm": 0.6868939130049573, "learning_rate": 3.004055150040552e-06, "loss": 0.5971, "step": 15731 }, { "epoch": 0.4593150564947009, "grad_norm": 0.7944981281816522, "learning_rate": 3.0038929440389296e-06, "loss": 0.7304, "step": 15732 }, { "epoch": 0.45934425272254825, "grad_norm": 0.6678447085176679, "learning_rate": 3.0037307380373076e-06, "loss": 0.5877, "step": 15733 }, { "epoch": 0.4593734489503956, "grad_norm": 0.7045645443049264, "learning_rate": 3.0035685320356856e-06, "loss": 0.5743, "step": 15734 }, { "epoch": 0.459402645178243, "grad_norm": 0.8203138943890038, "learning_rate": 3.0034063260340636e-06, "loss": 0.7205, "step": 15735 }, { "epoch": 0.45943184140609034, "grad_norm": 0.7816244184285258, "learning_rate": 3.003244120032441e-06, "loss": 0.7732, "step": 15736 }, { "epoch": 0.4594610376339377, "grad_norm": 0.7410769160508338, "learning_rate": 3.0030819140308192e-06, "loss": 0.6843, "step": 15737 }, { "epoch": 0.45949023386178506, "grad_norm": 0.7397710608818138, "learning_rate": 3.0029197080291972e-06, "loss": 0.6422, "step": 15738 }, { "epoch": 0.4595194300896324, "grad_norm": 0.7432936072525417, "learning_rate": 3.0027575020275752e-06, "loss": 0.6508, "step": 15739 }, { "epoch": 0.4595486263174798, "grad_norm": 0.7000573542241791, "learning_rate": 3.002595296025953e-06, "loss": 0.5922, "step": 15740 }, { "epoch": 0.45957782254532714, "grad_norm": 0.7005914446011818, "learning_rate": 3.002433090024331e-06, "loss": 0.6305, "step": 15741 }, { "epoch": 0.4596070187731745, "grad_norm": 0.7964255696504583, "learning_rate": 3.002270884022709e-06, "loss": 0.7646, "step": 15742 }, { "epoch": 0.45963621500102186, "grad_norm": 0.734750189753078, "learning_rate": 3.0021086780210873e-06, "loss": 0.5928, "step": 15743 }, { "epoch": 0.4596654112288692, "grad_norm": 0.715055450241341, "learning_rate": 3.0019464720194653e-06, "loss": 0.6406, "step": 15744 }, { "epoch": 0.4596946074567166, "grad_norm": 0.7054855469152906, "learning_rate": 3.0017842660178433e-06, "loss": 0.6246, "step": 15745 }, { "epoch": 0.45972380368456395, "grad_norm": 0.6962674340199811, "learning_rate": 3.001622060016221e-06, "loss": 0.6244, "step": 15746 }, { "epoch": 0.4597529999124113, "grad_norm": 0.734853018237837, "learning_rate": 3.001459854014599e-06, "loss": 0.6325, "step": 15747 }, { "epoch": 0.45978219614025867, "grad_norm": 0.7517964140166832, "learning_rate": 3.001297648012977e-06, "loss": 0.6836, "step": 15748 }, { "epoch": 0.45981139236810603, "grad_norm": 0.7478775724487253, "learning_rate": 3.001135442011355e-06, "loss": 0.6256, "step": 15749 }, { "epoch": 0.4598405885959534, "grad_norm": 0.7438516174258324, "learning_rate": 3.000973236009733e-06, "loss": 0.656, "step": 15750 }, { "epoch": 0.45986978482380075, "grad_norm": 0.7300060565936566, "learning_rate": 3.0008110300081105e-06, "loss": 0.6678, "step": 15751 }, { "epoch": 0.4598989810516481, "grad_norm": 0.7532335964977581, "learning_rate": 3.0006488240064885e-06, "loss": 0.6931, "step": 15752 }, { "epoch": 0.4599281772794955, "grad_norm": 0.7787037216900444, "learning_rate": 3.0004866180048665e-06, "loss": 0.7502, "step": 15753 }, { "epoch": 0.45995737350734284, "grad_norm": 0.7095274518573003, "learning_rate": 3.0003244120032445e-06, "loss": 0.6447, "step": 15754 }, { "epoch": 0.4599865697351902, "grad_norm": 0.7401350112453915, "learning_rate": 3.000162206001622e-06, "loss": 0.7046, "step": 15755 }, { "epoch": 0.46001576596303756, "grad_norm": 0.7729874307685507, "learning_rate": 3e-06, "loss": 0.7202, "step": 15756 }, { "epoch": 0.4600449621908849, "grad_norm": 0.6670012515542646, "learning_rate": 2.999837793998378e-06, "loss": 0.5562, "step": 15757 }, { "epoch": 0.4600741584187323, "grad_norm": 0.7554392379484494, "learning_rate": 2.999675587996756e-06, "loss": 0.6621, "step": 15758 }, { "epoch": 0.46010335464657964, "grad_norm": 0.7819139551056049, "learning_rate": 2.9995133819951337e-06, "loss": 0.6894, "step": 15759 }, { "epoch": 0.460132550874427, "grad_norm": 0.7788544617579254, "learning_rate": 2.9993511759935117e-06, "loss": 0.7235, "step": 15760 }, { "epoch": 0.46016174710227437, "grad_norm": 0.6859301043949518, "learning_rate": 2.9991889699918897e-06, "loss": 0.6162, "step": 15761 }, { "epoch": 0.4601909433301217, "grad_norm": 0.7258719806498615, "learning_rate": 2.999026763990268e-06, "loss": 0.6352, "step": 15762 }, { "epoch": 0.4602201395579691, "grad_norm": 0.7130023557537064, "learning_rate": 2.998864557988646e-06, "loss": 0.6552, "step": 15763 }, { "epoch": 0.46024933578581645, "grad_norm": 0.8403446093722381, "learning_rate": 2.998702351987024e-06, "loss": 0.7239, "step": 15764 }, { "epoch": 0.4602785320136638, "grad_norm": 0.7031579305976656, "learning_rate": 2.9985401459854017e-06, "loss": 0.6051, "step": 15765 }, { "epoch": 0.46030772824151117, "grad_norm": 0.7055240307457055, "learning_rate": 2.9983779399837797e-06, "loss": 0.6179, "step": 15766 }, { "epoch": 0.46033692446935853, "grad_norm": 0.7021059705779102, "learning_rate": 2.9982157339821577e-06, "loss": 0.6392, "step": 15767 }, { "epoch": 0.4603661206972059, "grad_norm": 0.7214615177027113, "learning_rate": 2.9980535279805357e-06, "loss": 0.6473, "step": 15768 }, { "epoch": 0.4603953169250533, "grad_norm": 0.7288411554990573, "learning_rate": 2.9978913219789133e-06, "loss": 0.6431, "step": 15769 }, { "epoch": 0.46042451315290067, "grad_norm": 0.7099633856931356, "learning_rate": 2.9977291159772913e-06, "loss": 0.6372, "step": 15770 }, { "epoch": 0.46045370938074803, "grad_norm": 0.7613065863887251, "learning_rate": 2.9975669099756693e-06, "loss": 0.6744, "step": 15771 }, { "epoch": 0.4604829056085954, "grad_norm": 0.6980239161655218, "learning_rate": 2.9974047039740473e-06, "loss": 0.5746, "step": 15772 }, { "epoch": 0.46051210183644276, "grad_norm": 0.8245421118526447, "learning_rate": 2.9972424979724253e-06, "loss": 0.7225, "step": 15773 }, { "epoch": 0.4605412980642901, "grad_norm": 0.6978981212773446, "learning_rate": 2.997080291970803e-06, "loss": 0.6561, "step": 15774 }, { "epoch": 0.4605704942921375, "grad_norm": 0.7064663364674992, "learning_rate": 2.996918085969181e-06, "loss": 0.5802, "step": 15775 }, { "epoch": 0.46059969051998484, "grad_norm": 0.7341269762914645, "learning_rate": 2.996755879967559e-06, "loss": 0.6854, "step": 15776 }, { "epoch": 0.4606288867478322, "grad_norm": 0.697853776005656, "learning_rate": 2.996593673965937e-06, "loss": 0.5831, "step": 15777 }, { "epoch": 0.46065808297567956, "grad_norm": 0.7580074287583726, "learning_rate": 2.9964314679643145e-06, "loss": 0.6291, "step": 15778 }, { "epoch": 0.4606872792035269, "grad_norm": 0.7067096621065332, "learning_rate": 2.9962692619626925e-06, "loss": 0.6499, "step": 15779 }, { "epoch": 0.4607164754313743, "grad_norm": 0.7534739020176148, "learning_rate": 2.9961070559610705e-06, "loss": 0.6971, "step": 15780 }, { "epoch": 0.46074567165922165, "grad_norm": 0.8497958031702452, "learning_rate": 2.995944849959449e-06, "loss": 0.776, "step": 15781 }, { "epoch": 0.460774867887069, "grad_norm": 0.7366316022945929, "learning_rate": 2.995782643957827e-06, "loss": 0.6282, "step": 15782 }, { "epoch": 0.46080406411491637, "grad_norm": 0.794171137719328, "learning_rate": 2.995620437956205e-06, "loss": 0.7179, "step": 15783 }, { "epoch": 0.46083326034276373, "grad_norm": 0.7322816262784045, "learning_rate": 2.9954582319545826e-06, "loss": 0.6436, "step": 15784 }, { "epoch": 0.4608624565706111, "grad_norm": 0.7428574430311049, "learning_rate": 2.9952960259529606e-06, "loss": 0.6869, "step": 15785 }, { "epoch": 0.46089165279845845, "grad_norm": 0.7290011832000725, "learning_rate": 2.9951338199513386e-06, "loss": 0.6967, "step": 15786 }, { "epoch": 0.4609208490263058, "grad_norm": 0.7603854684020961, "learning_rate": 2.9949716139497166e-06, "loss": 0.6852, "step": 15787 }, { "epoch": 0.4609500452541532, "grad_norm": 0.7071108680597105, "learning_rate": 2.994809407948094e-06, "loss": 0.6322, "step": 15788 }, { "epoch": 0.46097924148200053, "grad_norm": 0.7206034696096777, "learning_rate": 2.994647201946472e-06, "loss": 0.6205, "step": 15789 }, { "epoch": 0.4610084377098479, "grad_norm": 0.7057177990665445, "learning_rate": 2.99448499594485e-06, "loss": 0.6261, "step": 15790 }, { "epoch": 0.46103763393769526, "grad_norm": 0.730210185737837, "learning_rate": 2.994322789943228e-06, "loss": 0.6709, "step": 15791 }, { "epoch": 0.4610668301655426, "grad_norm": 0.7324867483353698, "learning_rate": 2.994160583941606e-06, "loss": 0.6616, "step": 15792 }, { "epoch": 0.46109602639339, "grad_norm": 0.7383536860153411, "learning_rate": 2.9939983779399838e-06, "loss": 0.562, "step": 15793 }, { "epoch": 0.46112522262123734, "grad_norm": 0.6798998937979952, "learning_rate": 2.993836171938362e-06, "loss": 0.5409, "step": 15794 }, { "epoch": 0.4611544188490847, "grad_norm": 0.7013185132986768, "learning_rate": 2.99367396593674e-06, "loss": 0.6195, "step": 15795 }, { "epoch": 0.46118361507693206, "grad_norm": 0.7082975586021338, "learning_rate": 2.993511759935118e-06, "loss": 0.669, "step": 15796 }, { "epoch": 0.4612128113047794, "grad_norm": 0.74220047780648, "learning_rate": 2.9933495539334954e-06, "loss": 0.616, "step": 15797 }, { "epoch": 0.4612420075326268, "grad_norm": 0.6503572876126877, "learning_rate": 2.9931873479318734e-06, "loss": 0.5099, "step": 15798 }, { "epoch": 0.46127120376047415, "grad_norm": 0.7399672555963648, "learning_rate": 2.9930251419302514e-06, "loss": 0.6851, "step": 15799 }, { "epoch": 0.4613003999883215, "grad_norm": 0.7500322082634225, "learning_rate": 2.99286293592863e-06, "loss": 0.6879, "step": 15800 }, { "epoch": 0.46132959621616887, "grad_norm": 0.7806744515746699, "learning_rate": 2.992700729927008e-06, "loss": 0.668, "step": 15801 }, { "epoch": 0.46135879244401623, "grad_norm": 0.7334713623457809, "learning_rate": 2.992538523925386e-06, "loss": 0.6776, "step": 15802 }, { "epoch": 0.4613879886718636, "grad_norm": 0.7185441682371131, "learning_rate": 2.9923763179237634e-06, "loss": 0.642, "step": 15803 }, { "epoch": 0.46141718489971095, "grad_norm": 0.7225452462500157, "learning_rate": 2.9922141119221414e-06, "loss": 0.6787, "step": 15804 }, { "epoch": 0.4614463811275583, "grad_norm": 0.7365687608584969, "learning_rate": 2.9920519059205194e-06, "loss": 0.6452, "step": 15805 }, { "epoch": 0.4614755773554057, "grad_norm": 0.8553218007692649, "learning_rate": 2.9918896999188975e-06, "loss": 0.7459, "step": 15806 }, { "epoch": 0.46150477358325304, "grad_norm": 0.8080249909895053, "learning_rate": 2.991727493917275e-06, "loss": 0.7412, "step": 15807 }, { "epoch": 0.4615339698111004, "grad_norm": 0.7306232858493447, "learning_rate": 2.991565287915653e-06, "loss": 0.6751, "step": 15808 }, { "epoch": 0.46156316603894776, "grad_norm": 0.6583622821880746, "learning_rate": 2.991403081914031e-06, "loss": 0.5356, "step": 15809 }, { "epoch": 0.4615923622667951, "grad_norm": 0.7185290085322387, "learning_rate": 2.991240875912409e-06, "loss": 0.6323, "step": 15810 }, { "epoch": 0.4616215584946425, "grad_norm": 0.6890927194725344, "learning_rate": 2.991078669910787e-06, "loss": 0.5731, "step": 15811 }, { "epoch": 0.46165075472248984, "grad_norm": 0.8399464005806669, "learning_rate": 2.9909164639091646e-06, "loss": 0.7028, "step": 15812 }, { "epoch": 0.4616799509503372, "grad_norm": 0.73778173031786, "learning_rate": 2.9907542579075427e-06, "loss": 0.6519, "step": 15813 }, { "epoch": 0.46170914717818456, "grad_norm": 0.7080558001558659, "learning_rate": 2.9905920519059207e-06, "loss": 0.552, "step": 15814 }, { "epoch": 0.4617383434060319, "grad_norm": 0.7158933524936, "learning_rate": 2.9904298459042987e-06, "loss": 0.6047, "step": 15815 }, { "epoch": 0.4617675396338793, "grad_norm": 0.8200289229023223, "learning_rate": 2.9902676399026762e-06, "loss": 0.7255, "step": 15816 }, { "epoch": 0.46179673586172665, "grad_norm": 0.7917253816098556, "learning_rate": 2.9901054339010543e-06, "loss": 0.7084, "step": 15817 }, { "epoch": 0.461825932089574, "grad_norm": 0.7000499022807136, "learning_rate": 2.9899432278994323e-06, "loss": 0.65, "step": 15818 }, { "epoch": 0.46185512831742137, "grad_norm": 0.7005749328931021, "learning_rate": 2.9897810218978107e-06, "loss": 0.6553, "step": 15819 }, { "epoch": 0.46188432454526873, "grad_norm": 0.7524313617429842, "learning_rate": 2.9896188158961887e-06, "loss": 0.6791, "step": 15820 }, { "epoch": 0.4619135207731161, "grad_norm": 0.7573745824642965, "learning_rate": 2.9894566098945667e-06, "loss": 0.7033, "step": 15821 }, { "epoch": 0.46194271700096345, "grad_norm": 0.7014123915932631, "learning_rate": 2.9892944038929443e-06, "loss": 0.572, "step": 15822 }, { "epoch": 0.4619719132288108, "grad_norm": 0.7179259825603262, "learning_rate": 2.9891321978913223e-06, "loss": 0.5128, "step": 15823 }, { "epoch": 0.4620011094566582, "grad_norm": 0.7142421728336713, "learning_rate": 2.9889699918897003e-06, "loss": 0.5894, "step": 15824 }, { "epoch": 0.46203030568450554, "grad_norm": 0.7077899602711849, "learning_rate": 2.9888077858880783e-06, "loss": 0.6024, "step": 15825 }, { "epoch": 0.4620595019123529, "grad_norm": 0.7786312335802875, "learning_rate": 2.988645579886456e-06, "loss": 0.7548, "step": 15826 }, { "epoch": 0.46208869814020026, "grad_norm": 0.6967782437944576, "learning_rate": 2.988483373884834e-06, "loss": 0.6071, "step": 15827 }, { "epoch": 0.4621178943680476, "grad_norm": 0.7172186771340768, "learning_rate": 2.988321167883212e-06, "loss": 0.6261, "step": 15828 }, { "epoch": 0.462147090595895, "grad_norm": 0.787042448218925, "learning_rate": 2.98815896188159e-06, "loss": 0.7092, "step": 15829 }, { "epoch": 0.4621762868237424, "grad_norm": 0.8248753738577622, "learning_rate": 2.987996755879968e-06, "loss": 0.6942, "step": 15830 }, { "epoch": 0.46220548305158976, "grad_norm": 0.6675757591909228, "learning_rate": 2.9878345498783455e-06, "loss": 0.5935, "step": 15831 }, { "epoch": 0.4622346792794371, "grad_norm": 0.7826638051720657, "learning_rate": 2.9876723438767235e-06, "loss": 0.6984, "step": 15832 }, { "epoch": 0.4622638755072845, "grad_norm": 0.7119230860731793, "learning_rate": 2.9875101378751015e-06, "loss": 0.6296, "step": 15833 }, { "epoch": 0.46229307173513184, "grad_norm": 0.6847145263074781, "learning_rate": 2.9873479318734795e-06, "loss": 0.5706, "step": 15834 }, { "epoch": 0.4623222679629792, "grad_norm": 0.7303982517458133, "learning_rate": 2.987185725871857e-06, "loss": 0.5999, "step": 15835 }, { "epoch": 0.46235146419082657, "grad_norm": 0.6905365934477441, "learning_rate": 2.987023519870235e-06, "loss": 0.5674, "step": 15836 }, { "epoch": 0.4623806604186739, "grad_norm": 0.7422993077884222, "learning_rate": 2.986861313868613e-06, "loss": 0.6118, "step": 15837 }, { "epoch": 0.4624098566465213, "grad_norm": 0.7384624564282972, "learning_rate": 2.9866991078669916e-06, "loss": 0.659, "step": 15838 }, { "epoch": 0.46243905287436865, "grad_norm": 0.7338226989174937, "learning_rate": 2.9865369018653696e-06, "loss": 0.6477, "step": 15839 }, { "epoch": 0.462468249102216, "grad_norm": 0.6827686837020425, "learning_rate": 2.9863746958637476e-06, "loss": 0.6066, "step": 15840 }, { "epoch": 0.4624974453300634, "grad_norm": 0.7448464519419457, "learning_rate": 2.986212489862125e-06, "loss": 0.6792, "step": 15841 }, { "epoch": 0.46252664155791073, "grad_norm": 0.7392335132881507, "learning_rate": 2.986050283860503e-06, "loss": 0.6335, "step": 15842 }, { "epoch": 0.4625558377857581, "grad_norm": 0.6931940897830919, "learning_rate": 2.985888077858881e-06, "loss": 0.6139, "step": 15843 }, { "epoch": 0.46258503401360546, "grad_norm": 0.6892514661228909, "learning_rate": 2.985725871857259e-06, "loss": 0.5932, "step": 15844 }, { "epoch": 0.4626142302414528, "grad_norm": 0.6821325231031773, "learning_rate": 2.9855636658556368e-06, "loss": 0.5685, "step": 15845 }, { "epoch": 0.4626434264693002, "grad_norm": 0.7906518859143107, "learning_rate": 2.9854014598540148e-06, "loss": 0.6046, "step": 15846 }, { "epoch": 0.46267262269714754, "grad_norm": 0.8354575206084375, "learning_rate": 2.9852392538523928e-06, "loss": 0.7013, "step": 15847 }, { "epoch": 0.4627018189249949, "grad_norm": 0.7343910135173024, "learning_rate": 2.9850770478507708e-06, "loss": 0.6338, "step": 15848 }, { "epoch": 0.46273101515284226, "grad_norm": 0.7141757929584673, "learning_rate": 2.9849148418491488e-06, "loss": 0.6043, "step": 15849 }, { "epoch": 0.4627602113806896, "grad_norm": 0.6721959786762759, "learning_rate": 2.9847526358475264e-06, "loss": 0.507, "step": 15850 }, { "epoch": 0.462789407608537, "grad_norm": 0.7441556531759934, "learning_rate": 2.9845904298459044e-06, "loss": 0.6846, "step": 15851 }, { "epoch": 0.46281860383638435, "grad_norm": 0.7224878687618298, "learning_rate": 2.9844282238442824e-06, "loss": 0.632, "step": 15852 }, { "epoch": 0.4628478000642317, "grad_norm": 0.6968753881095152, "learning_rate": 2.9842660178426604e-06, "loss": 0.5945, "step": 15853 }, { "epoch": 0.46287699629207907, "grad_norm": 0.7090813312367286, "learning_rate": 2.984103811841038e-06, "loss": 0.5577, "step": 15854 }, { "epoch": 0.46290619251992643, "grad_norm": 0.7086855724837955, "learning_rate": 2.983941605839416e-06, "loss": 0.6425, "step": 15855 }, { "epoch": 0.4629353887477738, "grad_norm": 0.7453897463855381, "learning_rate": 2.983779399837794e-06, "loss": 0.6287, "step": 15856 }, { "epoch": 0.46296458497562115, "grad_norm": 0.6624097071725439, "learning_rate": 2.9836171938361724e-06, "loss": 0.5877, "step": 15857 }, { "epoch": 0.4629937812034685, "grad_norm": 0.7211252085717803, "learning_rate": 2.9834549878345504e-06, "loss": 0.6267, "step": 15858 }, { "epoch": 0.4630229774313159, "grad_norm": 0.7125912340431906, "learning_rate": 2.9832927818329284e-06, "loss": 0.588, "step": 15859 }, { "epoch": 0.46305217365916324, "grad_norm": 0.7787240467379589, "learning_rate": 2.983130575831306e-06, "loss": 0.6805, "step": 15860 }, { "epoch": 0.4630813698870106, "grad_norm": 0.7635618856771544, "learning_rate": 2.982968369829684e-06, "loss": 0.6945, "step": 15861 }, { "epoch": 0.46311056611485796, "grad_norm": 0.7153739119141711, "learning_rate": 2.982806163828062e-06, "loss": 0.6432, "step": 15862 }, { "epoch": 0.4631397623427053, "grad_norm": 0.7223885839134383, "learning_rate": 2.98264395782644e-06, "loss": 0.6669, "step": 15863 }, { "epoch": 0.4631689585705527, "grad_norm": 0.8089722398246607, "learning_rate": 2.9824817518248176e-06, "loss": 0.6904, "step": 15864 }, { "epoch": 0.46319815479840004, "grad_norm": 0.752892731628038, "learning_rate": 2.9823195458231956e-06, "loss": 0.6231, "step": 15865 }, { "epoch": 0.4632273510262474, "grad_norm": 0.8532138919037077, "learning_rate": 2.9821573398215736e-06, "loss": 0.7428, "step": 15866 }, { "epoch": 0.46325654725409476, "grad_norm": 0.6766375724901375, "learning_rate": 2.9819951338199516e-06, "loss": 0.6122, "step": 15867 }, { "epoch": 0.4632857434819421, "grad_norm": 0.731765136663634, "learning_rate": 2.9818329278183296e-06, "loss": 0.6166, "step": 15868 }, { "epoch": 0.4633149397097895, "grad_norm": 0.784965456879569, "learning_rate": 2.9816707218167072e-06, "loss": 0.7238, "step": 15869 }, { "epoch": 0.46334413593763685, "grad_norm": 0.6727890828213923, "learning_rate": 2.9815085158150852e-06, "loss": 0.5659, "step": 15870 }, { "epoch": 0.4633733321654842, "grad_norm": 0.7193375847233933, "learning_rate": 2.9813463098134632e-06, "loss": 0.6452, "step": 15871 }, { "epoch": 0.46340252839333157, "grad_norm": 0.7558284063591347, "learning_rate": 2.9811841038118412e-06, "loss": 0.6409, "step": 15872 }, { "epoch": 0.46343172462117893, "grad_norm": 0.8003533649441902, "learning_rate": 2.981021897810219e-06, "loss": 0.6551, "step": 15873 }, { "epoch": 0.4634609208490263, "grad_norm": 0.7737843651070182, "learning_rate": 2.980859691808597e-06, "loss": 0.7, "step": 15874 }, { "epoch": 0.46349011707687365, "grad_norm": 0.7812057100320833, "learning_rate": 2.9806974858069753e-06, "loss": 0.7554, "step": 15875 }, { "epoch": 0.463519313304721, "grad_norm": 0.7391171031587204, "learning_rate": 2.9805352798053533e-06, "loss": 0.6196, "step": 15876 }, { "epoch": 0.4635485095325684, "grad_norm": 0.7469707513992604, "learning_rate": 2.9803730738037313e-06, "loss": 0.6604, "step": 15877 }, { "epoch": 0.46357770576041574, "grad_norm": 0.7484964312695519, "learning_rate": 2.9802108678021093e-06, "loss": 0.6461, "step": 15878 }, { "epoch": 0.4636069019882631, "grad_norm": 0.7487850627229823, "learning_rate": 2.980048661800487e-06, "loss": 0.6572, "step": 15879 }, { "epoch": 0.46363609821611046, "grad_norm": 0.6933016284907414, "learning_rate": 2.979886455798865e-06, "loss": 0.5727, "step": 15880 }, { "epoch": 0.4636652944439578, "grad_norm": 0.7285059206828274, "learning_rate": 2.979724249797243e-06, "loss": 0.6641, "step": 15881 }, { "epoch": 0.4636944906718052, "grad_norm": 0.7337642441535546, "learning_rate": 2.979562043795621e-06, "loss": 0.6605, "step": 15882 }, { "epoch": 0.46372368689965254, "grad_norm": 0.666628610743877, "learning_rate": 2.9793998377939985e-06, "loss": 0.5153, "step": 15883 }, { "epoch": 0.4637528831274999, "grad_norm": 0.7572638566895834, "learning_rate": 2.9792376317923765e-06, "loss": 0.6662, "step": 15884 }, { "epoch": 0.46378207935534727, "grad_norm": 0.7105105846466311, "learning_rate": 2.9790754257907545e-06, "loss": 0.5874, "step": 15885 }, { "epoch": 0.4638112755831946, "grad_norm": 0.7180825002921604, "learning_rate": 2.9789132197891325e-06, "loss": 0.6354, "step": 15886 }, { "epoch": 0.463840471811042, "grad_norm": 0.7142471771256187, "learning_rate": 2.9787510137875105e-06, "loss": 0.6448, "step": 15887 }, { "epoch": 0.46386966803888935, "grad_norm": 0.909701341244431, "learning_rate": 2.978588807785888e-06, "loss": 0.6783, "step": 15888 }, { "epoch": 0.4638988642667367, "grad_norm": 0.7529652858789435, "learning_rate": 2.978426601784266e-06, "loss": 0.642, "step": 15889 }, { "epoch": 0.4639280604945841, "grad_norm": 0.7439890136011099, "learning_rate": 2.978264395782644e-06, "loss": 0.6596, "step": 15890 }, { "epoch": 0.4639572567224315, "grad_norm": 0.6934263662151162, "learning_rate": 2.978102189781022e-06, "loss": 0.5988, "step": 15891 }, { "epoch": 0.46398645295027885, "grad_norm": 0.721115638636445, "learning_rate": 2.9779399837793997e-06, "loss": 0.6188, "step": 15892 }, { "epoch": 0.4640156491781262, "grad_norm": 0.8841031464795409, "learning_rate": 2.9777777777777777e-06, "loss": 0.697, "step": 15893 }, { "epoch": 0.46404484540597357, "grad_norm": 0.6565238036493468, "learning_rate": 2.977615571776156e-06, "loss": 0.548, "step": 15894 }, { "epoch": 0.46407404163382093, "grad_norm": 0.8074611153981825, "learning_rate": 2.977453365774534e-06, "loss": 0.7249, "step": 15895 }, { "epoch": 0.4641032378616683, "grad_norm": 0.73474491139069, "learning_rate": 2.977291159772912e-06, "loss": 0.6105, "step": 15896 }, { "epoch": 0.46413243408951566, "grad_norm": 0.7289913600037217, "learning_rate": 2.97712895377129e-06, "loss": 0.5971, "step": 15897 }, { "epoch": 0.464161630317363, "grad_norm": 0.8023391121293336, "learning_rate": 2.9769667477696677e-06, "loss": 0.7901, "step": 15898 }, { "epoch": 0.4641908265452104, "grad_norm": 0.7452887427843133, "learning_rate": 2.9768045417680457e-06, "loss": 0.6936, "step": 15899 }, { "epoch": 0.46422002277305774, "grad_norm": 0.7488668186629538, "learning_rate": 2.9766423357664237e-06, "loss": 0.7108, "step": 15900 }, { "epoch": 0.4642492190009051, "grad_norm": 0.711737322690731, "learning_rate": 2.9764801297648017e-06, "loss": 0.6136, "step": 15901 }, { "epoch": 0.46427841522875246, "grad_norm": 0.6221234106058026, "learning_rate": 2.9763179237631793e-06, "loss": 0.5208, "step": 15902 }, { "epoch": 0.4643076114565998, "grad_norm": 0.7200505949184111, "learning_rate": 2.9761557177615573e-06, "loss": 0.6294, "step": 15903 }, { "epoch": 0.4643368076844472, "grad_norm": 0.7149077789542778, "learning_rate": 2.9759935117599353e-06, "loss": 0.6218, "step": 15904 }, { "epoch": 0.46436600391229454, "grad_norm": 0.7373183425236552, "learning_rate": 2.9758313057583133e-06, "loss": 0.6805, "step": 15905 }, { "epoch": 0.4643952001401419, "grad_norm": 0.7442730503195237, "learning_rate": 2.9756690997566914e-06, "loss": 0.6864, "step": 15906 }, { "epoch": 0.46442439636798927, "grad_norm": 0.7313902302458, "learning_rate": 2.975506893755069e-06, "loss": 0.6372, "step": 15907 }, { "epoch": 0.46445359259583663, "grad_norm": 0.7100851662375621, "learning_rate": 2.975344687753447e-06, "loss": 0.667, "step": 15908 }, { "epoch": 0.464482788823684, "grad_norm": 0.7757159250393075, "learning_rate": 2.975182481751825e-06, "loss": 0.7601, "step": 15909 }, { "epoch": 0.46451198505153135, "grad_norm": 0.6596728748285194, "learning_rate": 2.975020275750203e-06, "loss": 0.5512, "step": 15910 }, { "epoch": 0.4645411812793787, "grad_norm": 0.7846513154642335, "learning_rate": 2.9748580697485805e-06, "loss": 0.7184, "step": 15911 }, { "epoch": 0.4645703775072261, "grad_norm": 0.7462260415886572, "learning_rate": 2.9746958637469585e-06, "loss": 0.6461, "step": 15912 }, { "epoch": 0.46459957373507343, "grad_norm": 0.7147696209720524, "learning_rate": 2.974533657745337e-06, "loss": 0.6206, "step": 15913 }, { "epoch": 0.4646287699629208, "grad_norm": 0.7184236711245997, "learning_rate": 2.974371451743715e-06, "loss": 0.6034, "step": 15914 }, { "epoch": 0.46465796619076816, "grad_norm": 0.7315250212570693, "learning_rate": 2.974209245742093e-06, "loss": 0.642, "step": 15915 }, { "epoch": 0.4646871624186155, "grad_norm": 0.7928669107271054, "learning_rate": 2.974047039740471e-06, "loss": 0.7096, "step": 15916 }, { "epoch": 0.4647163586464629, "grad_norm": 0.7810817628969466, "learning_rate": 2.9738848337388486e-06, "loss": 0.6061, "step": 15917 }, { "epoch": 0.46474555487431024, "grad_norm": 0.7852590602922085, "learning_rate": 2.9737226277372266e-06, "loss": 0.6945, "step": 15918 }, { "epoch": 0.4647747511021576, "grad_norm": 0.6863442154145097, "learning_rate": 2.9735604217356046e-06, "loss": 0.6108, "step": 15919 }, { "epoch": 0.46480394733000496, "grad_norm": 0.6909756215417462, "learning_rate": 2.9733982157339826e-06, "loss": 0.5599, "step": 15920 }, { "epoch": 0.4648331435578523, "grad_norm": 0.7109175909752108, "learning_rate": 2.97323600973236e-06, "loss": 0.6125, "step": 15921 }, { "epoch": 0.4648623397856997, "grad_norm": 0.8242059971955622, "learning_rate": 2.973073803730738e-06, "loss": 0.6818, "step": 15922 }, { "epoch": 0.46489153601354705, "grad_norm": 0.7235607683471763, "learning_rate": 2.972911597729116e-06, "loss": 0.6433, "step": 15923 }, { "epoch": 0.4649207322413944, "grad_norm": 0.7587932854827233, "learning_rate": 2.972749391727494e-06, "loss": 0.6669, "step": 15924 }, { "epoch": 0.46494992846924177, "grad_norm": 0.772969925898505, "learning_rate": 2.9725871857258722e-06, "loss": 0.6471, "step": 15925 }, { "epoch": 0.46497912469708913, "grad_norm": 0.7007673965335017, "learning_rate": 2.97242497972425e-06, "loss": 0.5785, "step": 15926 }, { "epoch": 0.4650083209249365, "grad_norm": 0.714062594779764, "learning_rate": 2.972262773722628e-06, "loss": 0.5624, "step": 15927 }, { "epoch": 0.46503751715278385, "grad_norm": 0.7602683477985062, "learning_rate": 2.972100567721006e-06, "loss": 0.6499, "step": 15928 }, { "epoch": 0.4650667133806312, "grad_norm": 0.8135232391458017, "learning_rate": 2.971938361719384e-06, "loss": 0.7973, "step": 15929 }, { "epoch": 0.4650959096084786, "grad_norm": 0.7480271395315677, "learning_rate": 2.9717761557177614e-06, "loss": 0.7134, "step": 15930 }, { "epoch": 0.46512510583632594, "grad_norm": 0.6903054774404486, "learning_rate": 2.9716139497161394e-06, "loss": 0.5711, "step": 15931 }, { "epoch": 0.4651543020641733, "grad_norm": 0.7642777385565218, "learning_rate": 2.971451743714518e-06, "loss": 0.6576, "step": 15932 }, { "epoch": 0.46518349829202066, "grad_norm": 0.687322001049208, "learning_rate": 2.971289537712896e-06, "loss": 0.5774, "step": 15933 }, { "epoch": 0.465212694519868, "grad_norm": 0.7207399608237434, "learning_rate": 2.971127331711274e-06, "loss": 0.6479, "step": 15934 }, { "epoch": 0.4652418907477154, "grad_norm": 0.7036721266081097, "learning_rate": 2.970965125709652e-06, "loss": 0.6281, "step": 15935 }, { "epoch": 0.46527108697556274, "grad_norm": 0.7432937698420012, "learning_rate": 2.9708029197080294e-06, "loss": 0.6275, "step": 15936 }, { "epoch": 0.4653002832034101, "grad_norm": 0.9859962955038913, "learning_rate": 2.9706407137064074e-06, "loss": 0.6037, "step": 15937 }, { "epoch": 0.46532947943125746, "grad_norm": 0.7040636165734226, "learning_rate": 2.9704785077047855e-06, "loss": 0.6762, "step": 15938 }, { "epoch": 0.4653586756591048, "grad_norm": 0.6965259358226024, "learning_rate": 2.9703163017031635e-06, "loss": 0.607, "step": 15939 }, { "epoch": 0.4653878718869522, "grad_norm": 0.7325299646389769, "learning_rate": 2.970154095701541e-06, "loss": 0.7143, "step": 15940 }, { "epoch": 0.46541706811479955, "grad_norm": 0.7442618636861852, "learning_rate": 2.969991889699919e-06, "loss": 0.7292, "step": 15941 }, { "epoch": 0.4654462643426469, "grad_norm": 0.6734821950315112, "learning_rate": 2.969829683698297e-06, "loss": 0.5478, "step": 15942 }, { "epoch": 0.46547546057049427, "grad_norm": 0.6998718322515052, "learning_rate": 2.969667477696675e-06, "loss": 0.5735, "step": 15943 }, { "epoch": 0.46550465679834163, "grad_norm": 0.7352098895446123, "learning_rate": 2.969505271695053e-06, "loss": 0.639, "step": 15944 }, { "epoch": 0.465533853026189, "grad_norm": 0.7163508197885186, "learning_rate": 2.9693430656934307e-06, "loss": 0.637, "step": 15945 }, { "epoch": 0.46556304925403635, "grad_norm": 0.7131980015534823, "learning_rate": 2.9691808596918087e-06, "loss": 0.6582, "step": 15946 }, { "epoch": 0.4655922454818837, "grad_norm": 0.7894996040352542, "learning_rate": 2.9690186536901867e-06, "loss": 0.6947, "step": 15947 }, { "epoch": 0.4656214417097311, "grad_norm": 0.6905319958393255, "learning_rate": 2.9688564476885647e-06, "loss": 0.5775, "step": 15948 }, { "epoch": 0.46565063793757844, "grad_norm": 0.6834502903417128, "learning_rate": 2.9686942416869423e-06, "loss": 0.6181, "step": 15949 }, { "epoch": 0.46567983416542585, "grad_norm": 0.7314166533665198, "learning_rate": 2.9685320356853203e-06, "loss": 0.6401, "step": 15950 }, { "epoch": 0.4657090303932732, "grad_norm": 0.7408192227147593, "learning_rate": 2.9683698296836987e-06, "loss": 0.6959, "step": 15951 }, { "epoch": 0.4657382266211206, "grad_norm": 0.7872980300044078, "learning_rate": 2.9682076236820767e-06, "loss": 0.6192, "step": 15952 }, { "epoch": 0.46576742284896794, "grad_norm": 0.7732081637672724, "learning_rate": 2.9680454176804547e-06, "loss": 0.7005, "step": 15953 }, { "epoch": 0.4657966190768153, "grad_norm": 0.7730305195275287, "learning_rate": 2.9678832116788327e-06, "loss": 0.6495, "step": 15954 }, { "epoch": 0.46582581530466266, "grad_norm": 0.7821423555731628, "learning_rate": 2.9677210056772103e-06, "loss": 0.7019, "step": 15955 }, { "epoch": 0.46585501153251, "grad_norm": 0.7248472713651987, "learning_rate": 2.9675587996755883e-06, "loss": 0.6561, "step": 15956 }, { "epoch": 0.4658842077603574, "grad_norm": 0.733848312635275, "learning_rate": 2.9673965936739663e-06, "loss": 0.667, "step": 15957 }, { "epoch": 0.46591340398820474, "grad_norm": 0.7583604171684686, "learning_rate": 2.9672343876723443e-06, "loss": 0.713, "step": 15958 }, { "epoch": 0.4659426002160521, "grad_norm": 0.7105334516513181, "learning_rate": 2.967072181670722e-06, "loss": 0.5925, "step": 15959 }, { "epoch": 0.46597179644389947, "grad_norm": 0.7385543844884532, "learning_rate": 2.9669099756691e-06, "loss": 0.6455, "step": 15960 }, { "epoch": 0.4660009926717468, "grad_norm": 0.6841394651126083, "learning_rate": 2.966747769667478e-06, "loss": 0.5878, "step": 15961 }, { "epoch": 0.4660301888995942, "grad_norm": 0.7891256755166186, "learning_rate": 2.966585563665856e-06, "loss": 0.6651, "step": 15962 }, { "epoch": 0.46605938512744155, "grad_norm": 0.6942837582575576, "learning_rate": 2.966423357664234e-06, "loss": 0.5737, "step": 15963 }, { "epoch": 0.4660885813552889, "grad_norm": 0.7904728412213894, "learning_rate": 2.9662611516626115e-06, "loss": 0.6577, "step": 15964 }, { "epoch": 0.4661177775831363, "grad_norm": 0.719505291941794, "learning_rate": 2.9660989456609895e-06, "loss": 0.6074, "step": 15965 }, { "epoch": 0.46614697381098363, "grad_norm": 0.8014085074859645, "learning_rate": 2.9659367396593675e-06, "loss": 0.7367, "step": 15966 }, { "epoch": 0.466176170038831, "grad_norm": 0.6704110643785924, "learning_rate": 2.9657745336577455e-06, "loss": 0.5605, "step": 15967 }, { "epoch": 0.46620536626667836, "grad_norm": 0.7392685156513027, "learning_rate": 2.965612327656123e-06, "loss": 0.686, "step": 15968 }, { "epoch": 0.4662345624945257, "grad_norm": 0.6838902585600793, "learning_rate": 2.965450121654501e-06, "loss": 0.5871, "step": 15969 }, { "epoch": 0.4662637587223731, "grad_norm": 0.7226680003575705, "learning_rate": 2.9652879156528796e-06, "loss": 0.6793, "step": 15970 }, { "epoch": 0.46629295495022044, "grad_norm": 0.7673168213903196, "learning_rate": 2.9651257096512576e-06, "loss": 0.6369, "step": 15971 }, { "epoch": 0.4663221511780678, "grad_norm": 0.94195291293543, "learning_rate": 2.9649635036496356e-06, "loss": 0.7725, "step": 15972 }, { "epoch": 0.46635134740591516, "grad_norm": 0.7705671997542973, "learning_rate": 2.9648012976480136e-06, "loss": 0.6886, "step": 15973 }, { "epoch": 0.4663805436337625, "grad_norm": 0.7138590745780398, "learning_rate": 2.964639091646391e-06, "loss": 0.5943, "step": 15974 }, { "epoch": 0.4664097398616099, "grad_norm": 0.7368515815374981, "learning_rate": 2.964476885644769e-06, "loss": 0.6636, "step": 15975 }, { "epoch": 0.46643893608945725, "grad_norm": 0.695472777367099, "learning_rate": 2.964314679643147e-06, "loss": 0.6015, "step": 15976 }, { "epoch": 0.4664681323173046, "grad_norm": 0.7398669400693332, "learning_rate": 2.964152473641525e-06, "loss": 0.6207, "step": 15977 }, { "epoch": 0.46649732854515197, "grad_norm": 0.8295471320159099, "learning_rate": 2.9639902676399028e-06, "loss": 0.6522, "step": 15978 }, { "epoch": 0.46652652477299933, "grad_norm": 0.7090808118940339, "learning_rate": 2.9638280616382808e-06, "loss": 0.6389, "step": 15979 }, { "epoch": 0.4665557210008467, "grad_norm": 0.7707516150037782, "learning_rate": 2.9636658556366588e-06, "loss": 0.7342, "step": 15980 }, { "epoch": 0.46658491722869405, "grad_norm": 0.7185981770678166, "learning_rate": 2.9635036496350368e-06, "loss": 0.6296, "step": 15981 }, { "epoch": 0.4666141134565414, "grad_norm": 0.7028722038414407, "learning_rate": 2.963341443633415e-06, "loss": 0.5963, "step": 15982 }, { "epoch": 0.4666433096843888, "grad_norm": 0.7131211654596115, "learning_rate": 2.9631792376317924e-06, "loss": 0.6106, "step": 15983 }, { "epoch": 0.46667250591223614, "grad_norm": 0.7141724511876261, "learning_rate": 2.9630170316301704e-06, "loss": 0.6441, "step": 15984 }, { "epoch": 0.4667017021400835, "grad_norm": 0.7440485196591815, "learning_rate": 2.9628548256285484e-06, "loss": 0.7013, "step": 15985 }, { "epoch": 0.46673089836793086, "grad_norm": 0.7360849876120893, "learning_rate": 2.9626926196269264e-06, "loss": 0.6811, "step": 15986 }, { "epoch": 0.4667600945957782, "grad_norm": 0.7441592858207701, "learning_rate": 2.962530413625304e-06, "loss": 0.6611, "step": 15987 }, { "epoch": 0.4667892908236256, "grad_norm": 0.6698116780661302, "learning_rate": 2.962368207623682e-06, "loss": 0.5369, "step": 15988 }, { "epoch": 0.46681848705147294, "grad_norm": 0.7420374134769276, "learning_rate": 2.9622060016220604e-06, "loss": 0.6787, "step": 15989 }, { "epoch": 0.4668476832793203, "grad_norm": 0.7437937748003605, "learning_rate": 2.9620437956204384e-06, "loss": 0.6667, "step": 15990 }, { "epoch": 0.46687687950716766, "grad_norm": 0.7165881044394258, "learning_rate": 2.9618815896188164e-06, "loss": 0.6124, "step": 15991 }, { "epoch": 0.466906075735015, "grad_norm": 0.7450574031485923, "learning_rate": 2.9617193836171944e-06, "loss": 0.6678, "step": 15992 }, { "epoch": 0.4669352719628624, "grad_norm": 0.6706988766204085, "learning_rate": 2.961557177615572e-06, "loss": 0.571, "step": 15993 }, { "epoch": 0.46696446819070975, "grad_norm": 0.7342836991584517, "learning_rate": 2.96139497161395e-06, "loss": 0.6412, "step": 15994 }, { "epoch": 0.4669936644185571, "grad_norm": 0.7665698432457568, "learning_rate": 2.961232765612328e-06, "loss": 0.7239, "step": 15995 }, { "epoch": 0.46702286064640447, "grad_norm": 0.7220780587462131, "learning_rate": 2.961070559610706e-06, "loss": 0.6267, "step": 15996 }, { "epoch": 0.46705205687425183, "grad_norm": 0.7994051655999694, "learning_rate": 2.9609083536090836e-06, "loss": 0.6628, "step": 15997 }, { "epoch": 0.4670812531020992, "grad_norm": 0.6908526333562179, "learning_rate": 2.9607461476074616e-06, "loss": 0.5825, "step": 15998 }, { "epoch": 0.46711044932994655, "grad_norm": 0.7191286883062519, "learning_rate": 2.9605839416058396e-06, "loss": 0.6425, "step": 15999 }, { "epoch": 0.4671396455577939, "grad_norm": 0.6982183767073319, "learning_rate": 2.9604217356042176e-06, "loss": 0.5989, "step": 16000 }, { "epoch": 0.4671688417856413, "grad_norm": 0.7021553229358718, "learning_rate": 2.9602595296025956e-06, "loss": 0.6426, "step": 16001 }, { "epoch": 0.46719803801348864, "grad_norm": 0.7510006952059999, "learning_rate": 2.9600973236009732e-06, "loss": 0.5735, "step": 16002 }, { "epoch": 0.467227234241336, "grad_norm": 0.8134039308148937, "learning_rate": 2.9599351175993512e-06, "loss": 0.7173, "step": 16003 }, { "epoch": 0.46725643046918336, "grad_norm": 0.7305729271987572, "learning_rate": 2.9597729115977292e-06, "loss": 0.6488, "step": 16004 }, { "epoch": 0.4672856266970307, "grad_norm": 0.7227630803436539, "learning_rate": 2.9596107055961073e-06, "loss": 0.6387, "step": 16005 }, { "epoch": 0.4673148229248781, "grad_norm": 0.7985403635231828, "learning_rate": 2.959448499594485e-06, "loss": 0.7419, "step": 16006 }, { "epoch": 0.46734401915272544, "grad_norm": 0.7041201436234326, "learning_rate": 2.959286293592863e-06, "loss": 0.6394, "step": 16007 }, { "epoch": 0.4673732153805728, "grad_norm": 0.7758421772724925, "learning_rate": 2.9591240875912413e-06, "loss": 0.7072, "step": 16008 }, { "epoch": 0.46740241160842017, "grad_norm": 0.7253537294200343, "learning_rate": 2.9589618815896193e-06, "loss": 0.6717, "step": 16009 }, { "epoch": 0.4674316078362676, "grad_norm": 0.7510694670297084, "learning_rate": 2.9587996755879973e-06, "loss": 0.6709, "step": 16010 }, { "epoch": 0.46746080406411494, "grad_norm": 0.7985963915390202, "learning_rate": 2.9586374695863753e-06, "loss": 0.7169, "step": 16011 }, { "epoch": 0.4674900002919623, "grad_norm": 0.6777739021264488, "learning_rate": 2.958475263584753e-06, "loss": 0.5896, "step": 16012 }, { "epoch": 0.46751919651980967, "grad_norm": 0.6886010951845716, "learning_rate": 2.958313057583131e-06, "loss": 0.6111, "step": 16013 }, { "epoch": 0.467548392747657, "grad_norm": 0.7126016964645361, "learning_rate": 2.958150851581509e-06, "loss": 0.6585, "step": 16014 }, { "epoch": 0.4675775889755044, "grad_norm": 0.7632047714361486, "learning_rate": 2.957988645579887e-06, "loss": 0.6565, "step": 16015 }, { "epoch": 0.46760678520335175, "grad_norm": 0.7135297585947961, "learning_rate": 2.9578264395782645e-06, "loss": 0.6266, "step": 16016 }, { "epoch": 0.4676359814311991, "grad_norm": 0.7022683896245187, "learning_rate": 2.9576642335766425e-06, "loss": 0.6327, "step": 16017 }, { "epoch": 0.46766517765904647, "grad_norm": 0.7651782373318662, "learning_rate": 2.9575020275750205e-06, "loss": 0.7035, "step": 16018 }, { "epoch": 0.46769437388689383, "grad_norm": 0.7513977569417434, "learning_rate": 2.9573398215733985e-06, "loss": 0.7208, "step": 16019 }, { "epoch": 0.4677235701147412, "grad_norm": 0.7101182303179511, "learning_rate": 2.957177615571776e-06, "loss": 0.6285, "step": 16020 }, { "epoch": 0.46775276634258856, "grad_norm": 0.7367809434768149, "learning_rate": 2.957015409570154e-06, "loss": 0.5766, "step": 16021 }, { "epoch": 0.4677819625704359, "grad_norm": 0.7295533118354471, "learning_rate": 2.956853203568532e-06, "loss": 0.6378, "step": 16022 }, { "epoch": 0.4678111587982833, "grad_norm": 0.7013124286495955, "learning_rate": 2.95669099756691e-06, "loss": 0.6527, "step": 16023 }, { "epoch": 0.46784035502613064, "grad_norm": 0.788981867144199, "learning_rate": 2.956528791565288e-06, "loss": 0.7131, "step": 16024 }, { "epoch": 0.467869551253978, "grad_norm": 0.7804552015095398, "learning_rate": 2.9563665855636657e-06, "loss": 0.8075, "step": 16025 }, { "epoch": 0.46789874748182536, "grad_norm": 0.756574611746711, "learning_rate": 2.956204379562044e-06, "loss": 0.6016, "step": 16026 }, { "epoch": 0.4679279437096727, "grad_norm": 0.7936019160510226, "learning_rate": 2.956042173560422e-06, "loss": 0.7116, "step": 16027 }, { "epoch": 0.4679571399375201, "grad_norm": 0.7111110297199972, "learning_rate": 2.9558799675588e-06, "loss": 0.5228, "step": 16028 }, { "epoch": 0.46798633616536744, "grad_norm": 0.8541722924527576, "learning_rate": 2.955717761557178e-06, "loss": 0.7902, "step": 16029 }, { "epoch": 0.4680155323932148, "grad_norm": 0.7302715292775945, "learning_rate": 2.955555555555556e-06, "loss": 0.67, "step": 16030 }, { "epoch": 0.46804472862106217, "grad_norm": 0.6888617118411717, "learning_rate": 2.9553933495539337e-06, "loss": 0.5629, "step": 16031 }, { "epoch": 0.46807392484890953, "grad_norm": 0.7357363896276292, "learning_rate": 2.9552311435523117e-06, "loss": 0.6767, "step": 16032 }, { "epoch": 0.4681031210767569, "grad_norm": 0.7934841717358523, "learning_rate": 2.9550689375506897e-06, "loss": 0.6576, "step": 16033 }, { "epoch": 0.46813231730460425, "grad_norm": 0.737881971409519, "learning_rate": 2.9549067315490678e-06, "loss": 0.6239, "step": 16034 }, { "epoch": 0.4681615135324516, "grad_norm": 0.7663061643219771, "learning_rate": 2.9547445255474453e-06, "loss": 0.6269, "step": 16035 }, { "epoch": 0.468190709760299, "grad_norm": 0.7138892685660578, "learning_rate": 2.9545823195458233e-06, "loss": 0.6008, "step": 16036 }, { "epoch": 0.46821990598814633, "grad_norm": 0.6796448135132579, "learning_rate": 2.9544201135442014e-06, "loss": 0.5711, "step": 16037 }, { "epoch": 0.4682491022159937, "grad_norm": 0.7761157445897191, "learning_rate": 2.9542579075425794e-06, "loss": 0.7609, "step": 16038 }, { "epoch": 0.46827829844384106, "grad_norm": 0.7379487095587179, "learning_rate": 2.954095701540957e-06, "loss": 0.6797, "step": 16039 }, { "epoch": 0.4683074946716884, "grad_norm": 0.7260107393659883, "learning_rate": 2.953933495539335e-06, "loss": 0.7239, "step": 16040 }, { "epoch": 0.4683366908995358, "grad_norm": 0.8117092955138246, "learning_rate": 2.953771289537713e-06, "loss": 0.7932, "step": 16041 }, { "epoch": 0.46836588712738314, "grad_norm": 0.7165730465839771, "learning_rate": 2.953609083536091e-06, "loss": 0.6509, "step": 16042 }, { "epoch": 0.4683950833552305, "grad_norm": 0.7658847781586328, "learning_rate": 2.953446877534469e-06, "loss": 0.6556, "step": 16043 }, { "epoch": 0.46842427958307786, "grad_norm": 0.7563240946029887, "learning_rate": 2.9532846715328466e-06, "loss": 0.6473, "step": 16044 }, { "epoch": 0.4684534758109252, "grad_norm": 0.8143668189696351, "learning_rate": 2.953122465531225e-06, "loss": 0.7505, "step": 16045 }, { "epoch": 0.4684826720387726, "grad_norm": 0.7428096654792592, "learning_rate": 2.952960259529603e-06, "loss": 0.6697, "step": 16046 }, { "epoch": 0.46851186826661995, "grad_norm": 0.7599344855061594, "learning_rate": 2.952798053527981e-06, "loss": 0.6368, "step": 16047 }, { "epoch": 0.4685410644944673, "grad_norm": 0.7069608988849883, "learning_rate": 2.952635847526359e-06, "loss": 0.6193, "step": 16048 }, { "epoch": 0.46857026072231467, "grad_norm": 0.725144898382035, "learning_rate": 2.952473641524737e-06, "loss": 0.6796, "step": 16049 }, { "epoch": 0.46859945695016203, "grad_norm": 0.7026929808128921, "learning_rate": 2.9523114355231146e-06, "loss": 0.6038, "step": 16050 }, { "epoch": 0.4686286531780094, "grad_norm": 0.6899267676912123, "learning_rate": 2.9521492295214926e-06, "loss": 0.6149, "step": 16051 }, { "epoch": 0.46865784940585675, "grad_norm": 0.6879678589850537, "learning_rate": 2.9519870235198706e-06, "loss": 0.5923, "step": 16052 }, { "epoch": 0.4686870456337041, "grad_norm": 0.6611631100882024, "learning_rate": 2.9518248175182486e-06, "loss": 0.5715, "step": 16053 }, { "epoch": 0.4687162418615515, "grad_norm": 0.7092525012387396, "learning_rate": 2.951662611516626e-06, "loss": 0.6468, "step": 16054 }, { "epoch": 0.46874543808939884, "grad_norm": 0.7083243756903201, "learning_rate": 2.951500405515004e-06, "loss": 0.616, "step": 16055 }, { "epoch": 0.4687746343172462, "grad_norm": 0.6923032252185228, "learning_rate": 2.951338199513382e-06, "loss": 0.6217, "step": 16056 }, { "epoch": 0.46880383054509356, "grad_norm": 0.7379262425362374, "learning_rate": 2.9511759935117602e-06, "loss": 0.6513, "step": 16057 }, { "epoch": 0.4688330267729409, "grad_norm": 0.6855301112963019, "learning_rate": 2.951013787510138e-06, "loss": 0.5718, "step": 16058 }, { "epoch": 0.4688622230007883, "grad_norm": 0.7025868969636411, "learning_rate": 2.950851581508516e-06, "loss": 0.6493, "step": 16059 }, { "epoch": 0.46889141922863564, "grad_norm": 0.7975283880573019, "learning_rate": 2.950689375506894e-06, "loss": 0.6892, "step": 16060 }, { "epoch": 0.468920615456483, "grad_norm": 0.7324185244388844, "learning_rate": 2.950527169505272e-06, "loss": 0.6494, "step": 16061 }, { "epoch": 0.46894981168433036, "grad_norm": 0.7145647417140716, "learning_rate": 2.95036496350365e-06, "loss": 0.6137, "step": 16062 }, { "epoch": 0.4689790079121777, "grad_norm": 0.7671923350473133, "learning_rate": 2.9502027575020274e-06, "loss": 0.6773, "step": 16063 }, { "epoch": 0.4690082041400251, "grad_norm": 0.7173254443598899, "learning_rate": 2.950040551500406e-06, "loss": 0.6207, "step": 16064 }, { "epoch": 0.46903740036787245, "grad_norm": 0.7354053701238227, "learning_rate": 2.949878345498784e-06, "loss": 0.6292, "step": 16065 }, { "epoch": 0.4690665965957198, "grad_norm": 0.79153788568739, "learning_rate": 2.949716139497162e-06, "loss": 0.7429, "step": 16066 }, { "epoch": 0.46909579282356717, "grad_norm": 0.7801665087030378, "learning_rate": 2.94955393349554e-06, "loss": 0.7332, "step": 16067 }, { "epoch": 0.46912498905141453, "grad_norm": 0.7085417691609757, "learning_rate": 2.949391727493918e-06, "loss": 0.5958, "step": 16068 }, { "epoch": 0.4691541852792619, "grad_norm": 0.7095243231744861, "learning_rate": 2.9492295214922955e-06, "loss": 0.6635, "step": 16069 }, { "epoch": 0.46918338150710925, "grad_norm": 0.6608449555626561, "learning_rate": 2.9490673154906735e-06, "loss": 0.5079, "step": 16070 }, { "epoch": 0.46921257773495667, "grad_norm": 0.7788340780055725, "learning_rate": 2.9489051094890515e-06, "loss": 0.7362, "step": 16071 }, { "epoch": 0.46924177396280403, "grad_norm": 0.712673755287059, "learning_rate": 2.9487429034874295e-06, "loss": 0.648, "step": 16072 }, { "epoch": 0.4692709701906514, "grad_norm": 0.7144536424135896, "learning_rate": 2.948580697485807e-06, "loss": 0.6082, "step": 16073 }, { "epoch": 0.46930016641849875, "grad_norm": 0.667899795455489, "learning_rate": 2.948418491484185e-06, "loss": 0.5714, "step": 16074 }, { "epoch": 0.4693293626463461, "grad_norm": 0.7692584274899037, "learning_rate": 2.948256285482563e-06, "loss": 0.6996, "step": 16075 }, { "epoch": 0.4693585588741935, "grad_norm": 0.7044941376700465, "learning_rate": 2.948094079480941e-06, "loss": 0.6216, "step": 16076 }, { "epoch": 0.46938775510204084, "grad_norm": 0.7450466191016235, "learning_rate": 2.9479318734793187e-06, "loss": 0.646, "step": 16077 }, { "epoch": 0.4694169513298882, "grad_norm": 0.8225554490714331, "learning_rate": 2.9477696674776967e-06, "loss": 0.7366, "step": 16078 }, { "epoch": 0.46944614755773556, "grad_norm": 0.7758760521895872, "learning_rate": 2.9476074614760747e-06, "loss": 0.7996, "step": 16079 }, { "epoch": 0.4694753437855829, "grad_norm": 0.7589554381754263, "learning_rate": 2.9474452554744527e-06, "loss": 0.6808, "step": 16080 }, { "epoch": 0.4695045400134303, "grad_norm": 0.7830426493808632, "learning_rate": 2.9472830494728307e-06, "loss": 0.7655, "step": 16081 }, { "epoch": 0.46953373624127764, "grad_norm": 0.661658745203146, "learning_rate": 2.9471208434712083e-06, "loss": 0.59, "step": 16082 }, { "epoch": 0.469562932469125, "grad_norm": 0.7693323742206227, "learning_rate": 2.9469586374695867e-06, "loss": 0.7048, "step": 16083 }, { "epoch": 0.46959212869697237, "grad_norm": 0.7022478040473873, "learning_rate": 2.9467964314679647e-06, "loss": 0.6533, "step": 16084 }, { "epoch": 0.4696213249248197, "grad_norm": 0.7434042880913225, "learning_rate": 2.9466342254663427e-06, "loss": 0.706, "step": 16085 }, { "epoch": 0.4696505211526671, "grad_norm": 0.7990002067831375, "learning_rate": 2.9464720194647207e-06, "loss": 0.6892, "step": 16086 }, { "epoch": 0.46967971738051445, "grad_norm": 0.7613918551811223, "learning_rate": 2.9463098134630987e-06, "loss": 0.7068, "step": 16087 }, { "epoch": 0.4697089136083618, "grad_norm": 0.7293989749901235, "learning_rate": 2.9461476074614763e-06, "loss": 0.6576, "step": 16088 }, { "epoch": 0.4697381098362092, "grad_norm": 0.7066484251525608, "learning_rate": 2.9459854014598543e-06, "loss": 0.6076, "step": 16089 }, { "epoch": 0.46976730606405653, "grad_norm": 0.7496568211309216, "learning_rate": 2.9458231954582323e-06, "loss": 0.6651, "step": 16090 }, { "epoch": 0.4697965022919039, "grad_norm": 0.7422212477669424, "learning_rate": 2.9456609894566103e-06, "loss": 0.7109, "step": 16091 }, { "epoch": 0.46982569851975126, "grad_norm": 0.7766667067679816, "learning_rate": 2.945498783454988e-06, "loss": 0.7112, "step": 16092 }, { "epoch": 0.4698548947475986, "grad_norm": 0.7657684863382525, "learning_rate": 2.945336577453366e-06, "loss": 0.7169, "step": 16093 }, { "epoch": 0.469884090975446, "grad_norm": 0.729261394292061, "learning_rate": 2.945174371451744e-06, "loss": 0.6976, "step": 16094 }, { "epoch": 0.46991328720329334, "grad_norm": 0.7404790839011364, "learning_rate": 2.945012165450122e-06, "loss": 0.6062, "step": 16095 }, { "epoch": 0.4699424834311407, "grad_norm": 0.6770943023211055, "learning_rate": 2.9448499594484995e-06, "loss": 0.5896, "step": 16096 }, { "epoch": 0.46997167965898806, "grad_norm": 0.695139802301624, "learning_rate": 2.9446877534468775e-06, "loss": 0.5862, "step": 16097 }, { "epoch": 0.4700008758868354, "grad_norm": 0.9074159227781199, "learning_rate": 2.9445255474452555e-06, "loss": 0.6842, "step": 16098 }, { "epoch": 0.4700300721146828, "grad_norm": 0.7172580659478093, "learning_rate": 2.9443633414436335e-06, "loss": 0.6274, "step": 16099 }, { "epoch": 0.47005926834253015, "grad_norm": 0.7619955806284098, "learning_rate": 2.9442011354420115e-06, "loss": 0.6809, "step": 16100 }, { "epoch": 0.4700884645703775, "grad_norm": 0.7329558456628453, "learning_rate": 2.944038929440389e-06, "loss": 0.6215, "step": 16101 }, { "epoch": 0.47011766079822487, "grad_norm": 0.7363408437243727, "learning_rate": 2.9438767234387676e-06, "loss": 0.635, "step": 16102 }, { "epoch": 0.47014685702607223, "grad_norm": 0.8000636275788906, "learning_rate": 2.9437145174371456e-06, "loss": 0.6849, "step": 16103 }, { "epoch": 0.4701760532539196, "grad_norm": 0.7258342942839946, "learning_rate": 2.9435523114355236e-06, "loss": 0.6809, "step": 16104 }, { "epoch": 0.47020524948176695, "grad_norm": 0.7017491544418054, "learning_rate": 2.9433901054339016e-06, "loss": 0.6371, "step": 16105 }, { "epoch": 0.4702344457096143, "grad_norm": 0.7416828167345678, "learning_rate": 2.9432278994322796e-06, "loss": 0.6099, "step": 16106 }, { "epoch": 0.4702636419374617, "grad_norm": 0.6838977231157953, "learning_rate": 2.943065693430657e-06, "loss": 0.6263, "step": 16107 }, { "epoch": 0.47029283816530904, "grad_norm": 0.781610553425849, "learning_rate": 2.942903487429035e-06, "loss": 0.6672, "step": 16108 }, { "epoch": 0.4703220343931564, "grad_norm": 0.7282075652742116, "learning_rate": 2.942741281427413e-06, "loss": 0.6583, "step": 16109 }, { "epoch": 0.47035123062100376, "grad_norm": 0.7214426171200086, "learning_rate": 2.942579075425791e-06, "loss": 0.5868, "step": 16110 }, { "epoch": 0.4703804268488511, "grad_norm": 0.6982460214627358, "learning_rate": 2.9424168694241688e-06, "loss": 0.6249, "step": 16111 }, { "epoch": 0.4704096230766985, "grad_norm": 0.7136877454185191, "learning_rate": 2.9422546634225468e-06, "loss": 0.6311, "step": 16112 }, { "epoch": 0.47043881930454584, "grad_norm": 0.7126656348215834, "learning_rate": 2.9420924574209248e-06, "loss": 0.6565, "step": 16113 }, { "epoch": 0.4704680155323932, "grad_norm": 0.7436629733903561, "learning_rate": 2.941930251419303e-06, "loss": 0.6497, "step": 16114 }, { "epoch": 0.47049721176024056, "grad_norm": 0.6940685805957941, "learning_rate": 2.9417680454176804e-06, "loss": 0.6073, "step": 16115 }, { "epoch": 0.4705264079880879, "grad_norm": 0.8246006618405479, "learning_rate": 2.9416058394160584e-06, "loss": 0.7047, "step": 16116 }, { "epoch": 0.4705556042159353, "grad_norm": 0.7636249474098576, "learning_rate": 2.9414436334144364e-06, "loss": 0.7618, "step": 16117 }, { "epoch": 0.47058480044378265, "grad_norm": 0.7165667796840983, "learning_rate": 2.9412814274128144e-06, "loss": 0.634, "step": 16118 }, { "epoch": 0.47061399667163, "grad_norm": 0.6834510676445766, "learning_rate": 2.9411192214111924e-06, "loss": 0.6024, "step": 16119 }, { "epoch": 0.47064319289947737, "grad_norm": 0.6779144564775226, "learning_rate": 2.94095701540957e-06, "loss": 0.6124, "step": 16120 }, { "epoch": 0.47067238912732473, "grad_norm": 0.7175790225432697, "learning_rate": 2.9407948094079484e-06, "loss": 0.6352, "step": 16121 }, { "epoch": 0.4707015853551721, "grad_norm": 0.7405444858542395, "learning_rate": 2.9406326034063264e-06, "loss": 0.7117, "step": 16122 }, { "epoch": 0.47073078158301945, "grad_norm": 0.6976000864349328, "learning_rate": 2.9404703974047044e-06, "loss": 0.61, "step": 16123 }, { "epoch": 0.4707599778108668, "grad_norm": 0.7464560409102662, "learning_rate": 2.9403081914030824e-06, "loss": 0.6619, "step": 16124 }, { "epoch": 0.4707891740387142, "grad_norm": 0.7209666363411202, "learning_rate": 2.9401459854014604e-06, "loss": 0.6502, "step": 16125 }, { "epoch": 0.47081837026656154, "grad_norm": 0.6989752318759003, "learning_rate": 2.939983779399838e-06, "loss": 0.6396, "step": 16126 }, { "epoch": 0.4708475664944089, "grad_norm": 0.6928312640032207, "learning_rate": 2.939821573398216e-06, "loss": 0.6067, "step": 16127 }, { "epoch": 0.47087676272225626, "grad_norm": 0.7115893050286757, "learning_rate": 2.939659367396594e-06, "loss": 0.6252, "step": 16128 }, { "epoch": 0.4709059589501036, "grad_norm": 0.7724800973672957, "learning_rate": 2.939497161394972e-06, "loss": 0.7544, "step": 16129 }, { "epoch": 0.470935155177951, "grad_norm": 0.8330791926347111, "learning_rate": 2.9393349553933496e-06, "loss": 0.6562, "step": 16130 }, { "epoch": 0.4709643514057984, "grad_norm": 0.7230542088707554, "learning_rate": 2.9391727493917276e-06, "loss": 0.6445, "step": 16131 }, { "epoch": 0.47099354763364576, "grad_norm": 0.6913972332674274, "learning_rate": 2.9390105433901056e-06, "loss": 0.6251, "step": 16132 }, { "epoch": 0.4710227438614931, "grad_norm": 0.675263606481125, "learning_rate": 2.9388483373884837e-06, "loss": 0.5605, "step": 16133 }, { "epoch": 0.4710519400893405, "grad_norm": 0.7905120918202047, "learning_rate": 2.9386861313868612e-06, "loss": 0.6893, "step": 16134 }, { "epoch": 0.47108113631718784, "grad_norm": 0.7939579568231203, "learning_rate": 2.9385239253852392e-06, "loss": 0.6487, "step": 16135 }, { "epoch": 0.4711103325450352, "grad_norm": 0.7092942864386346, "learning_rate": 2.9383617193836172e-06, "loss": 0.6257, "step": 16136 }, { "epoch": 0.47113952877288257, "grad_norm": 0.6680925178632003, "learning_rate": 2.9381995133819953e-06, "loss": 0.5634, "step": 16137 }, { "epoch": 0.4711687250007299, "grad_norm": 0.6880596048872987, "learning_rate": 2.9380373073803733e-06, "loss": 0.6454, "step": 16138 }, { "epoch": 0.4711979212285773, "grad_norm": 0.7996118002804994, "learning_rate": 2.937875101378751e-06, "loss": 0.7619, "step": 16139 }, { "epoch": 0.47122711745642465, "grad_norm": 0.7875695625079341, "learning_rate": 2.9377128953771293e-06, "loss": 0.7913, "step": 16140 }, { "epoch": 0.471256313684272, "grad_norm": 0.7067398741660088, "learning_rate": 2.9375506893755073e-06, "loss": 0.6624, "step": 16141 }, { "epoch": 0.47128550991211937, "grad_norm": 0.7261781064114515, "learning_rate": 2.9373884833738853e-06, "loss": 0.6271, "step": 16142 }, { "epoch": 0.47131470613996673, "grad_norm": 0.7327127102713561, "learning_rate": 2.9372262773722633e-06, "loss": 0.6565, "step": 16143 }, { "epoch": 0.4713439023678141, "grad_norm": 0.7787936951982637, "learning_rate": 2.937064071370641e-06, "loss": 0.663, "step": 16144 }, { "epoch": 0.47137309859566145, "grad_norm": 0.7137502971702465, "learning_rate": 2.936901865369019e-06, "loss": 0.6474, "step": 16145 }, { "epoch": 0.4714022948235088, "grad_norm": 0.7491545075648951, "learning_rate": 2.936739659367397e-06, "loss": 0.6361, "step": 16146 }, { "epoch": 0.4714314910513562, "grad_norm": 0.7620803156026068, "learning_rate": 2.936577453365775e-06, "loss": 0.7377, "step": 16147 }, { "epoch": 0.47146068727920354, "grad_norm": 0.6937309916020553, "learning_rate": 2.936415247364153e-06, "loss": 0.6019, "step": 16148 }, { "epoch": 0.4714898835070509, "grad_norm": 0.7863775240660442, "learning_rate": 2.9362530413625305e-06, "loss": 0.7404, "step": 16149 }, { "epoch": 0.47151907973489826, "grad_norm": 0.6737751933624531, "learning_rate": 2.9360908353609085e-06, "loss": 0.5571, "step": 16150 }, { "epoch": 0.4715482759627456, "grad_norm": 0.8077225696826218, "learning_rate": 2.9359286293592865e-06, "loss": 0.7497, "step": 16151 }, { "epoch": 0.471577472190593, "grad_norm": 0.7492805936630867, "learning_rate": 2.9357664233576645e-06, "loss": 0.6662, "step": 16152 }, { "epoch": 0.47160666841844034, "grad_norm": 0.7064318719580549, "learning_rate": 2.935604217356042e-06, "loss": 0.6088, "step": 16153 }, { "epoch": 0.4716358646462877, "grad_norm": 0.744473029423195, "learning_rate": 2.93544201135442e-06, "loss": 0.6714, "step": 16154 }, { "epoch": 0.47166506087413507, "grad_norm": 0.804624667942865, "learning_rate": 2.935279805352798e-06, "loss": 0.6611, "step": 16155 }, { "epoch": 0.47169425710198243, "grad_norm": 0.728020263445298, "learning_rate": 2.935117599351176e-06, "loss": 0.6722, "step": 16156 }, { "epoch": 0.4717234533298298, "grad_norm": 0.7157138824372742, "learning_rate": 2.934955393349554e-06, "loss": 0.6452, "step": 16157 }, { "epoch": 0.47175264955767715, "grad_norm": 0.7057657304307204, "learning_rate": 2.9347931873479317e-06, "loss": 0.6047, "step": 16158 }, { "epoch": 0.4717818457855245, "grad_norm": 0.7787989685997656, "learning_rate": 2.93463098134631e-06, "loss": 0.7262, "step": 16159 }, { "epoch": 0.4718110420133719, "grad_norm": 0.7208886216511409, "learning_rate": 2.934468775344688e-06, "loss": 0.6111, "step": 16160 }, { "epoch": 0.47184023824121923, "grad_norm": 0.8492107314956823, "learning_rate": 2.934306569343066e-06, "loss": 0.6462, "step": 16161 }, { "epoch": 0.4718694344690666, "grad_norm": 0.7175703048535466, "learning_rate": 2.934144363341444e-06, "loss": 0.6535, "step": 16162 }, { "epoch": 0.47189863069691396, "grad_norm": 0.7577747210495132, "learning_rate": 2.9339821573398217e-06, "loss": 0.7079, "step": 16163 }, { "epoch": 0.4719278269247613, "grad_norm": 0.7523304796846334, "learning_rate": 2.9338199513381997e-06, "loss": 0.675, "step": 16164 }, { "epoch": 0.4719570231526087, "grad_norm": 0.6587894419625911, "learning_rate": 2.9336577453365778e-06, "loss": 0.5365, "step": 16165 }, { "epoch": 0.47198621938045604, "grad_norm": 0.7415740833769783, "learning_rate": 2.9334955393349558e-06, "loss": 0.6451, "step": 16166 }, { "epoch": 0.4720154156083034, "grad_norm": 0.7163680397691323, "learning_rate": 2.9333333333333338e-06, "loss": 0.6482, "step": 16167 }, { "epoch": 0.47204461183615076, "grad_norm": 0.7275217246321984, "learning_rate": 2.9331711273317113e-06, "loss": 0.6138, "step": 16168 }, { "epoch": 0.4720738080639981, "grad_norm": 0.6899683882969803, "learning_rate": 2.9330089213300894e-06, "loss": 0.5664, "step": 16169 }, { "epoch": 0.4721030042918455, "grad_norm": 0.8020119679344728, "learning_rate": 2.9328467153284674e-06, "loss": 0.6996, "step": 16170 }, { "epoch": 0.47213220051969285, "grad_norm": 0.6795816812438931, "learning_rate": 2.9326845093268454e-06, "loss": 0.6209, "step": 16171 }, { "epoch": 0.4721613967475402, "grad_norm": 0.7612808612387174, "learning_rate": 2.932522303325223e-06, "loss": 0.7112, "step": 16172 }, { "epoch": 0.47219059297538757, "grad_norm": 0.7306038722094329, "learning_rate": 2.932360097323601e-06, "loss": 0.6217, "step": 16173 }, { "epoch": 0.47221978920323493, "grad_norm": 0.6908468318081337, "learning_rate": 2.932197891321979e-06, "loss": 0.646, "step": 16174 }, { "epoch": 0.4722489854310823, "grad_norm": 0.7056552002258311, "learning_rate": 2.932035685320357e-06, "loss": 0.5972, "step": 16175 }, { "epoch": 0.47227818165892965, "grad_norm": 0.7442171302358785, "learning_rate": 2.931873479318735e-06, "loss": 0.6703, "step": 16176 }, { "epoch": 0.472307377886777, "grad_norm": 0.6974909706866484, "learning_rate": 2.9317112733171134e-06, "loss": 0.6478, "step": 16177 }, { "epoch": 0.4723365741146244, "grad_norm": 0.7042979673933358, "learning_rate": 2.931549067315491e-06, "loss": 0.6395, "step": 16178 }, { "epoch": 0.47236577034247174, "grad_norm": 0.7728460780879493, "learning_rate": 2.931386861313869e-06, "loss": 0.6659, "step": 16179 }, { "epoch": 0.4723949665703191, "grad_norm": 0.7429117648938356, "learning_rate": 2.931224655312247e-06, "loss": 0.6878, "step": 16180 }, { "epoch": 0.47242416279816646, "grad_norm": 0.7206925660607916, "learning_rate": 2.931062449310625e-06, "loss": 0.6318, "step": 16181 }, { "epoch": 0.4724533590260138, "grad_norm": 0.7400993646415996, "learning_rate": 2.9309002433090026e-06, "loss": 0.6366, "step": 16182 }, { "epoch": 0.4724825552538612, "grad_norm": 0.7372555781070789, "learning_rate": 2.9307380373073806e-06, "loss": 0.6595, "step": 16183 }, { "epoch": 0.47251175148170854, "grad_norm": 0.7214333990798147, "learning_rate": 2.9305758313057586e-06, "loss": 0.6547, "step": 16184 }, { "epoch": 0.4725409477095559, "grad_norm": 0.7597135200383892, "learning_rate": 2.9304136253041366e-06, "loss": 0.7155, "step": 16185 }, { "epoch": 0.47257014393740326, "grad_norm": 0.6786171307102469, "learning_rate": 2.9302514193025146e-06, "loss": 0.554, "step": 16186 }, { "epoch": 0.4725993401652506, "grad_norm": 0.6776799683940744, "learning_rate": 2.930089213300892e-06, "loss": 0.5832, "step": 16187 }, { "epoch": 0.472628536393098, "grad_norm": 0.833571650115924, "learning_rate": 2.9299270072992702e-06, "loss": 0.7007, "step": 16188 }, { "epoch": 0.47265773262094535, "grad_norm": 0.789218486910166, "learning_rate": 2.9297648012976482e-06, "loss": 0.73, "step": 16189 }, { "epoch": 0.4726869288487927, "grad_norm": 0.7661096540454368, "learning_rate": 2.9296025952960262e-06, "loss": 0.6584, "step": 16190 }, { "epoch": 0.4727161250766401, "grad_norm": 0.7586073776777698, "learning_rate": 2.929440389294404e-06, "loss": 0.6552, "step": 16191 }, { "epoch": 0.4727453213044875, "grad_norm": 0.7805513304254252, "learning_rate": 2.929278183292782e-06, "loss": 0.6396, "step": 16192 }, { "epoch": 0.47277451753233485, "grad_norm": 0.6961414482164741, "learning_rate": 2.92911597729116e-06, "loss": 0.6281, "step": 16193 }, { "epoch": 0.4728037137601822, "grad_norm": 0.7013472009132042, "learning_rate": 2.928953771289538e-06, "loss": 0.588, "step": 16194 }, { "epoch": 0.47283290998802957, "grad_norm": 0.8733114535424562, "learning_rate": 2.928791565287916e-06, "loss": 0.6955, "step": 16195 }, { "epoch": 0.47286210621587693, "grad_norm": 0.7216472988554543, "learning_rate": 2.9286293592862943e-06, "loss": 0.6475, "step": 16196 }, { "epoch": 0.4728913024437243, "grad_norm": 0.8443744403406984, "learning_rate": 2.928467153284672e-06, "loss": 0.6567, "step": 16197 }, { "epoch": 0.47292049867157165, "grad_norm": 0.6582500494288369, "learning_rate": 2.92830494728305e-06, "loss": 0.5436, "step": 16198 }, { "epoch": 0.472949694899419, "grad_norm": 0.7290180112598357, "learning_rate": 2.928142741281428e-06, "loss": 0.6304, "step": 16199 }, { "epoch": 0.4729788911272664, "grad_norm": 0.7290850046476731, "learning_rate": 2.927980535279806e-06, "loss": 0.6557, "step": 16200 }, { "epoch": 0.47300808735511374, "grad_norm": 0.6716726041008265, "learning_rate": 2.9278183292781835e-06, "loss": 0.6022, "step": 16201 }, { "epoch": 0.4730372835829611, "grad_norm": 0.9155691004037186, "learning_rate": 2.9276561232765615e-06, "loss": 0.6959, "step": 16202 }, { "epoch": 0.47306647981080846, "grad_norm": 0.6488921140914137, "learning_rate": 2.9274939172749395e-06, "loss": 0.5233, "step": 16203 }, { "epoch": 0.4730956760386558, "grad_norm": 0.7055535516103002, "learning_rate": 2.9273317112733175e-06, "loss": 0.6147, "step": 16204 }, { "epoch": 0.4731248722665032, "grad_norm": 0.7624189407499516, "learning_rate": 2.9271695052716955e-06, "loss": 0.7094, "step": 16205 }, { "epoch": 0.47315406849435054, "grad_norm": 0.8390732448634883, "learning_rate": 2.927007299270073e-06, "loss": 0.7281, "step": 16206 }, { "epoch": 0.4731832647221979, "grad_norm": 0.7229004605299802, "learning_rate": 2.926845093268451e-06, "loss": 0.6098, "step": 16207 }, { "epoch": 0.47321246095004527, "grad_norm": 0.713183624874855, "learning_rate": 2.926682887266829e-06, "loss": 0.6182, "step": 16208 }, { "epoch": 0.4732416571778926, "grad_norm": 0.7036635335028875, "learning_rate": 2.926520681265207e-06, "loss": 0.6241, "step": 16209 }, { "epoch": 0.47327085340574, "grad_norm": 0.7211135699316996, "learning_rate": 2.9263584752635847e-06, "loss": 0.6756, "step": 16210 }, { "epoch": 0.47330004963358735, "grad_norm": 0.6894730643799009, "learning_rate": 2.9261962692619627e-06, "loss": 0.6013, "step": 16211 }, { "epoch": 0.4733292458614347, "grad_norm": 0.7527367506872259, "learning_rate": 2.9260340632603407e-06, "loss": 0.6644, "step": 16212 }, { "epoch": 0.47335844208928207, "grad_norm": 0.7307052095301936, "learning_rate": 2.9258718572587187e-06, "loss": 0.618, "step": 16213 }, { "epoch": 0.47338763831712943, "grad_norm": 0.7118295947409902, "learning_rate": 2.9257096512570967e-06, "loss": 0.6598, "step": 16214 }, { "epoch": 0.4734168345449768, "grad_norm": 0.6550001842572009, "learning_rate": 2.925547445255475e-06, "loss": 0.5936, "step": 16215 }, { "epoch": 0.47344603077282416, "grad_norm": 0.7341813593570639, "learning_rate": 2.9253852392538527e-06, "loss": 0.6646, "step": 16216 }, { "epoch": 0.4734752270006715, "grad_norm": 0.7334583352170992, "learning_rate": 2.9252230332522307e-06, "loss": 0.6169, "step": 16217 }, { "epoch": 0.4735044232285189, "grad_norm": 0.6740656651097658, "learning_rate": 2.9250608272506087e-06, "loss": 0.5912, "step": 16218 }, { "epoch": 0.47353361945636624, "grad_norm": 0.750528907505174, "learning_rate": 2.9248986212489867e-06, "loss": 0.7175, "step": 16219 }, { "epoch": 0.4735628156842136, "grad_norm": 0.6910650630987979, "learning_rate": 2.9247364152473643e-06, "loss": 0.6141, "step": 16220 }, { "epoch": 0.47359201191206096, "grad_norm": 0.7257896982514066, "learning_rate": 2.9245742092457423e-06, "loss": 0.6575, "step": 16221 }, { "epoch": 0.4736212081399083, "grad_norm": 0.7200895090747672, "learning_rate": 2.9244120032441203e-06, "loss": 0.6775, "step": 16222 }, { "epoch": 0.4736504043677557, "grad_norm": 0.7346476504530138, "learning_rate": 2.9242497972424983e-06, "loss": 0.6967, "step": 16223 }, { "epoch": 0.47367960059560305, "grad_norm": 0.712221223770527, "learning_rate": 2.9240875912408763e-06, "loss": 0.6887, "step": 16224 }, { "epoch": 0.4737087968234504, "grad_norm": 0.6997280180546084, "learning_rate": 2.923925385239254e-06, "loss": 0.6241, "step": 16225 }, { "epoch": 0.47373799305129777, "grad_norm": 0.7125940052181017, "learning_rate": 2.923763179237632e-06, "loss": 0.6495, "step": 16226 }, { "epoch": 0.47376718927914513, "grad_norm": 0.7300607796051175, "learning_rate": 2.92360097323601e-06, "loss": 0.6941, "step": 16227 }, { "epoch": 0.4737963855069925, "grad_norm": 0.7355062311467573, "learning_rate": 2.923438767234388e-06, "loss": 0.6143, "step": 16228 }, { "epoch": 0.47382558173483985, "grad_norm": 0.6823390855454622, "learning_rate": 2.9232765612327655e-06, "loss": 0.5987, "step": 16229 }, { "epoch": 0.4738547779626872, "grad_norm": 0.741846405622493, "learning_rate": 2.9231143552311435e-06, "loss": 0.6443, "step": 16230 }, { "epoch": 0.4738839741905346, "grad_norm": 0.7400911688330045, "learning_rate": 2.9229521492295215e-06, "loss": 0.5856, "step": 16231 }, { "epoch": 0.47391317041838193, "grad_norm": 0.7534885908487059, "learning_rate": 2.9227899432278995e-06, "loss": 0.69, "step": 16232 }, { "epoch": 0.4739423666462293, "grad_norm": 0.7367644449382184, "learning_rate": 2.9226277372262776e-06, "loss": 0.7098, "step": 16233 }, { "epoch": 0.47397156287407666, "grad_norm": 0.781928128925256, "learning_rate": 2.922465531224656e-06, "loss": 0.7274, "step": 16234 }, { "epoch": 0.474000759101924, "grad_norm": 0.6927899762994187, "learning_rate": 2.9223033252230336e-06, "loss": 0.595, "step": 16235 }, { "epoch": 0.4740299553297714, "grad_norm": 0.813583048106455, "learning_rate": 2.9221411192214116e-06, "loss": 0.7465, "step": 16236 }, { "epoch": 0.47405915155761874, "grad_norm": 0.8240687735258607, "learning_rate": 2.9219789132197896e-06, "loss": 0.6376, "step": 16237 }, { "epoch": 0.4740883477854661, "grad_norm": 0.6834168416815002, "learning_rate": 2.9218167072181676e-06, "loss": 0.5995, "step": 16238 }, { "epoch": 0.47411754401331346, "grad_norm": 0.6583855268319782, "learning_rate": 2.921654501216545e-06, "loss": 0.5717, "step": 16239 }, { "epoch": 0.4741467402411608, "grad_norm": 0.7456646086599938, "learning_rate": 2.921492295214923e-06, "loss": 0.6645, "step": 16240 }, { "epoch": 0.4741759364690082, "grad_norm": 0.7143898853952197, "learning_rate": 2.921330089213301e-06, "loss": 0.573, "step": 16241 }, { "epoch": 0.47420513269685555, "grad_norm": 0.8032862294348646, "learning_rate": 2.921167883211679e-06, "loss": 0.7184, "step": 16242 }, { "epoch": 0.4742343289247029, "grad_norm": 0.761786291416409, "learning_rate": 2.921005677210057e-06, "loss": 0.7315, "step": 16243 }, { "epoch": 0.47426352515255027, "grad_norm": 0.7328594355921039, "learning_rate": 2.9208434712084348e-06, "loss": 0.6294, "step": 16244 }, { "epoch": 0.47429272138039763, "grad_norm": 0.7453508240234388, "learning_rate": 2.920681265206813e-06, "loss": 0.6857, "step": 16245 }, { "epoch": 0.474321917608245, "grad_norm": 0.7294648316989263, "learning_rate": 2.920519059205191e-06, "loss": 0.6556, "step": 16246 }, { "epoch": 0.47435111383609235, "grad_norm": 0.8139797893515632, "learning_rate": 2.920356853203569e-06, "loss": 0.7684, "step": 16247 }, { "epoch": 0.4743803100639397, "grad_norm": 0.7231229377446311, "learning_rate": 2.9201946472019464e-06, "loss": 0.668, "step": 16248 }, { "epoch": 0.4744095062917871, "grad_norm": 0.7095632432296447, "learning_rate": 2.9200324412003244e-06, "loss": 0.5468, "step": 16249 }, { "epoch": 0.47443870251963444, "grad_norm": 0.7375000994961669, "learning_rate": 2.9198702351987024e-06, "loss": 0.6628, "step": 16250 }, { "epoch": 0.4744678987474818, "grad_norm": 0.7547286199265953, "learning_rate": 2.9197080291970804e-06, "loss": 0.724, "step": 16251 }, { "epoch": 0.4744970949753292, "grad_norm": 0.7115914036238771, "learning_rate": 2.9195458231954584e-06, "loss": 0.5778, "step": 16252 }, { "epoch": 0.4745262912031766, "grad_norm": 0.725377857429511, "learning_rate": 2.919383617193837e-06, "loss": 0.6591, "step": 16253 }, { "epoch": 0.47455548743102394, "grad_norm": 0.7552633554574728, "learning_rate": 2.9192214111922144e-06, "loss": 0.703, "step": 16254 }, { "epoch": 0.4745846836588713, "grad_norm": 0.7268115335335552, "learning_rate": 2.9190592051905924e-06, "loss": 0.6368, "step": 16255 }, { "epoch": 0.47461387988671866, "grad_norm": 0.7570980479545435, "learning_rate": 2.9188969991889704e-06, "loss": 0.6447, "step": 16256 }, { "epoch": 0.474643076114566, "grad_norm": 0.7075533872790435, "learning_rate": 2.9187347931873484e-06, "loss": 0.6318, "step": 16257 }, { "epoch": 0.4746722723424134, "grad_norm": 0.7507033002012649, "learning_rate": 2.918572587185726e-06, "loss": 0.686, "step": 16258 }, { "epoch": 0.47470146857026074, "grad_norm": 0.6910568990976595, "learning_rate": 2.918410381184104e-06, "loss": 0.6187, "step": 16259 }, { "epoch": 0.4747306647981081, "grad_norm": 0.7291226874666417, "learning_rate": 2.918248175182482e-06, "loss": 0.6617, "step": 16260 }, { "epoch": 0.47475986102595547, "grad_norm": 0.759353333658811, "learning_rate": 2.91808596918086e-06, "loss": 0.7218, "step": 16261 }, { "epoch": 0.4747890572538028, "grad_norm": 0.6784002583853195, "learning_rate": 2.917923763179238e-06, "loss": 0.619, "step": 16262 }, { "epoch": 0.4748182534816502, "grad_norm": 0.741059452761197, "learning_rate": 2.9177615571776156e-06, "loss": 0.679, "step": 16263 }, { "epoch": 0.47484744970949755, "grad_norm": 0.9548469795331116, "learning_rate": 2.9175993511759936e-06, "loss": 0.8449, "step": 16264 }, { "epoch": 0.4748766459373449, "grad_norm": 0.7349016924761135, "learning_rate": 2.9174371451743717e-06, "loss": 0.692, "step": 16265 }, { "epoch": 0.47490584216519227, "grad_norm": 0.705548800226531, "learning_rate": 2.9172749391727497e-06, "loss": 0.6303, "step": 16266 }, { "epoch": 0.47493503839303963, "grad_norm": 0.7745500649511615, "learning_rate": 2.9171127331711272e-06, "loss": 0.7175, "step": 16267 }, { "epoch": 0.474964234620887, "grad_norm": 0.7064811579828132, "learning_rate": 2.9169505271695053e-06, "loss": 0.6624, "step": 16268 }, { "epoch": 0.47499343084873435, "grad_norm": 0.6876562813985191, "learning_rate": 2.9167883211678833e-06, "loss": 0.5464, "step": 16269 }, { "epoch": 0.4750226270765817, "grad_norm": 0.7188708094771473, "learning_rate": 2.9166261151662613e-06, "loss": 0.6545, "step": 16270 }, { "epoch": 0.4750518233044291, "grad_norm": 0.7483907226370298, "learning_rate": 2.9164639091646393e-06, "loss": 0.6605, "step": 16271 }, { "epoch": 0.47508101953227644, "grad_norm": 0.7614153421402768, "learning_rate": 2.9163017031630177e-06, "loss": 0.6767, "step": 16272 }, { "epoch": 0.4751102157601238, "grad_norm": 0.7966328910539614, "learning_rate": 2.9161394971613953e-06, "loss": 0.671, "step": 16273 }, { "epoch": 0.47513941198797116, "grad_norm": 0.7413820870030682, "learning_rate": 2.9159772911597733e-06, "loss": 0.6705, "step": 16274 }, { "epoch": 0.4751686082158185, "grad_norm": 0.7528231393018812, "learning_rate": 2.9158150851581513e-06, "loss": 0.7299, "step": 16275 }, { "epoch": 0.4751978044436659, "grad_norm": 0.7408196239134829, "learning_rate": 2.9156528791565293e-06, "loss": 0.7222, "step": 16276 }, { "epoch": 0.47522700067151324, "grad_norm": 0.7106153627862604, "learning_rate": 2.915490673154907e-06, "loss": 0.5766, "step": 16277 }, { "epoch": 0.4752561968993606, "grad_norm": 0.6851774622785705, "learning_rate": 2.915328467153285e-06, "loss": 0.6129, "step": 16278 }, { "epoch": 0.47528539312720797, "grad_norm": 0.7522745220631584, "learning_rate": 2.915166261151663e-06, "loss": 0.6881, "step": 16279 }, { "epoch": 0.47531458935505533, "grad_norm": 0.7526423194392888, "learning_rate": 2.915004055150041e-06, "loss": 0.6489, "step": 16280 }, { "epoch": 0.4753437855829027, "grad_norm": 0.7313377791114063, "learning_rate": 2.914841849148419e-06, "loss": 0.6218, "step": 16281 }, { "epoch": 0.47537298181075005, "grad_norm": 0.7033183484957639, "learning_rate": 2.9146796431467965e-06, "loss": 0.5745, "step": 16282 }, { "epoch": 0.4754021780385974, "grad_norm": 0.7567349424618183, "learning_rate": 2.9145174371451745e-06, "loss": 0.6721, "step": 16283 }, { "epoch": 0.4754313742664448, "grad_norm": 0.6990308976918298, "learning_rate": 2.9143552311435525e-06, "loss": 0.6256, "step": 16284 }, { "epoch": 0.47546057049429213, "grad_norm": 0.7682489308886101, "learning_rate": 2.9141930251419305e-06, "loss": 0.7765, "step": 16285 }, { "epoch": 0.4754897667221395, "grad_norm": 0.6878450862071083, "learning_rate": 2.914030819140308e-06, "loss": 0.5984, "step": 16286 }, { "epoch": 0.47551896294998686, "grad_norm": 0.6809620772442879, "learning_rate": 2.913868613138686e-06, "loss": 0.5858, "step": 16287 }, { "epoch": 0.4755481591778342, "grad_norm": 0.717886518213662, "learning_rate": 2.913706407137064e-06, "loss": 0.6174, "step": 16288 }, { "epoch": 0.4755773554056816, "grad_norm": 0.7822118291418076, "learning_rate": 2.913544201135442e-06, "loss": 0.7094, "step": 16289 }, { "epoch": 0.47560655163352894, "grad_norm": 0.760158806667505, "learning_rate": 2.9133819951338197e-06, "loss": 0.7446, "step": 16290 }, { "epoch": 0.4756357478613763, "grad_norm": 0.8320472180881148, "learning_rate": 2.9132197891321986e-06, "loss": 0.6648, "step": 16291 }, { "epoch": 0.47566494408922366, "grad_norm": 0.7670578373100524, "learning_rate": 2.913057583130576e-06, "loss": 0.6619, "step": 16292 }, { "epoch": 0.475694140317071, "grad_norm": 0.7827668817211407, "learning_rate": 2.912895377128954e-06, "loss": 0.6404, "step": 16293 }, { "epoch": 0.4757233365449184, "grad_norm": 0.722605808096308, "learning_rate": 2.912733171127332e-06, "loss": 0.6336, "step": 16294 }, { "epoch": 0.47575253277276575, "grad_norm": 0.7585842772099094, "learning_rate": 2.91257096512571e-06, "loss": 0.6452, "step": 16295 }, { "epoch": 0.4757817290006131, "grad_norm": 0.706656503698428, "learning_rate": 2.9124087591240877e-06, "loss": 0.644, "step": 16296 }, { "epoch": 0.47581092522846047, "grad_norm": 0.7449801520083058, "learning_rate": 2.9122465531224658e-06, "loss": 0.6823, "step": 16297 }, { "epoch": 0.47584012145630783, "grad_norm": 0.7516427759477426, "learning_rate": 2.9120843471208438e-06, "loss": 0.659, "step": 16298 }, { "epoch": 0.4758693176841552, "grad_norm": 0.7725414184022213, "learning_rate": 2.9119221411192218e-06, "loss": 0.711, "step": 16299 }, { "epoch": 0.47589851391200255, "grad_norm": 0.734477106614541, "learning_rate": 2.9117599351175998e-06, "loss": 0.6714, "step": 16300 }, { "epoch": 0.4759277101398499, "grad_norm": 0.7068532964679836, "learning_rate": 2.9115977291159774e-06, "loss": 0.601, "step": 16301 }, { "epoch": 0.4759569063676973, "grad_norm": 0.6745311485783669, "learning_rate": 2.9114355231143554e-06, "loss": 0.5724, "step": 16302 }, { "epoch": 0.47598610259554464, "grad_norm": 0.7359204742815412, "learning_rate": 2.9112733171127334e-06, "loss": 0.7391, "step": 16303 }, { "epoch": 0.476015298823392, "grad_norm": 0.7030754678190986, "learning_rate": 2.9111111111111114e-06, "loss": 0.6462, "step": 16304 }, { "epoch": 0.47604449505123936, "grad_norm": 0.7098610083441831, "learning_rate": 2.910948905109489e-06, "loss": 0.6398, "step": 16305 }, { "epoch": 0.4760736912790867, "grad_norm": 0.7221166206029268, "learning_rate": 2.910786699107867e-06, "loss": 0.6649, "step": 16306 }, { "epoch": 0.4761028875069341, "grad_norm": 0.7673587863126401, "learning_rate": 2.910624493106245e-06, "loss": 0.7197, "step": 16307 }, { "epoch": 0.47613208373478144, "grad_norm": 0.7209690134453273, "learning_rate": 2.910462287104623e-06, "loss": 0.6152, "step": 16308 }, { "epoch": 0.4761612799626288, "grad_norm": 0.731789672850652, "learning_rate": 2.9103000811030006e-06, "loss": 0.6436, "step": 16309 }, { "epoch": 0.47619047619047616, "grad_norm": 0.7087814672853753, "learning_rate": 2.9101378751013794e-06, "loss": 0.6402, "step": 16310 }, { "epoch": 0.4762196724183235, "grad_norm": 0.7234931707609297, "learning_rate": 2.909975669099757e-06, "loss": 0.6644, "step": 16311 }, { "epoch": 0.47624886864617094, "grad_norm": 0.7898671682670304, "learning_rate": 2.909813463098135e-06, "loss": 0.6363, "step": 16312 }, { "epoch": 0.4762780648740183, "grad_norm": 0.7824645309600728, "learning_rate": 2.909651257096513e-06, "loss": 0.7309, "step": 16313 }, { "epoch": 0.47630726110186566, "grad_norm": 0.7173101918480609, "learning_rate": 2.909489051094891e-06, "loss": 0.6761, "step": 16314 }, { "epoch": 0.476336457329713, "grad_norm": 0.7216381913695954, "learning_rate": 2.9093268450932686e-06, "loss": 0.6522, "step": 16315 }, { "epoch": 0.4763656535575604, "grad_norm": 0.7344545210992006, "learning_rate": 2.9091646390916466e-06, "loss": 0.658, "step": 16316 }, { "epoch": 0.47639484978540775, "grad_norm": 0.7192262076714016, "learning_rate": 2.9090024330900246e-06, "loss": 0.6251, "step": 16317 }, { "epoch": 0.4764240460132551, "grad_norm": 0.721093551082241, "learning_rate": 2.9088402270884026e-06, "loss": 0.5797, "step": 16318 }, { "epoch": 0.47645324224110247, "grad_norm": 0.7438180548003217, "learning_rate": 2.9086780210867806e-06, "loss": 0.6432, "step": 16319 }, { "epoch": 0.47648243846894983, "grad_norm": 0.8161346154651306, "learning_rate": 2.9085158150851582e-06, "loss": 0.775, "step": 16320 }, { "epoch": 0.4765116346967972, "grad_norm": 0.7123471473794267, "learning_rate": 2.9083536090835362e-06, "loss": 0.6748, "step": 16321 }, { "epoch": 0.47654083092464455, "grad_norm": 0.6863085656519788, "learning_rate": 2.9081914030819142e-06, "loss": 0.5905, "step": 16322 }, { "epoch": 0.4765700271524919, "grad_norm": 0.8853698833246499, "learning_rate": 2.9080291970802922e-06, "loss": 0.741, "step": 16323 }, { "epoch": 0.4765992233803393, "grad_norm": 0.6860601797788768, "learning_rate": 2.90786699107867e-06, "loss": 0.6139, "step": 16324 }, { "epoch": 0.47662841960818664, "grad_norm": 0.8497887070412378, "learning_rate": 2.907704785077048e-06, "loss": 0.7287, "step": 16325 }, { "epoch": 0.476657615836034, "grad_norm": 0.6782403224882203, "learning_rate": 2.907542579075426e-06, "loss": 0.5783, "step": 16326 }, { "epoch": 0.47668681206388136, "grad_norm": 0.724856330783381, "learning_rate": 2.907380373073804e-06, "loss": 0.6563, "step": 16327 }, { "epoch": 0.4767160082917287, "grad_norm": 0.7938200113267264, "learning_rate": 2.9072181670721823e-06, "loss": 0.7029, "step": 16328 }, { "epoch": 0.4767452045195761, "grad_norm": 0.7549560494594548, "learning_rate": 2.9070559610705603e-06, "loss": 0.7086, "step": 16329 }, { "epoch": 0.47677440074742344, "grad_norm": 0.6973110592009507, "learning_rate": 2.906893755068938e-06, "loss": 0.5619, "step": 16330 }, { "epoch": 0.4768035969752708, "grad_norm": 0.7075219575119829, "learning_rate": 2.906731549067316e-06, "loss": 0.5929, "step": 16331 }, { "epoch": 0.47683279320311817, "grad_norm": 0.9559664146810033, "learning_rate": 2.906569343065694e-06, "loss": 0.6758, "step": 16332 }, { "epoch": 0.4768619894309655, "grad_norm": 0.7468908018813613, "learning_rate": 2.906407137064072e-06, "loss": 0.6879, "step": 16333 }, { "epoch": 0.4768911856588129, "grad_norm": 0.8305924603196354, "learning_rate": 2.9062449310624495e-06, "loss": 0.6904, "step": 16334 }, { "epoch": 0.47692038188666025, "grad_norm": 0.7226611976958427, "learning_rate": 2.9060827250608275e-06, "loss": 0.6757, "step": 16335 }, { "epoch": 0.4769495781145076, "grad_norm": 0.7042727510761609, "learning_rate": 2.9059205190592055e-06, "loss": 0.5788, "step": 16336 }, { "epoch": 0.47697877434235497, "grad_norm": 0.7087675892575757, "learning_rate": 2.9057583130575835e-06, "loss": 0.5885, "step": 16337 }, { "epoch": 0.47700797057020233, "grad_norm": 0.7091418097637959, "learning_rate": 2.9055961070559615e-06, "loss": 0.6034, "step": 16338 }, { "epoch": 0.4770371667980497, "grad_norm": 0.7578404756579588, "learning_rate": 2.905433901054339e-06, "loss": 0.6175, "step": 16339 }, { "epoch": 0.47706636302589706, "grad_norm": 0.7614548673268311, "learning_rate": 2.905271695052717e-06, "loss": 0.6302, "step": 16340 }, { "epoch": 0.4770955592537444, "grad_norm": 0.6761467068759206, "learning_rate": 2.905109489051095e-06, "loss": 0.5862, "step": 16341 }, { "epoch": 0.4771247554815918, "grad_norm": 0.6942905074978682, "learning_rate": 2.904947283049473e-06, "loss": 0.5861, "step": 16342 }, { "epoch": 0.47715395170943914, "grad_norm": 0.7433935063118025, "learning_rate": 2.9047850770478507e-06, "loss": 0.6173, "step": 16343 }, { "epoch": 0.4771831479372865, "grad_norm": 0.7657535369253119, "learning_rate": 2.9046228710462287e-06, "loss": 0.7007, "step": 16344 }, { "epoch": 0.47721234416513386, "grad_norm": 0.7086689752418243, "learning_rate": 2.9044606650446067e-06, "loss": 0.6497, "step": 16345 }, { "epoch": 0.4772415403929812, "grad_norm": 0.7982315750031449, "learning_rate": 2.9042984590429847e-06, "loss": 0.6627, "step": 16346 }, { "epoch": 0.4772707366208286, "grad_norm": 0.6992301040230564, "learning_rate": 2.904136253041363e-06, "loss": 0.6181, "step": 16347 }, { "epoch": 0.47729993284867595, "grad_norm": 0.7664373454921872, "learning_rate": 2.903974047039741e-06, "loss": 0.7167, "step": 16348 }, { "epoch": 0.4773291290765233, "grad_norm": 0.7360939242607117, "learning_rate": 2.9038118410381187e-06, "loss": 0.6526, "step": 16349 }, { "epoch": 0.47735832530437067, "grad_norm": 0.7121666921593476, "learning_rate": 2.9036496350364967e-06, "loss": 0.6503, "step": 16350 }, { "epoch": 0.47738752153221803, "grad_norm": 0.7131528252162481, "learning_rate": 2.9034874290348747e-06, "loss": 0.6093, "step": 16351 }, { "epoch": 0.4774167177600654, "grad_norm": 0.7359188665078035, "learning_rate": 2.9033252230332527e-06, "loss": 0.61, "step": 16352 }, { "epoch": 0.47744591398791275, "grad_norm": 0.7222718418113334, "learning_rate": 2.9031630170316303e-06, "loss": 0.6287, "step": 16353 }, { "epoch": 0.4774751102157601, "grad_norm": 0.7154122264761134, "learning_rate": 2.9030008110300083e-06, "loss": 0.6318, "step": 16354 }, { "epoch": 0.4775043064436075, "grad_norm": 0.7203149908310532, "learning_rate": 2.9028386050283863e-06, "loss": 0.6589, "step": 16355 }, { "epoch": 0.47753350267145483, "grad_norm": 0.7270224681517268, "learning_rate": 2.9026763990267643e-06, "loss": 0.6203, "step": 16356 }, { "epoch": 0.4775626988993022, "grad_norm": 0.7082701265809149, "learning_rate": 2.9025141930251424e-06, "loss": 0.6088, "step": 16357 }, { "epoch": 0.47759189512714956, "grad_norm": 0.8024150021765126, "learning_rate": 2.90235198702352e-06, "loss": 0.6835, "step": 16358 }, { "epoch": 0.4776210913549969, "grad_norm": 0.7248710323602235, "learning_rate": 2.902189781021898e-06, "loss": 0.6638, "step": 16359 }, { "epoch": 0.4776502875828443, "grad_norm": 0.6700495600481065, "learning_rate": 2.902027575020276e-06, "loss": 0.6007, "step": 16360 }, { "epoch": 0.47767948381069164, "grad_norm": 0.7659123311270863, "learning_rate": 2.901865369018654e-06, "loss": 0.6531, "step": 16361 }, { "epoch": 0.477708680038539, "grad_norm": 0.7883169993116018, "learning_rate": 2.9017031630170315e-06, "loss": 0.709, "step": 16362 }, { "epoch": 0.47773787626638636, "grad_norm": 0.7270996866409558, "learning_rate": 2.9015409570154095e-06, "loss": 0.6648, "step": 16363 }, { "epoch": 0.4777670724942337, "grad_norm": 0.7352465556946377, "learning_rate": 2.9013787510137876e-06, "loss": 0.6672, "step": 16364 }, { "epoch": 0.4777962687220811, "grad_norm": 0.7053425231289134, "learning_rate": 2.9012165450121656e-06, "loss": 0.5694, "step": 16365 }, { "epoch": 0.47782546494992845, "grad_norm": 0.7328751981462055, "learning_rate": 2.901054339010544e-06, "loss": 0.6557, "step": 16366 }, { "epoch": 0.4778546611777758, "grad_norm": 0.7642409436441994, "learning_rate": 2.900892133008922e-06, "loss": 0.7138, "step": 16367 }, { "epoch": 0.47788385740562317, "grad_norm": 0.8414945035318243, "learning_rate": 2.9007299270072996e-06, "loss": 0.823, "step": 16368 }, { "epoch": 0.47791305363347053, "grad_norm": 0.6610178128088989, "learning_rate": 2.9005677210056776e-06, "loss": 0.5473, "step": 16369 }, { "epoch": 0.4779422498613179, "grad_norm": 0.7607478000320469, "learning_rate": 2.9004055150040556e-06, "loss": 0.7129, "step": 16370 }, { "epoch": 0.47797144608916525, "grad_norm": 0.6809773605278159, "learning_rate": 2.9002433090024336e-06, "loss": 0.6116, "step": 16371 }, { "epoch": 0.47800064231701267, "grad_norm": 0.6801585158594077, "learning_rate": 2.900081103000811e-06, "loss": 0.6099, "step": 16372 }, { "epoch": 0.47802983854486003, "grad_norm": 0.760421385747417, "learning_rate": 2.899918896999189e-06, "loss": 0.7216, "step": 16373 }, { "epoch": 0.4780590347727074, "grad_norm": 0.7420569979762097, "learning_rate": 2.899756690997567e-06, "loss": 0.6742, "step": 16374 }, { "epoch": 0.47808823100055475, "grad_norm": 0.6836410704217136, "learning_rate": 2.899594484995945e-06, "loss": 0.5981, "step": 16375 }, { "epoch": 0.4781174272284021, "grad_norm": 0.7844129995070472, "learning_rate": 2.899432278994323e-06, "loss": 0.6533, "step": 16376 }, { "epoch": 0.4781466234562495, "grad_norm": 0.7546839736360789, "learning_rate": 2.899270072992701e-06, "loss": 0.6833, "step": 16377 }, { "epoch": 0.47817581968409684, "grad_norm": 0.7597560392260488, "learning_rate": 2.899107866991079e-06, "loss": 0.7499, "step": 16378 }, { "epoch": 0.4782050159119442, "grad_norm": 0.7823368454973632, "learning_rate": 2.898945660989457e-06, "loss": 0.758, "step": 16379 }, { "epoch": 0.47823421213979156, "grad_norm": 0.7547890517417836, "learning_rate": 2.898783454987835e-06, "loss": 0.6855, "step": 16380 }, { "epoch": 0.4782634083676389, "grad_norm": 0.6905417435760215, "learning_rate": 2.8986212489862124e-06, "loss": 0.6045, "step": 16381 }, { "epoch": 0.4782926045954863, "grad_norm": 0.7087204918775734, "learning_rate": 2.8984590429845904e-06, "loss": 0.6206, "step": 16382 }, { "epoch": 0.47832180082333364, "grad_norm": 0.7619410695084899, "learning_rate": 2.8982968369829684e-06, "loss": 0.6547, "step": 16383 }, { "epoch": 0.478350997051181, "grad_norm": 0.7205735937354036, "learning_rate": 2.8981346309813464e-06, "loss": 0.5914, "step": 16384 }, { "epoch": 0.47838019327902837, "grad_norm": 0.7251607075086786, "learning_rate": 2.897972424979725e-06, "loss": 0.6229, "step": 16385 }, { "epoch": 0.4784093895068757, "grad_norm": 0.7518794843622377, "learning_rate": 2.897810218978103e-06, "loss": 0.7209, "step": 16386 }, { "epoch": 0.4784385857347231, "grad_norm": 0.7493422052985257, "learning_rate": 2.8976480129764804e-06, "loss": 0.6472, "step": 16387 }, { "epoch": 0.47846778196257045, "grad_norm": 0.7269022856793078, "learning_rate": 2.8974858069748584e-06, "loss": 0.66, "step": 16388 }, { "epoch": 0.4784969781904178, "grad_norm": 0.7280259407892221, "learning_rate": 2.8973236009732365e-06, "loss": 0.6489, "step": 16389 }, { "epoch": 0.47852617441826517, "grad_norm": 0.740168055624035, "learning_rate": 2.8971613949716145e-06, "loss": 0.6031, "step": 16390 }, { "epoch": 0.47855537064611253, "grad_norm": 0.7506085479126772, "learning_rate": 2.896999188969992e-06, "loss": 0.6573, "step": 16391 }, { "epoch": 0.4785845668739599, "grad_norm": 0.6989589505712458, "learning_rate": 2.89683698296837e-06, "loss": 0.6437, "step": 16392 }, { "epoch": 0.47861376310180725, "grad_norm": 0.7014590625648373, "learning_rate": 2.896674776966748e-06, "loss": 0.6416, "step": 16393 }, { "epoch": 0.4786429593296546, "grad_norm": 0.7916429289279392, "learning_rate": 2.896512570965126e-06, "loss": 0.6438, "step": 16394 }, { "epoch": 0.478672155557502, "grad_norm": 0.7716750615760568, "learning_rate": 2.896350364963504e-06, "loss": 0.7084, "step": 16395 }, { "epoch": 0.47870135178534934, "grad_norm": 0.6973949597828044, "learning_rate": 2.8961881589618817e-06, "loss": 0.6443, "step": 16396 }, { "epoch": 0.4787305480131967, "grad_norm": 0.7430530795985414, "learning_rate": 2.8960259529602597e-06, "loss": 0.6592, "step": 16397 }, { "epoch": 0.47875974424104406, "grad_norm": 0.7629361659811605, "learning_rate": 2.8958637469586377e-06, "loss": 0.6796, "step": 16398 }, { "epoch": 0.4787889404688914, "grad_norm": 0.7597571401238699, "learning_rate": 2.8957015409570157e-06, "loss": 0.7341, "step": 16399 }, { "epoch": 0.4788181366967388, "grad_norm": 0.7064707789615673, "learning_rate": 2.8955393349553933e-06, "loss": 0.5731, "step": 16400 }, { "epoch": 0.47884733292458614, "grad_norm": 0.7633546398766607, "learning_rate": 2.8953771289537713e-06, "loss": 0.7367, "step": 16401 }, { "epoch": 0.4788765291524335, "grad_norm": 0.6791989170017009, "learning_rate": 2.8952149229521493e-06, "loss": 0.6329, "step": 16402 }, { "epoch": 0.47890572538028087, "grad_norm": 0.7170247194366911, "learning_rate": 2.8950527169505273e-06, "loss": 0.6669, "step": 16403 }, { "epoch": 0.47893492160812823, "grad_norm": 0.7587407101796705, "learning_rate": 2.8948905109489057e-06, "loss": 0.698, "step": 16404 }, { "epoch": 0.4789641178359756, "grad_norm": 0.7640683153776839, "learning_rate": 2.8947283049472837e-06, "loss": 0.6878, "step": 16405 }, { "epoch": 0.47899331406382295, "grad_norm": 0.7244407174927335, "learning_rate": 2.8945660989456613e-06, "loss": 0.6111, "step": 16406 }, { "epoch": 0.4790225102916703, "grad_norm": 0.6824007842121935, "learning_rate": 2.8944038929440393e-06, "loss": 0.5767, "step": 16407 }, { "epoch": 0.4790517065195177, "grad_norm": 0.7542653852825596, "learning_rate": 2.8942416869424173e-06, "loss": 0.6803, "step": 16408 }, { "epoch": 0.47908090274736503, "grad_norm": 0.7223460180111804, "learning_rate": 2.8940794809407953e-06, "loss": 0.6609, "step": 16409 }, { "epoch": 0.4791100989752124, "grad_norm": 0.7752244576950619, "learning_rate": 2.893917274939173e-06, "loss": 0.6842, "step": 16410 }, { "epoch": 0.47913929520305976, "grad_norm": 0.7874080302061581, "learning_rate": 2.893755068937551e-06, "loss": 0.6696, "step": 16411 }, { "epoch": 0.4791684914309071, "grad_norm": 0.72622147840693, "learning_rate": 2.893592862935929e-06, "loss": 0.6886, "step": 16412 }, { "epoch": 0.4791976876587545, "grad_norm": 0.7449937406962615, "learning_rate": 2.893430656934307e-06, "loss": 0.6337, "step": 16413 }, { "epoch": 0.47922688388660184, "grad_norm": 0.6734911124062795, "learning_rate": 2.8932684509326845e-06, "loss": 0.5282, "step": 16414 }, { "epoch": 0.4792560801144492, "grad_norm": 0.6917177686092343, "learning_rate": 2.8931062449310625e-06, "loss": 0.5844, "step": 16415 }, { "epoch": 0.47928527634229656, "grad_norm": 0.7180437687491179, "learning_rate": 2.8929440389294405e-06, "loss": 0.6933, "step": 16416 }, { "epoch": 0.4793144725701439, "grad_norm": 0.7820483094913938, "learning_rate": 2.8927818329278185e-06, "loss": 0.7448, "step": 16417 }, { "epoch": 0.4793436687979913, "grad_norm": 0.7051438759320385, "learning_rate": 2.8926196269261965e-06, "loss": 0.6089, "step": 16418 }, { "epoch": 0.47937286502583865, "grad_norm": 0.7285827968562449, "learning_rate": 2.892457420924574e-06, "loss": 0.6374, "step": 16419 }, { "epoch": 0.479402061253686, "grad_norm": 0.7782182040446466, "learning_rate": 2.892295214922952e-06, "loss": 0.6854, "step": 16420 }, { "epoch": 0.47943125748153337, "grad_norm": 0.7277396729885861, "learning_rate": 2.89213300892133e-06, "loss": 0.6624, "step": 16421 }, { "epoch": 0.47946045370938073, "grad_norm": 0.8102326336300666, "learning_rate": 2.891970802919708e-06, "loss": 0.6816, "step": 16422 }, { "epoch": 0.4794896499372281, "grad_norm": 0.7317679430402875, "learning_rate": 2.8918085969180866e-06, "loss": 0.6365, "step": 16423 }, { "epoch": 0.47951884616507545, "grad_norm": 0.7580193040743736, "learning_rate": 2.8916463909164646e-06, "loss": 0.6882, "step": 16424 }, { "epoch": 0.4795480423929228, "grad_norm": 0.7622047173347244, "learning_rate": 2.891484184914842e-06, "loss": 0.6739, "step": 16425 }, { "epoch": 0.4795772386207702, "grad_norm": 0.7592776542139362, "learning_rate": 2.89132197891322e-06, "loss": 0.6909, "step": 16426 }, { "epoch": 0.47960643484861754, "grad_norm": 0.6802425701735172, "learning_rate": 2.891159772911598e-06, "loss": 0.5772, "step": 16427 }, { "epoch": 0.4796356310764649, "grad_norm": 0.7173857857733118, "learning_rate": 2.890997566909976e-06, "loss": 0.6076, "step": 16428 }, { "epoch": 0.47966482730431226, "grad_norm": 0.7427145067346922, "learning_rate": 2.8908353609083538e-06, "loss": 0.6664, "step": 16429 }, { "epoch": 0.4796940235321596, "grad_norm": 0.703639961221136, "learning_rate": 2.8906731549067318e-06, "loss": 0.6179, "step": 16430 }, { "epoch": 0.479723219760007, "grad_norm": 0.7322752002388428, "learning_rate": 2.8905109489051098e-06, "loss": 0.6403, "step": 16431 }, { "epoch": 0.47975241598785434, "grad_norm": 0.7628723239950611, "learning_rate": 2.8903487429034878e-06, "loss": 0.6941, "step": 16432 }, { "epoch": 0.47978161221570176, "grad_norm": 0.6841112991017125, "learning_rate": 2.8901865369018654e-06, "loss": 0.5637, "step": 16433 }, { "epoch": 0.4798108084435491, "grad_norm": 0.7014281482199458, "learning_rate": 2.8900243309002434e-06, "loss": 0.5953, "step": 16434 }, { "epoch": 0.4798400046713965, "grad_norm": 0.6934618980694878, "learning_rate": 2.8898621248986214e-06, "loss": 0.6131, "step": 16435 }, { "epoch": 0.47986920089924384, "grad_norm": 0.7127411363644607, "learning_rate": 2.8896999188969994e-06, "loss": 0.6176, "step": 16436 }, { "epoch": 0.4798983971270912, "grad_norm": 0.6890910134072266, "learning_rate": 2.8895377128953774e-06, "loss": 0.631, "step": 16437 }, { "epoch": 0.47992759335493856, "grad_norm": 0.6963561943416763, "learning_rate": 2.889375506893755e-06, "loss": 0.6117, "step": 16438 }, { "epoch": 0.4799567895827859, "grad_norm": 0.74229172687579, "learning_rate": 2.889213300892133e-06, "loss": 0.66, "step": 16439 }, { "epoch": 0.4799859858106333, "grad_norm": 0.7651353381469079, "learning_rate": 2.889051094890511e-06, "loss": 0.5887, "step": 16440 }, { "epoch": 0.48001518203848065, "grad_norm": 0.7230060364891158, "learning_rate": 2.888888888888889e-06, "loss": 0.6413, "step": 16441 }, { "epoch": 0.480044378266328, "grad_norm": 0.73812050034217, "learning_rate": 2.8887266828872674e-06, "loss": 0.6927, "step": 16442 }, { "epoch": 0.48007357449417537, "grad_norm": 0.7396505442655743, "learning_rate": 2.8885644768856454e-06, "loss": 0.6594, "step": 16443 }, { "epoch": 0.48010277072202273, "grad_norm": 0.7110205081923826, "learning_rate": 2.888402270884023e-06, "loss": 0.6553, "step": 16444 }, { "epoch": 0.4801319669498701, "grad_norm": 0.715498843847727, "learning_rate": 2.888240064882401e-06, "loss": 0.6168, "step": 16445 }, { "epoch": 0.48016116317771745, "grad_norm": 0.722934240483403, "learning_rate": 2.888077858880779e-06, "loss": 0.6464, "step": 16446 }, { "epoch": 0.4801903594055648, "grad_norm": 0.7248363447773449, "learning_rate": 2.887915652879157e-06, "loss": 0.6545, "step": 16447 }, { "epoch": 0.4802195556334122, "grad_norm": 0.6844526513382748, "learning_rate": 2.8877534468775346e-06, "loss": 0.6055, "step": 16448 }, { "epoch": 0.48024875186125954, "grad_norm": 0.6948875779047549, "learning_rate": 2.8875912408759126e-06, "loss": 0.6002, "step": 16449 }, { "epoch": 0.4802779480891069, "grad_norm": 0.7140648853678854, "learning_rate": 2.8874290348742906e-06, "loss": 0.6297, "step": 16450 }, { "epoch": 0.48030714431695426, "grad_norm": 0.670010982192381, "learning_rate": 2.8872668288726686e-06, "loss": 0.5808, "step": 16451 }, { "epoch": 0.4803363405448016, "grad_norm": 0.7301310269348733, "learning_rate": 2.8871046228710462e-06, "loss": 0.6536, "step": 16452 }, { "epoch": 0.480365536772649, "grad_norm": 0.7235836046008792, "learning_rate": 2.8869424168694242e-06, "loss": 0.639, "step": 16453 }, { "epoch": 0.48039473300049634, "grad_norm": 0.6850526350754369, "learning_rate": 2.8867802108678022e-06, "loss": 0.5729, "step": 16454 }, { "epoch": 0.4804239292283437, "grad_norm": 0.676685604102176, "learning_rate": 2.8866180048661802e-06, "loss": 0.5881, "step": 16455 }, { "epoch": 0.48045312545619107, "grad_norm": 0.6632263277784336, "learning_rate": 2.8864557988645582e-06, "loss": 0.5476, "step": 16456 }, { "epoch": 0.4804823216840384, "grad_norm": 0.7098428964367667, "learning_rate": 2.886293592862936e-06, "loss": 0.6543, "step": 16457 }, { "epoch": 0.4805115179118858, "grad_norm": 0.7178043993596392, "learning_rate": 2.886131386861314e-06, "loss": 0.5428, "step": 16458 }, { "epoch": 0.48054071413973315, "grad_norm": 0.7163432341205488, "learning_rate": 2.885969180859692e-06, "loss": 0.6336, "step": 16459 }, { "epoch": 0.4805699103675805, "grad_norm": 0.7593841340576469, "learning_rate": 2.88580697485807e-06, "loss": 0.5917, "step": 16460 }, { "epoch": 0.48059910659542787, "grad_norm": 0.7616551194607301, "learning_rate": 2.8856447688564483e-06, "loss": 0.7684, "step": 16461 }, { "epoch": 0.48062830282327523, "grad_norm": 0.7023401713421331, "learning_rate": 2.8854825628548263e-06, "loss": 0.6174, "step": 16462 }, { "epoch": 0.4806574990511226, "grad_norm": 0.7556982416887615, "learning_rate": 2.885320356853204e-06, "loss": 0.6775, "step": 16463 }, { "epoch": 0.48068669527896996, "grad_norm": 0.8032402782248729, "learning_rate": 2.885158150851582e-06, "loss": 0.7324, "step": 16464 }, { "epoch": 0.4807158915068173, "grad_norm": 0.7025297535952036, "learning_rate": 2.88499594484996e-06, "loss": 0.6187, "step": 16465 }, { "epoch": 0.4807450877346647, "grad_norm": 0.7670490180056801, "learning_rate": 2.884833738848338e-06, "loss": 0.715, "step": 16466 }, { "epoch": 0.48077428396251204, "grad_norm": 0.7521510012761796, "learning_rate": 2.8846715328467155e-06, "loss": 0.7098, "step": 16467 }, { "epoch": 0.4808034801903594, "grad_norm": 0.7305217879892744, "learning_rate": 2.8845093268450935e-06, "loss": 0.6357, "step": 16468 }, { "epoch": 0.48083267641820676, "grad_norm": 0.7666343395125949, "learning_rate": 2.8843471208434715e-06, "loss": 0.6873, "step": 16469 }, { "epoch": 0.4808618726460541, "grad_norm": 0.7443424241374831, "learning_rate": 2.8841849148418495e-06, "loss": 0.6955, "step": 16470 }, { "epoch": 0.4808910688739015, "grad_norm": 0.673904688938035, "learning_rate": 2.884022708840227e-06, "loss": 0.5655, "step": 16471 }, { "epoch": 0.48092026510174885, "grad_norm": 0.7185424983163775, "learning_rate": 2.883860502838605e-06, "loss": 0.6486, "step": 16472 }, { "epoch": 0.4809494613295962, "grad_norm": 0.905904670703869, "learning_rate": 2.883698296836983e-06, "loss": 0.7191, "step": 16473 }, { "epoch": 0.48097865755744357, "grad_norm": 0.7022163011512472, "learning_rate": 2.883536090835361e-06, "loss": 0.5944, "step": 16474 }, { "epoch": 0.48100785378529093, "grad_norm": 0.6981956380203658, "learning_rate": 2.883373884833739e-06, "loss": 0.5881, "step": 16475 }, { "epoch": 0.4810370500131383, "grad_norm": 0.7247243548904537, "learning_rate": 2.8832116788321167e-06, "loss": 0.6246, "step": 16476 }, { "epoch": 0.48106624624098565, "grad_norm": 0.6871016089188661, "learning_rate": 2.8830494728304947e-06, "loss": 0.6161, "step": 16477 }, { "epoch": 0.481095442468833, "grad_norm": 0.7518966671148668, "learning_rate": 2.8828872668288727e-06, "loss": 0.6916, "step": 16478 }, { "epoch": 0.4811246386966804, "grad_norm": 0.7165392139245672, "learning_rate": 2.882725060827251e-06, "loss": 0.6448, "step": 16479 }, { "epoch": 0.48115383492452773, "grad_norm": 0.7793276711199694, "learning_rate": 2.882562854825629e-06, "loss": 0.703, "step": 16480 }, { "epoch": 0.4811830311523751, "grad_norm": 0.7390025324452935, "learning_rate": 2.882400648824007e-06, "loss": 0.6529, "step": 16481 }, { "epoch": 0.48121222738022246, "grad_norm": 0.7065093150178471, "learning_rate": 2.8822384428223847e-06, "loss": 0.6248, "step": 16482 }, { "epoch": 0.4812414236080698, "grad_norm": 0.7382802574182945, "learning_rate": 2.8820762368207627e-06, "loss": 0.6357, "step": 16483 }, { "epoch": 0.4812706198359172, "grad_norm": 0.706165485975477, "learning_rate": 2.8819140308191407e-06, "loss": 0.6937, "step": 16484 }, { "epoch": 0.48129981606376454, "grad_norm": 0.7083717247218975, "learning_rate": 2.8817518248175188e-06, "loss": 0.6926, "step": 16485 }, { "epoch": 0.4813290122916119, "grad_norm": 0.766462322884764, "learning_rate": 2.8815896188158963e-06, "loss": 0.6667, "step": 16486 }, { "epoch": 0.48135820851945926, "grad_norm": 0.7684324951420387, "learning_rate": 2.8814274128142743e-06, "loss": 0.6736, "step": 16487 }, { "epoch": 0.4813874047473066, "grad_norm": 0.7491224547014517, "learning_rate": 2.8812652068126523e-06, "loss": 0.6696, "step": 16488 }, { "epoch": 0.481416600975154, "grad_norm": 0.7710399711609389, "learning_rate": 2.8811030008110304e-06, "loss": 0.7054, "step": 16489 }, { "epoch": 0.48144579720300135, "grad_norm": 0.7007359988953857, "learning_rate": 2.880940794809408e-06, "loss": 0.6593, "step": 16490 }, { "epoch": 0.4814749934308487, "grad_norm": 0.75177693221418, "learning_rate": 2.880778588807786e-06, "loss": 0.6309, "step": 16491 }, { "epoch": 0.48150418965869607, "grad_norm": 0.691656739734612, "learning_rate": 2.880616382806164e-06, "loss": 0.5852, "step": 16492 }, { "epoch": 0.4815333858865435, "grad_norm": 0.7328139580178609, "learning_rate": 2.880454176804542e-06, "loss": 0.6207, "step": 16493 }, { "epoch": 0.48156258211439085, "grad_norm": 0.6870089198993773, "learning_rate": 2.88029197080292e-06, "loss": 0.5764, "step": 16494 }, { "epoch": 0.4815917783422382, "grad_norm": 0.6877832871258643, "learning_rate": 2.8801297648012975e-06, "loss": 0.5931, "step": 16495 }, { "epoch": 0.48162097457008557, "grad_norm": 0.728911189203067, "learning_rate": 2.8799675587996756e-06, "loss": 0.6959, "step": 16496 }, { "epoch": 0.48165017079793293, "grad_norm": 0.763642184160678, "learning_rate": 2.8798053527980536e-06, "loss": 0.6975, "step": 16497 }, { "epoch": 0.4816793670257803, "grad_norm": 0.7431949784945854, "learning_rate": 2.879643146796432e-06, "loss": 0.624, "step": 16498 }, { "epoch": 0.48170856325362765, "grad_norm": 0.7143907846538871, "learning_rate": 2.87948094079481e-06, "loss": 0.6382, "step": 16499 }, { "epoch": 0.481737759481475, "grad_norm": 0.7540910179746547, "learning_rate": 2.879318734793188e-06, "loss": 0.6994, "step": 16500 }, { "epoch": 0.4817669557093224, "grad_norm": 0.7695313554635902, "learning_rate": 2.8791565287915656e-06, "loss": 0.744, "step": 16501 }, { "epoch": 0.48179615193716974, "grad_norm": 0.6703339986363006, "learning_rate": 2.8789943227899436e-06, "loss": 0.5737, "step": 16502 }, { "epoch": 0.4818253481650171, "grad_norm": 0.6687208788982539, "learning_rate": 2.8788321167883216e-06, "loss": 0.5562, "step": 16503 }, { "epoch": 0.48185454439286446, "grad_norm": 0.7519436177121857, "learning_rate": 2.8786699107866996e-06, "loss": 0.5717, "step": 16504 }, { "epoch": 0.4818837406207118, "grad_norm": 0.7282531005546465, "learning_rate": 2.878507704785077e-06, "loss": 0.6644, "step": 16505 }, { "epoch": 0.4819129368485592, "grad_norm": 0.712218559330846, "learning_rate": 2.878345498783455e-06, "loss": 0.5688, "step": 16506 }, { "epoch": 0.48194213307640654, "grad_norm": 0.7130687032089855, "learning_rate": 2.878183292781833e-06, "loss": 0.6316, "step": 16507 }, { "epoch": 0.4819713293042539, "grad_norm": 0.7553231499942359, "learning_rate": 2.8780210867802112e-06, "loss": 0.6628, "step": 16508 }, { "epoch": 0.48200052553210126, "grad_norm": 0.7395770544735005, "learning_rate": 2.877858880778589e-06, "loss": 0.6623, "step": 16509 }, { "epoch": 0.4820297217599486, "grad_norm": 0.702003768859665, "learning_rate": 2.877696674776967e-06, "loss": 0.6072, "step": 16510 }, { "epoch": 0.482058917987796, "grad_norm": 0.7326455806619766, "learning_rate": 2.877534468775345e-06, "loss": 0.5888, "step": 16511 }, { "epoch": 0.48208811421564335, "grad_norm": 0.7482185327388865, "learning_rate": 2.877372262773723e-06, "loss": 0.6836, "step": 16512 }, { "epoch": 0.4821173104434907, "grad_norm": 0.7262806412621222, "learning_rate": 2.877210056772101e-06, "loss": 0.6779, "step": 16513 }, { "epoch": 0.48214650667133807, "grad_norm": 0.7733802613402919, "learning_rate": 2.8770478507704784e-06, "loss": 0.6468, "step": 16514 }, { "epoch": 0.48217570289918543, "grad_norm": 0.6801660763310429, "learning_rate": 2.8768856447688564e-06, "loss": 0.6288, "step": 16515 }, { "epoch": 0.4822048991270328, "grad_norm": 0.657238831270976, "learning_rate": 2.8767234387672344e-06, "loss": 0.5609, "step": 16516 }, { "epoch": 0.48223409535488015, "grad_norm": 0.7169958276782094, "learning_rate": 2.876561232765613e-06, "loss": 0.5894, "step": 16517 }, { "epoch": 0.4822632915827275, "grad_norm": 0.7115546956119304, "learning_rate": 2.876399026763991e-06, "loss": 0.6142, "step": 16518 }, { "epoch": 0.4822924878105749, "grad_norm": 0.6629513674465639, "learning_rate": 2.8762368207623684e-06, "loss": 0.5894, "step": 16519 }, { "epoch": 0.48232168403842224, "grad_norm": 0.6674630198393301, "learning_rate": 2.8760746147607464e-06, "loss": 0.5319, "step": 16520 }, { "epoch": 0.4823508802662696, "grad_norm": 0.6905034532019042, "learning_rate": 2.8759124087591245e-06, "loss": 0.5835, "step": 16521 }, { "epoch": 0.48238007649411696, "grad_norm": 0.6637944004362561, "learning_rate": 2.8757502027575025e-06, "loss": 0.5452, "step": 16522 }, { "epoch": 0.4824092727219643, "grad_norm": 0.7335472417917525, "learning_rate": 2.8755879967558805e-06, "loss": 0.6952, "step": 16523 }, { "epoch": 0.4824384689498117, "grad_norm": 0.7405825251224027, "learning_rate": 2.875425790754258e-06, "loss": 0.6729, "step": 16524 }, { "epoch": 0.48246766517765904, "grad_norm": 0.7451049506792241, "learning_rate": 2.875263584752636e-06, "loss": 0.6395, "step": 16525 }, { "epoch": 0.4824968614055064, "grad_norm": 0.7250725856382576, "learning_rate": 2.875101378751014e-06, "loss": 0.6333, "step": 16526 }, { "epoch": 0.48252605763335377, "grad_norm": 0.6949260889692148, "learning_rate": 2.874939172749392e-06, "loss": 0.5459, "step": 16527 }, { "epoch": 0.4825552538612011, "grad_norm": 0.8605657044630407, "learning_rate": 2.8747769667477697e-06, "loss": 0.7041, "step": 16528 }, { "epoch": 0.4825844500890485, "grad_norm": 0.6932542293656655, "learning_rate": 2.8746147607461477e-06, "loss": 0.6029, "step": 16529 }, { "epoch": 0.48261364631689585, "grad_norm": 0.6878759864536484, "learning_rate": 2.8744525547445257e-06, "loss": 0.5727, "step": 16530 }, { "epoch": 0.4826428425447432, "grad_norm": 0.7343076147001626, "learning_rate": 2.8742903487429037e-06, "loss": 0.6442, "step": 16531 }, { "epoch": 0.4826720387725906, "grad_norm": 0.7400090341191328, "learning_rate": 2.8741281427412817e-06, "loss": 0.6886, "step": 16532 }, { "epoch": 0.48270123500043793, "grad_norm": 0.7585655256698387, "learning_rate": 2.8739659367396593e-06, "loss": 0.7162, "step": 16533 }, { "epoch": 0.4827304312282853, "grad_norm": 0.7539709884860539, "learning_rate": 2.8738037307380373e-06, "loss": 0.6568, "step": 16534 }, { "epoch": 0.48275962745613266, "grad_norm": 0.6799214908833395, "learning_rate": 2.8736415247364153e-06, "loss": 0.5967, "step": 16535 }, { "epoch": 0.48278882368398, "grad_norm": 0.7186482235164525, "learning_rate": 2.8734793187347937e-06, "loss": 0.581, "step": 16536 }, { "epoch": 0.4828180199118274, "grad_norm": 0.7502533143054274, "learning_rate": 2.8733171127331717e-06, "loss": 0.7321, "step": 16537 }, { "epoch": 0.48284721613967474, "grad_norm": 0.7105281486125743, "learning_rate": 2.8731549067315493e-06, "loss": 0.6375, "step": 16538 }, { "epoch": 0.4828764123675221, "grad_norm": 0.785673393125336, "learning_rate": 2.8729927007299273e-06, "loss": 0.7419, "step": 16539 }, { "epoch": 0.48290560859536946, "grad_norm": 0.7308409409773795, "learning_rate": 2.8728304947283053e-06, "loss": 0.6548, "step": 16540 }, { "epoch": 0.4829348048232168, "grad_norm": 0.746347741477383, "learning_rate": 2.8726682887266833e-06, "loss": 0.6524, "step": 16541 }, { "epoch": 0.4829640010510642, "grad_norm": 0.736079683194353, "learning_rate": 2.8725060827250613e-06, "loss": 0.6379, "step": 16542 }, { "epoch": 0.48299319727891155, "grad_norm": 0.762297354614522, "learning_rate": 2.872343876723439e-06, "loss": 0.7149, "step": 16543 }, { "epoch": 0.4830223935067589, "grad_norm": 0.7483211625976068, "learning_rate": 2.872181670721817e-06, "loss": 0.695, "step": 16544 }, { "epoch": 0.48305158973460627, "grad_norm": 0.7505638408924404, "learning_rate": 2.872019464720195e-06, "loss": 0.7105, "step": 16545 }, { "epoch": 0.48308078596245363, "grad_norm": 0.7330911272123148, "learning_rate": 2.871857258718573e-06, "loss": 0.6222, "step": 16546 }, { "epoch": 0.483109982190301, "grad_norm": 0.6599328522937569, "learning_rate": 2.8716950527169505e-06, "loss": 0.575, "step": 16547 }, { "epoch": 0.48313917841814835, "grad_norm": 0.7084648340198909, "learning_rate": 2.8715328467153285e-06, "loss": 0.6063, "step": 16548 }, { "epoch": 0.4831683746459957, "grad_norm": 0.7112300376034113, "learning_rate": 2.8713706407137065e-06, "loss": 0.5929, "step": 16549 }, { "epoch": 0.4831975708738431, "grad_norm": 0.7546250329950231, "learning_rate": 2.8712084347120845e-06, "loss": 0.688, "step": 16550 }, { "epoch": 0.48322676710169044, "grad_norm": 0.7840785828757636, "learning_rate": 2.8710462287104625e-06, "loss": 0.7425, "step": 16551 }, { "epoch": 0.4832559633295378, "grad_norm": 0.7166812626821073, "learning_rate": 2.87088402270884e-06, "loss": 0.6242, "step": 16552 }, { "epoch": 0.4832851595573852, "grad_norm": 0.7379593583810306, "learning_rate": 2.870721816707218e-06, "loss": 0.6433, "step": 16553 }, { "epoch": 0.4833143557852326, "grad_norm": 0.7380643614608909, "learning_rate": 2.870559610705596e-06, "loss": 0.7031, "step": 16554 }, { "epoch": 0.48334355201307994, "grad_norm": 0.7173093264836046, "learning_rate": 2.8703974047039746e-06, "loss": 0.6871, "step": 16555 }, { "epoch": 0.4833727482409273, "grad_norm": 0.7101692809943209, "learning_rate": 2.8702351987023526e-06, "loss": 0.6406, "step": 16556 }, { "epoch": 0.48340194446877466, "grad_norm": 0.6649933870342007, "learning_rate": 2.87007299270073e-06, "loss": 0.6177, "step": 16557 }, { "epoch": 0.483431140696622, "grad_norm": 0.7514613795662066, "learning_rate": 2.869910786699108e-06, "loss": 0.6658, "step": 16558 }, { "epoch": 0.4834603369244694, "grad_norm": 0.6657798771413571, "learning_rate": 2.869748580697486e-06, "loss": 0.5342, "step": 16559 }, { "epoch": 0.48348953315231674, "grad_norm": 0.7755171579087908, "learning_rate": 2.869586374695864e-06, "loss": 0.7315, "step": 16560 }, { "epoch": 0.4835187293801641, "grad_norm": 0.7630245575596374, "learning_rate": 2.869424168694242e-06, "loss": 0.6821, "step": 16561 }, { "epoch": 0.48354792560801146, "grad_norm": 0.7894173319655979, "learning_rate": 2.8692619626926198e-06, "loss": 0.6209, "step": 16562 }, { "epoch": 0.4835771218358588, "grad_norm": 0.728454701351082, "learning_rate": 2.8690997566909978e-06, "loss": 0.6565, "step": 16563 }, { "epoch": 0.4836063180637062, "grad_norm": 0.7150657227258791, "learning_rate": 2.8689375506893758e-06, "loss": 0.679, "step": 16564 }, { "epoch": 0.48363551429155355, "grad_norm": 0.7690713223133504, "learning_rate": 2.868775344687754e-06, "loss": 0.6861, "step": 16565 }, { "epoch": 0.4836647105194009, "grad_norm": 0.7606840795291842, "learning_rate": 2.8686131386861314e-06, "loss": 0.6868, "step": 16566 }, { "epoch": 0.48369390674724827, "grad_norm": 0.7701679629877811, "learning_rate": 2.8684509326845094e-06, "loss": 0.6806, "step": 16567 }, { "epoch": 0.48372310297509563, "grad_norm": 0.682632086249573, "learning_rate": 2.8682887266828874e-06, "loss": 0.5601, "step": 16568 }, { "epoch": 0.483752299202943, "grad_norm": 0.794674365674852, "learning_rate": 2.8681265206812654e-06, "loss": 0.7447, "step": 16569 }, { "epoch": 0.48378149543079035, "grad_norm": 0.6934160733759163, "learning_rate": 2.8679643146796434e-06, "loss": 0.6271, "step": 16570 }, { "epoch": 0.4838106916586377, "grad_norm": 0.6951233655043548, "learning_rate": 2.867802108678021e-06, "loss": 0.5738, "step": 16571 }, { "epoch": 0.4838398878864851, "grad_norm": 0.8201048738034539, "learning_rate": 2.867639902676399e-06, "loss": 0.7778, "step": 16572 }, { "epoch": 0.48386908411433244, "grad_norm": 0.7498720708599197, "learning_rate": 2.867477696674777e-06, "loss": 0.678, "step": 16573 }, { "epoch": 0.4838982803421798, "grad_norm": 0.7684008827647792, "learning_rate": 2.8673154906731554e-06, "loss": 0.7257, "step": 16574 }, { "epoch": 0.48392747657002716, "grad_norm": 0.7065689040962091, "learning_rate": 2.8671532846715334e-06, "loss": 0.6044, "step": 16575 }, { "epoch": 0.4839566727978745, "grad_norm": 0.7032910502044812, "learning_rate": 2.866991078669911e-06, "loss": 0.6097, "step": 16576 }, { "epoch": 0.4839858690257219, "grad_norm": 0.7882171138499129, "learning_rate": 2.866828872668289e-06, "loss": 0.6901, "step": 16577 }, { "epoch": 0.48401506525356924, "grad_norm": 0.7292058871284677, "learning_rate": 2.866666666666667e-06, "loss": 0.6619, "step": 16578 }, { "epoch": 0.4840442614814166, "grad_norm": 0.7446979473864315, "learning_rate": 2.866504460665045e-06, "loss": 0.617, "step": 16579 }, { "epoch": 0.48407345770926397, "grad_norm": 0.7516241334125934, "learning_rate": 2.866342254663423e-06, "loss": 0.7322, "step": 16580 }, { "epoch": 0.4841026539371113, "grad_norm": 0.7544598827026102, "learning_rate": 2.8661800486618006e-06, "loss": 0.7515, "step": 16581 }, { "epoch": 0.4841318501649587, "grad_norm": 0.870642018388234, "learning_rate": 2.8660178426601786e-06, "loss": 0.7158, "step": 16582 }, { "epoch": 0.48416104639280605, "grad_norm": 0.7435877838128025, "learning_rate": 2.8658556366585566e-06, "loss": 0.6847, "step": 16583 }, { "epoch": 0.4841902426206534, "grad_norm": 0.7614280062932313, "learning_rate": 2.8656934306569346e-06, "loss": 0.6751, "step": 16584 }, { "epoch": 0.48421943884850077, "grad_norm": 0.7307109549847759, "learning_rate": 2.8655312246553122e-06, "loss": 0.695, "step": 16585 }, { "epoch": 0.48424863507634813, "grad_norm": 0.7653554489565895, "learning_rate": 2.8653690186536902e-06, "loss": 0.7096, "step": 16586 }, { "epoch": 0.4842778313041955, "grad_norm": 0.729377858306874, "learning_rate": 2.8652068126520682e-06, "loss": 0.68, "step": 16587 }, { "epoch": 0.48430702753204286, "grad_norm": 0.6875854528416823, "learning_rate": 2.8650446066504463e-06, "loss": 0.5857, "step": 16588 }, { "epoch": 0.4843362237598902, "grad_norm": 0.788112272477921, "learning_rate": 2.8648824006488243e-06, "loss": 0.7603, "step": 16589 }, { "epoch": 0.4843654199877376, "grad_norm": 0.7320616440980867, "learning_rate": 2.864720194647202e-06, "loss": 0.653, "step": 16590 }, { "epoch": 0.48439461621558494, "grad_norm": 0.6870750259455413, "learning_rate": 2.86455798864558e-06, "loss": 0.6156, "step": 16591 }, { "epoch": 0.4844238124434323, "grad_norm": 0.7122630687543089, "learning_rate": 2.864395782643958e-06, "loss": 0.6398, "step": 16592 }, { "epoch": 0.48445300867127966, "grad_norm": 0.7696032275271002, "learning_rate": 2.8642335766423363e-06, "loss": 0.758, "step": 16593 }, { "epoch": 0.484482204899127, "grad_norm": 0.6691089192665747, "learning_rate": 2.8640713706407143e-06, "loss": 0.5872, "step": 16594 }, { "epoch": 0.4845114011269744, "grad_norm": 0.7794123605711647, "learning_rate": 2.863909164639092e-06, "loss": 0.6715, "step": 16595 }, { "epoch": 0.48454059735482174, "grad_norm": 0.680397753311076, "learning_rate": 2.86374695863747e-06, "loss": 0.5781, "step": 16596 }, { "epoch": 0.4845697935826691, "grad_norm": 0.7076399537963636, "learning_rate": 2.863584752635848e-06, "loss": 0.6098, "step": 16597 }, { "epoch": 0.48459898981051647, "grad_norm": 0.7195474312012622, "learning_rate": 2.863422546634226e-06, "loss": 0.6535, "step": 16598 }, { "epoch": 0.48462818603836383, "grad_norm": 0.7346174600036023, "learning_rate": 2.863260340632604e-06, "loss": 0.6405, "step": 16599 }, { "epoch": 0.4846573822662112, "grad_norm": 0.7338752792432853, "learning_rate": 2.8630981346309815e-06, "loss": 0.678, "step": 16600 }, { "epoch": 0.48468657849405855, "grad_norm": 0.6939996855582984, "learning_rate": 2.8629359286293595e-06, "loss": 0.5928, "step": 16601 }, { "epoch": 0.4847157747219059, "grad_norm": 0.7066183221141991, "learning_rate": 2.8627737226277375e-06, "loss": 0.575, "step": 16602 }, { "epoch": 0.4847449709497533, "grad_norm": 0.7944608533472983, "learning_rate": 2.8626115166261155e-06, "loss": 0.6683, "step": 16603 }, { "epoch": 0.48477416717760063, "grad_norm": 0.6899859903871572, "learning_rate": 2.862449310624493e-06, "loss": 0.5932, "step": 16604 }, { "epoch": 0.484803363405448, "grad_norm": 0.7601039530006268, "learning_rate": 2.862287104622871e-06, "loss": 0.7327, "step": 16605 }, { "epoch": 0.48483255963329536, "grad_norm": 0.7442272143662182, "learning_rate": 2.862124898621249e-06, "loss": 0.6148, "step": 16606 }, { "epoch": 0.4848617558611427, "grad_norm": 0.7127164035287802, "learning_rate": 2.861962692619627e-06, "loss": 0.6048, "step": 16607 }, { "epoch": 0.4848909520889901, "grad_norm": 0.6770621664334975, "learning_rate": 2.861800486618005e-06, "loss": 0.5873, "step": 16608 }, { "epoch": 0.48492014831683744, "grad_norm": 0.7439424609277253, "learning_rate": 2.8616382806163827e-06, "loss": 0.6473, "step": 16609 }, { "epoch": 0.4849493445446848, "grad_norm": 0.7054983683258901, "learning_rate": 2.8614760746147607e-06, "loss": 0.6773, "step": 16610 }, { "epoch": 0.48497854077253216, "grad_norm": 0.7161137313045831, "learning_rate": 2.8613138686131387e-06, "loss": 0.6215, "step": 16611 }, { "epoch": 0.4850077370003795, "grad_norm": 0.7178530070753293, "learning_rate": 2.861151662611517e-06, "loss": 0.6326, "step": 16612 }, { "epoch": 0.48503693322822694, "grad_norm": 0.7023885820442919, "learning_rate": 2.860989456609895e-06, "loss": 0.6044, "step": 16613 }, { "epoch": 0.4850661294560743, "grad_norm": 0.6657478876889512, "learning_rate": 2.8608272506082727e-06, "loss": 0.5428, "step": 16614 }, { "epoch": 0.48509532568392166, "grad_norm": 0.7949396650823172, "learning_rate": 2.8606650446066507e-06, "loss": 0.6313, "step": 16615 }, { "epoch": 0.485124521911769, "grad_norm": 0.6521101196281036, "learning_rate": 2.8605028386050287e-06, "loss": 0.5511, "step": 16616 }, { "epoch": 0.4851537181396164, "grad_norm": 0.8186243017167832, "learning_rate": 2.8603406326034068e-06, "loss": 0.6625, "step": 16617 }, { "epoch": 0.48518291436746375, "grad_norm": 0.6866870542764333, "learning_rate": 2.8601784266017848e-06, "loss": 0.6039, "step": 16618 }, { "epoch": 0.4852121105953111, "grad_norm": 0.7176455618749884, "learning_rate": 2.8600162206001623e-06, "loss": 0.5666, "step": 16619 }, { "epoch": 0.48524130682315847, "grad_norm": 0.718760877296868, "learning_rate": 2.8598540145985404e-06, "loss": 0.6569, "step": 16620 }, { "epoch": 0.48527050305100583, "grad_norm": 0.7962087075464599, "learning_rate": 2.8596918085969184e-06, "loss": 0.6082, "step": 16621 }, { "epoch": 0.4852996992788532, "grad_norm": 0.6784258421936135, "learning_rate": 2.8595296025952964e-06, "loss": 0.5769, "step": 16622 }, { "epoch": 0.48532889550670055, "grad_norm": 0.7098473211030603, "learning_rate": 2.859367396593674e-06, "loss": 0.6327, "step": 16623 }, { "epoch": 0.4853580917345479, "grad_norm": 0.7544580394676879, "learning_rate": 2.859205190592052e-06, "loss": 0.6178, "step": 16624 }, { "epoch": 0.4853872879623953, "grad_norm": 0.8000650614102244, "learning_rate": 2.85904298459043e-06, "loss": 0.7089, "step": 16625 }, { "epoch": 0.48541648419024264, "grad_norm": 0.6820424184879996, "learning_rate": 2.858880778588808e-06, "loss": 0.5755, "step": 16626 }, { "epoch": 0.48544568041809, "grad_norm": 0.754645136556406, "learning_rate": 2.858718572587186e-06, "loss": 0.6563, "step": 16627 }, { "epoch": 0.48547487664593736, "grad_norm": 0.7848035166129491, "learning_rate": 2.8585563665855636e-06, "loss": 0.6791, "step": 16628 }, { "epoch": 0.4855040728737847, "grad_norm": 0.7613608198388655, "learning_rate": 2.8583941605839416e-06, "loss": 0.6994, "step": 16629 }, { "epoch": 0.4855332691016321, "grad_norm": 0.7100106947544362, "learning_rate": 2.85823195458232e-06, "loss": 0.6087, "step": 16630 }, { "epoch": 0.48556246532947944, "grad_norm": 0.6519441167755307, "learning_rate": 2.858069748580698e-06, "loss": 0.5425, "step": 16631 }, { "epoch": 0.4855916615573268, "grad_norm": 0.7865098528859594, "learning_rate": 2.857907542579076e-06, "loss": 0.7158, "step": 16632 }, { "epoch": 0.48562085778517416, "grad_norm": 0.7194602224583687, "learning_rate": 2.8577453365774536e-06, "loss": 0.678, "step": 16633 }, { "epoch": 0.4856500540130215, "grad_norm": 0.6993310188489232, "learning_rate": 2.8575831305758316e-06, "loss": 0.629, "step": 16634 }, { "epoch": 0.4856792502408689, "grad_norm": 0.8251243621194873, "learning_rate": 2.8574209245742096e-06, "loss": 0.7402, "step": 16635 }, { "epoch": 0.48570844646871625, "grad_norm": 0.7836428101393356, "learning_rate": 2.8572587185725876e-06, "loss": 0.7548, "step": 16636 }, { "epoch": 0.4857376426965636, "grad_norm": 0.7243983478466134, "learning_rate": 2.8570965125709656e-06, "loss": 0.6411, "step": 16637 }, { "epoch": 0.48576683892441097, "grad_norm": 0.7085324055459561, "learning_rate": 2.856934306569343e-06, "loss": 0.6209, "step": 16638 }, { "epoch": 0.48579603515225833, "grad_norm": 0.7180897499309763, "learning_rate": 2.856772100567721e-06, "loss": 0.6368, "step": 16639 }, { "epoch": 0.4858252313801057, "grad_norm": 0.7095182468172634, "learning_rate": 2.8566098945660992e-06, "loss": 0.6178, "step": 16640 }, { "epoch": 0.48585442760795305, "grad_norm": 0.7450191313210096, "learning_rate": 2.8564476885644772e-06, "loss": 0.625, "step": 16641 }, { "epoch": 0.4858836238358004, "grad_norm": 0.7581603760423342, "learning_rate": 2.856285482562855e-06, "loss": 0.6852, "step": 16642 }, { "epoch": 0.4859128200636478, "grad_norm": 0.7701407446339799, "learning_rate": 2.856123276561233e-06, "loss": 0.7429, "step": 16643 }, { "epoch": 0.48594201629149514, "grad_norm": 0.7035416017962755, "learning_rate": 2.855961070559611e-06, "loss": 0.5267, "step": 16644 }, { "epoch": 0.4859712125193425, "grad_norm": 0.7736672817161159, "learning_rate": 2.855798864557989e-06, "loss": 0.7101, "step": 16645 }, { "epoch": 0.48600040874718986, "grad_norm": 0.7373017109691634, "learning_rate": 2.855636658556367e-06, "loss": 0.6651, "step": 16646 }, { "epoch": 0.4860296049750372, "grad_norm": 0.7159820310385125, "learning_rate": 2.8554744525547444e-06, "loss": 0.6381, "step": 16647 }, { "epoch": 0.4860588012028846, "grad_norm": 0.7444482520805487, "learning_rate": 2.8553122465531224e-06, "loss": 0.638, "step": 16648 }, { "epoch": 0.48608799743073194, "grad_norm": 0.7420500765565429, "learning_rate": 2.855150040551501e-06, "loss": 0.7039, "step": 16649 }, { "epoch": 0.4861171936585793, "grad_norm": 0.7455808273666232, "learning_rate": 2.854987834549879e-06, "loss": 0.6644, "step": 16650 }, { "epoch": 0.48614638988642667, "grad_norm": 0.7080904067145675, "learning_rate": 2.854825628548257e-06, "loss": 0.6444, "step": 16651 }, { "epoch": 0.486175586114274, "grad_norm": 0.717340570935036, "learning_rate": 2.8546634225466345e-06, "loss": 0.638, "step": 16652 }, { "epoch": 0.4862047823421214, "grad_norm": 0.7443329776599251, "learning_rate": 2.8545012165450125e-06, "loss": 0.5946, "step": 16653 }, { "epoch": 0.48623397856996875, "grad_norm": 0.6960504790755478, "learning_rate": 2.8543390105433905e-06, "loss": 0.6284, "step": 16654 }, { "epoch": 0.4862631747978161, "grad_norm": 0.696391328834102, "learning_rate": 2.8541768045417685e-06, "loss": 0.5729, "step": 16655 }, { "epoch": 0.4862923710256635, "grad_norm": 0.7775021116791129, "learning_rate": 2.8540145985401465e-06, "loss": 0.7626, "step": 16656 }, { "epoch": 0.48632156725351083, "grad_norm": 0.7241125821743033, "learning_rate": 2.853852392538524e-06, "loss": 0.5867, "step": 16657 }, { "epoch": 0.4863507634813582, "grad_norm": 0.6832172203538266, "learning_rate": 2.853690186536902e-06, "loss": 0.5993, "step": 16658 }, { "epoch": 0.48637995970920556, "grad_norm": 0.7426018952294592, "learning_rate": 2.85352798053528e-06, "loss": 0.6474, "step": 16659 }, { "epoch": 0.4864091559370529, "grad_norm": 0.6727090552023424, "learning_rate": 2.853365774533658e-06, "loss": 0.5355, "step": 16660 }, { "epoch": 0.4864383521649003, "grad_norm": 0.70948527903818, "learning_rate": 2.8532035685320357e-06, "loss": 0.5978, "step": 16661 }, { "epoch": 0.48646754839274764, "grad_norm": 0.8501313628799919, "learning_rate": 2.8530413625304137e-06, "loss": 0.7161, "step": 16662 }, { "epoch": 0.486496744620595, "grad_norm": 0.6855037363792257, "learning_rate": 2.8528791565287917e-06, "loss": 0.5496, "step": 16663 }, { "epoch": 0.48652594084844236, "grad_norm": 0.6998020068506945, "learning_rate": 2.8527169505271697e-06, "loss": 0.6117, "step": 16664 }, { "epoch": 0.4865551370762897, "grad_norm": 0.7866071675069046, "learning_rate": 2.8525547445255473e-06, "loss": 0.7125, "step": 16665 }, { "epoch": 0.4865843333041371, "grad_norm": 0.7553474557388399, "learning_rate": 2.8523925385239253e-06, "loss": 0.6546, "step": 16666 }, { "epoch": 0.48661352953198445, "grad_norm": 0.7606304253928788, "learning_rate": 2.8522303325223033e-06, "loss": 0.6979, "step": 16667 }, { "epoch": 0.4866427257598318, "grad_norm": 0.7741539550195176, "learning_rate": 2.8520681265206817e-06, "loss": 0.7093, "step": 16668 }, { "epoch": 0.48667192198767917, "grad_norm": 0.7159631470665928, "learning_rate": 2.8519059205190597e-06, "loss": 0.5858, "step": 16669 }, { "epoch": 0.48670111821552653, "grad_norm": 0.7588409872574227, "learning_rate": 2.8517437145174377e-06, "loss": 0.7328, "step": 16670 }, { "epoch": 0.4867303144433739, "grad_norm": 0.7800995070061367, "learning_rate": 2.8515815085158153e-06, "loss": 0.6953, "step": 16671 }, { "epoch": 0.48675951067122125, "grad_norm": 0.6892995046856784, "learning_rate": 2.8514193025141933e-06, "loss": 0.6022, "step": 16672 }, { "epoch": 0.4867887068990686, "grad_norm": 0.6941266433018161, "learning_rate": 2.8512570965125713e-06, "loss": 0.609, "step": 16673 }, { "epoch": 0.48681790312691603, "grad_norm": 0.7544328250498237, "learning_rate": 2.8510948905109493e-06, "loss": 0.6582, "step": 16674 }, { "epoch": 0.4868470993547634, "grad_norm": 0.6835338300468894, "learning_rate": 2.8509326845093273e-06, "loss": 0.6348, "step": 16675 }, { "epoch": 0.48687629558261075, "grad_norm": 0.7129081979489337, "learning_rate": 2.850770478507705e-06, "loss": 0.6412, "step": 16676 }, { "epoch": 0.4869054918104581, "grad_norm": 0.7194495272728952, "learning_rate": 2.850608272506083e-06, "loss": 0.6394, "step": 16677 }, { "epoch": 0.4869346880383055, "grad_norm": 0.7401336374936445, "learning_rate": 2.850446066504461e-06, "loss": 0.6744, "step": 16678 }, { "epoch": 0.48696388426615284, "grad_norm": 0.7578585386914859, "learning_rate": 2.850283860502839e-06, "loss": 0.7236, "step": 16679 }, { "epoch": 0.4869930804940002, "grad_norm": 0.8274902884662071, "learning_rate": 2.8501216545012165e-06, "loss": 0.6354, "step": 16680 }, { "epoch": 0.48702227672184756, "grad_norm": 0.7365538917948775, "learning_rate": 2.8499594484995945e-06, "loss": 0.668, "step": 16681 }, { "epoch": 0.4870514729496949, "grad_norm": 0.6949820547698847, "learning_rate": 2.8497972424979725e-06, "loss": 0.5779, "step": 16682 }, { "epoch": 0.4870806691775423, "grad_norm": 0.8720169134855662, "learning_rate": 2.8496350364963505e-06, "loss": 0.7052, "step": 16683 }, { "epoch": 0.48710986540538964, "grad_norm": 0.7511294950371415, "learning_rate": 2.849472830494728e-06, "loss": 0.6246, "step": 16684 }, { "epoch": 0.487139061633237, "grad_norm": 0.7723025412030528, "learning_rate": 2.849310624493106e-06, "loss": 0.5931, "step": 16685 }, { "epoch": 0.48716825786108436, "grad_norm": 0.7247074028859033, "learning_rate": 2.849148418491484e-06, "loss": 0.6623, "step": 16686 }, { "epoch": 0.4871974540889317, "grad_norm": 0.786080630512789, "learning_rate": 2.8489862124898626e-06, "loss": 0.7857, "step": 16687 }, { "epoch": 0.4872266503167791, "grad_norm": 0.7245256315514582, "learning_rate": 2.8488240064882406e-06, "loss": 0.5878, "step": 16688 }, { "epoch": 0.48725584654462645, "grad_norm": 0.7800887529236141, "learning_rate": 2.8486618004866186e-06, "loss": 0.682, "step": 16689 }, { "epoch": 0.4872850427724738, "grad_norm": 0.7465684561974288, "learning_rate": 2.848499594484996e-06, "loss": 0.6447, "step": 16690 }, { "epoch": 0.48731423900032117, "grad_norm": 0.7270437206839392, "learning_rate": 2.848337388483374e-06, "loss": 0.6738, "step": 16691 }, { "epoch": 0.48734343522816853, "grad_norm": 0.7165315941668617, "learning_rate": 2.848175182481752e-06, "loss": 0.678, "step": 16692 }, { "epoch": 0.4873726314560159, "grad_norm": 0.7573068785545405, "learning_rate": 2.84801297648013e-06, "loss": 0.6945, "step": 16693 }, { "epoch": 0.48740182768386325, "grad_norm": 0.7087237697024544, "learning_rate": 2.847850770478508e-06, "loss": 0.5684, "step": 16694 }, { "epoch": 0.4874310239117106, "grad_norm": 0.7316011026440288, "learning_rate": 2.8476885644768858e-06, "loss": 0.6355, "step": 16695 }, { "epoch": 0.487460220139558, "grad_norm": 0.644124184361672, "learning_rate": 2.8475263584752638e-06, "loss": 0.5027, "step": 16696 }, { "epoch": 0.48748941636740534, "grad_norm": 0.7650516481164459, "learning_rate": 2.847364152473642e-06, "loss": 0.63, "step": 16697 }, { "epoch": 0.4875186125952527, "grad_norm": 0.7474719239270893, "learning_rate": 2.84720194647202e-06, "loss": 0.7275, "step": 16698 }, { "epoch": 0.48754780882310006, "grad_norm": 0.7549578577845211, "learning_rate": 2.8470397404703974e-06, "loss": 0.704, "step": 16699 }, { "epoch": 0.4875770050509474, "grad_norm": 0.7807134350654006, "learning_rate": 2.8468775344687754e-06, "loss": 0.6344, "step": 16700 }, { "epoch": 0.4876062012787948, "grad_norm": 0.661993574242351, "learning_rate": 2.8467153284671534e-06, "loss": 0.5419, "step": 16701 }, { "epoch": 0.48763539750664214, "grad_norm": 0.7469600108082514, "learning_rate": 2.8465531224655314e-06, "loss": 0.6421, "step": 16702 }, { "epoch": 0.4876645937344895, "grad_norm": 0.7132603882972445, "learning_rate": 2.846390916463909e-06, "loss": 0.6148, "step": 16703 }, { "epoch": 0.48769378996233687, "grad_norm": 0.744480620469513, "learning_rate": 2.846228710462287e-06, "loss": 0.6676, "step": 16704 }, { "epoch": 0.4877229861901842, "grad_norm": 0.746185429221777, "learning_rate": 2.846066504460665e-06, "loss": 0.5978, "step": 16705 }, { "epoch": 0.4877521824180316, "grad_norm": 0.7077142856058737, "learning_rate": 2.8459042984590434e-06, "loss": 0.6301, "step": 16706 }, { "epoch": 0.48778137864587895, "grad_norm": 0.7034084981625914, "learning_rate": 2.8457420924574214e-06, "loss": 0.67, "step": 16707 }, { "epoch": 0.4878105748737263, "grad_norm": 0.6865835701696942, "learning_rate": 2.8455798864557994e-06, "loss": 0.5556, "step": 16708 }, { "epoch": 0.48783977110157367, "grad_norm": 0.6688193583450421, "learning_rate": 2.845417680454177e-06, "loss": 0.552, "step": 16709 }, { "epoch": 0.48786896732942103, "grad_norm": 0.7019046521690142, "learning_rate": 2.845255474452555e-06, "loss": 0.5601, "step": 16710 }, { "epoch": 0.4878981635572684, "grad_norm": 0.699399518650657, "learning_rate": 2.845093268450933e-06, "loss": 0.6551, "step": 16711 }, { "epoch": 0.48792735978511576, "grad_norm": 0.659690005434181, "learning_rate": 2.844931062449311e-06, "loss": 0.5697, "step": 16712 }, { "epoch": 0.4879565560129631, "grad_norm": 1.1033554710035378, "learning_rate": 2.844768856447689e-06, "loss": 0.7003, "step": 16713 }, { "epoch": 0.4879857522408105, "grad_norm": 0.6629794115586015, "learning_rate": 2.8446066504460666e-06, "loss": 0.5401, "step": 16714 }, { "epoch": 0.48801494846865784, "grad_norm": 0.7868030294508577, "learning_rate": 2.8444444444444446e-06, "loss": 0.7866, "step": 16715 }, { "epoch": 0.4880441446965052, "grad_norm": 0.646833127721564, "learning_rate": 2.8442822384428227e-06, "loss": 0.5345, "step": 16716 }, { "epoch": 0.48807334092435256, "grad_norm": 0.6965492835881432, "learning_rate": 2.8441200324412007e-06, "loss": 0.6139, "step": 16717 }, { "epoch": 0.4881025371521999, "grad_norm": 0.700450963114248, "learning_rate": 2.8439578264395782e-06, "loss": 0.6425, "step": 16718 }, { "epoch": 0.4881317333800473, "grad_norm": 0.6939628005386158, "learning_rate": 2.8437956204379562e-06, "loss": 0.5852, "step": 16719 }, { "epoch": 0.48816092960789464, "grad_norm": 0.7276339910631555, "learning_rate": 2.8436334144363343e-06, "loss": 0.6301, "step": 16720 }, { "epoch": 0.488190125835742, "grad_norm": 0.6570313442847733, "learning_rate": 2.8434712084347123e-06, "loss": 0.5723, "step": 16721 }, { "epoch": 0.48821932206358937, "grad_norm": 0.7027269310397415, "learning_rate": 2.84330900243309e-06, "loss": 0.6021, "step": 16722 }, { "epoch": 0.48824851829143673, "grad_norm": 0.7009109458699393, "learning_rate": 2.843146796431468e-06, "loss": 0.6294, "step": 16723 }, { "epoch": 0.4882777145192841, "grad_norm": 0.6552150180955668, "learning_rate": 2.842984590429846e-06, "loss": 0.5441, "step": 16724 }, { "epoch": 0.48830691074713145, "grad_norm": 0.7224470771594157, "learning_rate": 2.8428223844282243e-06, "loss": 0.6865, "step": 16725 }, { "epoch": 0.4883361069749788, "grad_norm": 0.7363917817552021, "learning_rate": 2.8426601784266023e-06, "loss": 0.6506, "step": 16726 }, { "epoch": 0.4883653032028262, "grad_norm": 0.7039451783098055, "learning_rate": 2.8424979724249803e-06, "loss": 0.6538, "step": 16727 }, { "epoch": 0.48839449943067353, "grad_norm": 0.655012309501769, "learning_rate": 2.842335766423358e-06, "loss": 0.4998, "step": 16728 }, { "epoch": 0.4884236956585209, "grad_norm": 0.75196898411499, "learning_rate": 2.842173560421736e-06, "loss": 0.7074, "step": 16729 }, { "epoch": 0.48845289188636826, "grad_norm": 0.7125136884819153, "learning_rate": 2.842011354420114e-06, "loss": 0.6474, "step": 16730 }, { "epoch": 0.4884820881142156, "grad_norm": 0.7648664797134704, "learning_rate": 2.841849148418492e-06, "loss": 0.7698, "step": 16731 }, { "epoch": 0.488511284342063, "grad_norm": 0.7302597494249974, "learning_rate": 2.84168694241687e-06, "loss": 0.6377, "step": 16732 }, { "epoch": 0.48854048056991034, "grad_norm": 0.8185674718223959, "learning_rate": 2.8415247364152475e-06, "loss": 0.6065, "step": 16733 }, { "epoch": 0.48856967679775776, "grad_norm": 0.6885102388853959, "learning_rate": 2.8413625304136255e-06, "loss": 0.639, "step": 16734 }, { "epoch": 0.4885988730256051, "grad_norm": 0.726344423110387, "learning_rate": 2.8412003244120035e-06, "loss": 0.6442, "step": 16735 }, { "epoch": 0.4886280692534525, "grad_norm": 0.7961335459431658, "learning_rate": 2.8410381184103815e-06, "loss": 0.7371, "step": 16736 }, { "epoch": 0.48865726548129984, "grad_norm": 0.8197174948528723, "learning_rate": 2.840875912408759e-06, "loss": 0.8072, "step": 16737 }, { "epoch": 0.4886864617091472, "grad_norm": 0.6985496379591692, "learning_rate": 2.840713706407137e-06, "loss": 0.5769, "step": 16738 }, { "epoch": 0.48871565793699456, "grad_norm": 0.7293575571852466, "learning_rate": 2.840551500405515e-06, "loss": 0.6023, "step": 16739 }, { "epoch": 0.4887448541648419, "grad_norm": 0.7518790609245316, "learning_rate": 2.840389294403893e-06, "loss": 0.7241, "step": 16740 }, { "epoch": 0.4887740503926893, "grad_norm": 0.7233832327558699, "learning_rate": 2.8402270884022707e-06, "loss": 0.6571, "step": 16741 }, { "epoch": 0.48880324662053665, "grad_norm": 0.6903037564494076, "learning_rate": 2.8400648824006487e-06, "loss": 0.5831, "step": 16742 }, { "epoch": 0.488832442848384, "grad_norm": 0.6943585456562101, "learning_rate": 2.8399026763990267e-06, "loss": 0.6113, "step": 16743 }, { "epoch": 0.48886163907623137, "grad_norm": 0.7931681846344166, "learning_rate": 2.839740470397405e-06, "loss": 0.7119, "step": 16744 }, { "epoch": 0.48889083530407873, "grad_norm": 0.7274518010571537, "learning_rate": 2.839578264395783e-06, "loss": 0.6672, "step": 16745 }, { "epoch": 0.4889200315319261, "grad_norm": 0.7253647053668268, "learning_rate": 2.839416058394161e-06, "loss": 0.6371, "step": 16746 }, { "epoch": 0.48894922775977345, "grad_norm": 0.76203701218994, "learning_rate": 2.8392538523925387e-06, "loss": 0.7186, "step": 16747 }, { "epoch": 0.4889784239876208, "grad_norm": 0.7955355170974452, "learning_rate": 2.8390916463909168e-06, "loss": 0.7561, "step": 16748 }, { "epoch": 0.4890076202154682, "grad_norm": 0.7242435017897602, "learning_rate": 2.8389294403892948e-06, "loss": 0.6452, "step": 16749 }, { "epoch": 0.48903681644331554, "grad_norm": 0.7344831173405102, "learning_rate": 2.8387672343876728e-06, "loss": 0.6628, "step": 16750 }, { "epoch": 0.4890660126711629, "grad_norm": 0.7291601176909724, "learning_rate": 2.8386050283860508e-06, "loss": 0.6154, "step": 16751 }, { "epoch": 0.48909520889901026, "grad_norm": 0.7161969385461111, "learning_rate": 2.8384428223844284e-06, "loss": 0.6432, "step": 16752 }, { "epoch": 0.4891244051268576, "grad_norm": 0.7322603844441291, "learning_rate": 2.8382806163828064e-06, "loss": 0.6419, "step": 16753 }, { "epoch": 0.489153601354705, "grad_norm": 0.7452875807460269, "learning_rate": 2.8381184103811844e-06, "loss": 0.6825, "step": 16754 }, { "epoch": 0.48918279758255234, "grad_norm": 0.7580326442974598, "learning_rate": 2.8379562043795624e-06, "loss": 0.6272, "step": 16755 }, { "epoch": 0.4892119938103997, "grad_norm": 0.7356861088947368, "learning_rate": 2.83779399837794e-06, "loss": 0.6583, "step": 16756 }, { "epoch": 0.48924119003824706, "grad_norm": 0.756705288980043, "learning_rate": 2.837631792376318e-06, "loss": 0.7358, "step": 16757 }, { "epoch": 0.4892703862660944, "grad_norm": 0.6687198677232112, "learning_rate": 2.837469586374696e-06, "loss": 0.5205, "step": 16758 }, { "epoch": 0.4892995824939418, "grad_norm": 0.8080523621161861, "learning_rate": 2.837307380373074e-06, "loss": 0.7372, "step": 16759 }, { "epoch": 0.48932877872178915, "grad_norm": 0.7516965094697359, "learning_rate": 2.8371451743714516e-06, "loss": 0.7437, "step": 16760 }, { "epoch": 0.4893579749496365, "grad_norm": 0.7545498795616932, "learning_rate": 2.8369829683698296e-06, "loss": 0.6714, "step": 16761 }, { "epoch": 0.48938717117748387, "grad_norm": 0.7161047683777307, "learning_rate": 2.8368207623682076e-06, "loss": 0.6418, "step": 16762 }, { "epoch": 0.48941636740533123, "grad_norm": 0.7681998065432758, "learning_rate": 2.836658556366586e-06, "loss": 0.7052, "step": 16763 }, { "epoch": 0.4894455636331786, "grad_norm": 0.7060082595168491, "learning_rate": 2.836496350364964e-06, "loss": 0.6459, "step": 16764 }, { "epoch": 0.48947475986102595, "grad_norm": 0.7179439805533233, "learning_rate": 2.836334144363342e-06, "loss": 0.5689, "step": 16765 }, { "epoch": 0.4895039560888733, "grad_norm": 0.7340204204635049, "learning_rate": 2.8361719383617196e-06, "loss": 0.6425, "step": 16766 }, { "epoch": 0.4895331523167207, "grad_norm": 0.657813091831698, "learning_rate": 2.8360097323600976e-06, "loss": 0.5515, "step": 16767 }, { "epoch": 0.48956234854456804, "grad_norm": 0.6881373088758381, "learning_rate": 2.8358475263584756e-06, "loss": 0.6068, "step": 16768 }, { "epoch": 0.4895915447724154, "grad_norm": 0.7300733200938859, "learning_rate": 2.8356853203568536e-06, "loss": 0.6338, "step": 16769 }, { "epoch": 0.48962074100026276, "grad_norm": 0.7007287856324655, "learning_rate": 2.8355231143552316e-06, "loss": 0.6218, "step": 16770 }, { "epoch": 0.4896499372281101, "grad_norm": 0.7844447761828729, "learning_rate": 2.8353609083536092e-06, "loss": 0.6306, "step": 16771 }, { "epoch": 0.4896791334559575, "grad_norm": 0.7488791743839056, "learning_rate": 2.8351987023519872e-06, "loss": 0.6658, "step": 16772 }, { "epoch": 0.48970832968380484, "grad_norm": 0.6872752154393433, "learning_rate": 2.8350364963503652e-06, "loss": 0.6056, "step": 16773 }, { "epoch": 0.4897375259116522, "grad_norm": 0.7075475825013577, "learning_rate": 2.8348742903487432e-06, "loss": 0.6291, "step": 16774 }, { "epoch": 0.48976672213949957, "grad_norm": 0.7277900149182579, "learning_rate": 2.834712084347121e-06, "loss": 0.6243, "step": 16775 }, { "epoch": 0.4897959183673469, "grad_norm": 0.7269577089690105, "learning_rate": 2.834549878345499e-06, "loss": 0.6425, "step": 16776 }, { "epoch": 0.4898251145951943, "grad_norm": 0.7360384045375693, "learning_rate": 2.834387672343877e-06, "loss": 0.7042, "step": 16777 }, { "epoch": 0.48985431082304165, "grad_norm": 0.7877547293719682, "learning_rate": 2.834225466342255e-06, "loss": 0.6394, "step": 16778 }, { "epoch": 0.489883507050889, "grad_norm": 0.7919154301518155, "learning_rate": 2.8340632603406324e-06, "loss": 0.693, "step": 16779 }, { "epoch": 0.4899127032787364, "grad_norm": 0.749138658390515, "learning_rate": 2.8339010543390104e-06, "loss": 0.6667, "step": 16780 }, { "epoch": 0.48994189950658373, "grad_norm": 0.7781613244278942, "learning_rate": 2.833738848337389e-06, "loss": 0.7761, "step": 16781 }, { "epoch": 0.4899710957344311, "grad_norm": 0.6927754236231856, "learning_rate": 2.833576642335767e-06, "loss": 0.5818, "step": 16782 }, { "epoch": 0.49000029196227846, "grad_norm": 0.7300388293122438, "learning_rate": 2.833414436334145e-06, "loss": 0.6548, "step": 16783 }, { "epoch": 0.4900294881901258, "grad_norm": 0.7314737131913118, "learning_rate": 2.833252230332523e-06, "loss": 0.629, "step": 16784 }, { "epoch": 0.4900586844179732, "grad_norm": 0.7152881623164923, "learning_rate": 2.8330900243309005e-06, "loss": 0.5856, "step": 16785 }, { "epoch": 0.49008788064582054, "grad_norm": 0.7393580351220488, "learning_rate": 2.8329278183292785e-06, "loss": 0.685, "step": 16786 }, { "epoch": 0.4901170768736679, "grad_norm": 0.7063421286879358, "learning_rate": 2.8327656123276565e-06, "loss": 0.5908, "step": 16787 }, { "epoch": 0.49014627310151526, "grad_norm": 0.6965551068341361, "learning_rate": 2.8326034063260345e-06, "loss": 0.5951, "step": 16788 }, { "epoch": 0.4901754693293626, "grad_norm": 0.6984690850872299, "learning_rate": 2.832441200324412e-06, "loss": 0.6337, "step": 16789 }, { "epoch": 0.49020466555721, "grad_norm": 0.7210045047558683, "learning_rate": 2.83227899432279e-06, "loss": 0.678, "step": 16790 }, { "epoch": 0.49023386178505735, "grad_norm": 0.763361801821466, "learning_rate": 2.832116788321168e-06, "loss": 0.6696, "step": 16791 }, { "epoch": 0.4902630580129047, "grad_norm": 0.7086542002547567, "learning_rate": 2.831954582319546e-06, "loss": 0.6266, "step": 16792 }, { "epoch": 0.49029225424075207, "grad_norm": 0.7397324191393839, "learning_rate": 2.831792376317924e-06, "loss": 0.7049, "step": 16793 }, { "epoch": 0.4903214504685995, "grad_norm": 0.7082339074443513, "learning_rate": 2.8316301703163017e-06, "loss": 0.6386, "step": 16794 }, { "epoch": 0.49035064669644685, "grad_norm": 0.7609470064230471, "learning_rate": 2.8314679643146797e-06, "loss": 0.7141, "step": 16795 }, { "epoch": 0.4903798429242942, "grad_norm": 0.7388886606647792, "learning_rate": 2.8313057583130577e-06, "loss": 0.6666, "step": 16796 }, { "epoch": 0.49040903915214157, "grad_norm": 0.7252034787604463, "learning_rate": 2.8311435523114357e-06, "loss": 0.6093, "step": 16797 }, { "epoch": 0.49043823537998893, "grad_norm": 0.6841542089695081, "learning_rate": 2.8309813463098133e-06, "loss": 0.5959, "step": 16798 }, { "epoch": 0.4904674316078363, "grad_norm": 0.7222415190459033, "learning_rate": 2.8308191403081913e-06, "loss": 0.6571, "step": 16799 }, { "epoch": 0.49049662783568365, "grad_norm": 0.7436420980065864, "learning_rate": 2.8306569343065697e-06, "loss": 0.6462, "step": 16800 }, { "epoch": 0.490525824063531, "grad_norm": 0.7089149487783718, "learning_rate": 2.8304947283049477e-06, "loss": 0.6475, "step": 16801 }, { "epoch": 0.4905550202913784, "grad_norm": 0.7161008637013179, "learning_rate": 2.8303325223033257e-06, "loss": 0.6313, "step": 16802 }, { "epoch": 0.49058421651922574, "grad_norm": 0.656797231485742, "learning_rate": 2.8301703163017037e-06, "loss": 0.5516, "step": 16803 }, { "epoch": 0.4906134127470731, "grad_norm": 0.788663075539075, "learning_rate": 2.8300081103000813e-06, "loss": 0.6756, "step": 16804 }, { "epoch": 0.49064260897492046, "grad_norm": 0.7796172188514371, "learning_rate": 2.8298459042984593e-06, "loss": 0.6593, "step": 16805 }, { "epoch": 0.4906718052027678, "grad_norm": 0.7082598241797121, "learning_rate": 2.8296836982968373e-06, "loss": 0.6688, "step": 16806 }, { "epoch": 0.4907010014306152, "grad_norm": 0.779504949051036, "learning_rate": 2.8295214922952153e-06, "loss": 0.7186, "step": 16807 }, { "epoch": 0.49073019765846254, "grad_norm": 0.7479580296026864, "learning_rate": 2.829359286293593e-06, "loss": 0.6374, "step": 16808 }, { "epoch": 0.4907593938863099, "grad_norm": 0.649507719614761, "learning_rate": 2.829197080291971e-06, "loss": 0.5348, "step": 16809 }, { "epoch": 0.49078859011415726, "grad_norm": 0.7796377650557094, "learning_rate": 2.829034874290349e-06, "loss": 0.6878, "step": 16810 }, { "epoch": 0.4908177863420046, "grad_norm": 0.7015417125812669, "learning_rate": 2.828872668288727e-06, "loss": 0.5954, "step": 16811 }, { "epoch": 0.490846982569852, "grad_norm": 0.7146285406025367, "learning_rate": 2.828710462287105e-06, "loss": 0.6353, "step": 16812 }, { "epoch": 0.49087617879769935, "grad_norm": 0.6586107499280701, "learning_rate": 2.8285482562854825e-06, "loss": 0.5384, "step": 16813 }, { "epoch": 0.4909053750255467, "grad_norm": 0.7138620683508742, "learning_rate": 2.8283860502838605e-06, "loss": 0.6468, "step": 16814 }, { "epoch": 0.49093457125339407, "grad_norm": 0.7548627212940243, "learning_rate": 2.8282238442822385e-06, "loss": 0.6879, "step": 16815 }, { "epoch": 0.49096376748124143, "grad_norm": 0.6788508391218688, "learning_rate": 2.8280616382806166e-06, "loss": 0.5917, "step": 16816 }, { "epoch": 0.4909929637090888, "grad_norm": 0.8087901005067524, "learning_rate": 2.827899432278994e-06, "loss": 0.6654, "step": 16817 }, { "epoch": 0.49102215993693615, "grad_norm": 0.7829598031192578, "learning_rate": 2.827737226277372e-06, "loss": 0.6621, "step": 16818 }, { "epoch": 0.4910513561647835, "grad_norm": 0.6908373010718087, "learning_rate": 2.8275750202757506e-06, "loss": 0.6017, "step": 16819 }, { "epoch": 0.4910805523926309, "grad_norm": 0.7178977678236922, "learning_rate": 2.8274128142741286e-06, "loss": 0.6482, "step": 16820 }, { "epoch": 0.49110974862047824, "grad_norm": 0.8018469574529027, "learning_rate": 2.8272506082725066e-06, "loss": 0.7031, "step": 16821 }, { "epoch": 0.4911389448483256, "grad_norm": 0.7467022470800826, "learning_rate": 2.8270884022708846e-06, "loss": 0.703, "step": 16822 }, { "epoch": 0.49116814107617296, "grad_norm": 0.7610941615305497, "learning_rate": 2.826926196269262e-06, "loss": 0.6886, "step": 16823 }, { "epoch": 0.4911973373040203, "grad_norm": 0.7776485548788845, "learning_rate": 2.82676399026764e-06, "loss": 0.7139, "step": 16824 }, { "epoch": 0.4912265335318677, "grad_norm": 0.766683983249457, "learning_rate": 2.826601784266018e-06, "loss": 0.6717, "step": 16825 }, { "epoch": 0.49125572975971504, "grad_norm": 0.7749525847059728, "learning_rate": 2.826439578264396e-06, "loss": 0.7028, "step": 16826 }, { "epoch": 0.4912849259875624, "grad_norm": 0.7969390551693526, "learning_rate": 2.8262773722627738e-06, "loss": 0.6907, "step": 16827 }, { "epoch": 0.49131412221540977, "grad_norm": 0.702274029115354, "learning_rate": 2.826115166261152e-06, "loss": 0.6426, "step": 16828 }, { "epoch": 0.4913433184432571, "grad_norm": 0.6951038185927632, "learning_rate": 2.82595296025953e-06, "loss": 0.6355, "step": 16829 }, { "epoch": 0.4913725146711045, "grad_norm": 0.7009423382330161, "learning_rate": 2.825790754257908e-06, "loss": 0.6315, "step": 16830 }, { "epoch": 0.49140171089895185, "grad_norm": 0.7710241616513623, "learning_rate": 2.825628548256286e-06, "loss": 0.6416, "step": 16831 }, { "epoch": 0.4914309071267992, "grad_norm": 0.6910013770268166, "learning_rate": 2.8254663422546634e-06, "loss": 0.6325, "step": 16832 }, { "epoch": 0.49146010335464657, "grad_norm": 0.9037206024699298, "learning_rate": 2.8253041362530414e-06, "loss": 0.6762, "step": 16833 }, { "epoch": 0.49148929958249393, "grad_norm": 0.7480942449930407, "learning_rate": 2.8251419302514194e-06, "loss": 0.6429, "step": 16834 }, { "epoch": 0.4915184958103413, "grad_norm": 0.6985253287937302, "learning_rate": 2.8249797242497974e-06, "loss": 0.5984, "step": 16835 }, { "epoch": 0.49154769203818865, "grad_norm": 0.6870419073313223, "learning_rate": 2.824817518248175e-06, "loss": 0.5788, "step": 16836 }, { "epoch": 0.491576888266036, "grad_norm": 0.7595000509846467, "learning_rate": 2.824655312246553e-06, "loss": 0.6714, "step": 16837 }, { "epoch": 0.4916060844938834, "grad_norm": 0.7249178088610058, "learning_rate": 2.8244931062449314e-06, "loss": 0.6641, "step": 16838 }, { "epoch": 0.49163528072173074, "grad_norm": 0.6785601414017748, "learning_rate": 2.8243309002433094e-06, "loss": 0.5798, "step": 16839 }, { "epoch": 0.4916644769495781, "grad_norm": 0.6756006937951943, "learning_rate": 2.8241686942416874e-06, "loss": 0.5688, "step": 16840 }, { "epoch": 0.49169367317742546, "grad_norm": 0.7006423770553518, "learning_rate": 2.8240064882400655e-06, "loss": 0.6347, "step": 16841 }, { "epoch": 0.4917228694052728, "grad_norm": 0.7849426716988099, "learning_rate": 2.823844282238443e-06, "loss": 0.7451, "step": 16842 }, { "epoch": 0.4917520656331202, "grad_norm": 0.7250970785778367, "learning_rate": 2.823682076236821e-06, "loss": 0.6842, "step": 16843 }, { "epoch": 0.49178126186096754, "grad_norm": 0.6922315707173354, "learning_rate": 2.823519870235199e-06, "loss": 0.6365, "step": 16844 }, { "epoch": 0.4918104580888149, "grad_norm": 0.7326953351831883, "learning_rate": 2.823357664233577e-06, "loss": 0.6469, "step": 16845 }, { "epoch": 0.49183965431666227, "grad_norm": 0.7539734156031367, "learning_rate": 2.8231954582319546e-06, "loss": 0.6639, "step": 16846 }, { "epoch": 0.49186885054450963, "grad_norm": 0.7173202683548856, "learning_rate": 2.8230332522303326e-06, "loss": 0.6493, "step": 16847 }, { "epoch": 0.491898046772357, "grad_norm": 0.7596483238711255, "learning_rate": 2.8228710462287107e-06, "loss": 0.6673, "step": 16848 }, { "epoch": 0.49192724300020435, "grad_norm": 0.7722839067467271, "learning_rate": 2.8227088402270887e-06, "loss": 0.6654, "step": 16849 }, { "epoch": 0.4919564392280517, "grad_norm": 0.6977475805714022, "learning_rate": 2.8225466342254667e-06, "loss": 0.6307, "step": 16850 }, { "epoch": 0.4919856354558991, "grad_norm": 0.7100026331968107, "learning_rate": 2.8223844282238443e-06, "loss": 0.622, "step": 16851 }, { "epoch": 0.49201483168374643, "grad_norm": 0.7299449484464604, "learning_rate": 2.8222222222222223e-06, "loss": 0.5946, "step": 16852 }, { "epoch": 0.4920440279115938, "grad_norm": 0.6893523718867782, "learning_rate": 2.8220600162206003e-06, "loss": 0.6143, "step": 16853 }, { "epoch": 0.49207322413944116, "grad_norm": 0.7670246981049609, "learning_rate": 2.8218978102189783e-06, "loss": 0.7019, "step": 16854 }, { "epoch": 0.4921024203672886, "grad_norm": 0.6658759585712364, "learning_rate": 2.821735604217356e-06, "loss": 0.571, "step": 16855 }, { "epoch": 0.49213161659513593, "grad_norm": 0.7467066839808031, "learning_rate": 2.821573398215734e-06, "loss": 0.6277, "step": 16856 }, { "epoch": 0.4921608128229833, "grad_norm": 0.747694682278104, "learning_rate": 2.8214111922141123e-06, "loss": 0.6513, "step": 16857 }, { "epoch": 0.49219000905083066, "grad_norm": 0.7582685215025093, "learning_rate": 2.8212489862124903e-06, "loss": 0.746, "step": 16858 }, { "epoch": 0.492219205278678, "grad_norm": 0.7263410265561835, "learning_rate": 2.8210867802108683e-06, "loss": 0.657, "step": 16859 }, { "epoch": 0.4922484015065254, "grad_norm": 0.7097131075528352, "learning_rate": 2.8209245742092463e-06, "loss": 0.649, "step": 16860 }, { "epoch": 0.49227759773437274, "grad_norm": 0.7150415802895603, "learning_rate": 2.820762368207624e-06, "loss": 0.6693, "step": 16861 }, { "epoch": 0.4923067939622201, "grad_norm": 0.7012113406840188, "learning_rate": 2.820600162206002e-06, "loss": 0.6533, "step": 16862 }, { "epoch": 0.49233599019006746, "grad_norm": 0.6596651202667517, "learning_rate": 2.82043795620438e-06, "loss": 0.5349, "step": 16863 }, { "epoch": 0.4923651864179148, "grad_norm": 0.7710579559955028, "learning_rate": 2.820275750202758e-06, "loss": 0.7134, "step": 16864 }, { "epoch": 0.4923943826457622, "grad_norm": 0.7533914110890739, "learning_rate": 2.8201135442011355e-06, "loss": 0.6881, "step": 16865 }, { "epoch": 0.49242357887360955, "grad_norm": 0.6969147789569051, "learning_rate": 2.8199513381995135e-06, "loss": 0.656, "step": 16866 }, { "epoch": 0.4924527751014569, "grad_norm": 0.7073373059303365, "learning_rate": 2.8197891321978915e-06, "loss": 0.6482, "step": 16867 }, { "epoch": 0.49248197132930427, "grad_norm": 0.7022043185460468, "learning_rate": 2.8196269261962695e-06, "loss": 0.589, "step": 16868 }, { "epoch": 0.49251116755715163, "grad_norm": 0.6856636296723555, "learning_rate": 2.8194647201946475e-06, "loss": 0.6123, "step": 16869 }, { "epoch": 0.492540363784999, "grad_norm": 0.8107514481350351, "learning_rate": 2.819302514193025e-06, "loss": 0.6119, "step": 16870 }, { "epoch": 0.49256956001284635, "grad_norm": 0.7300541261022001, "learning_rate": 2.819140308191403e-06, "loss": 0.6267, "step": 16871 }, { "epoch": 0.4925987562406937, "grad_norm": 0.7554363432589928, "learning_rate": 2.818978102189781e-06, "loss": 0.6439, "step": 16872 }, { "epoch": 0.4926279524685411, "grad_norm": 0.7697565612008235, "learning_rate": 2.818815896188159e-06, "loss": 0.7245, "step": 16873 }, { "epoch": 0.49265714869638844, "grad_norm": 0.6906343399142991, "learning_rate": 2.8186536901865367e-06, "loss": 0.6325, "step": 16874 }, { "epoch": 0.4926863449242358, "grad_norm": 0.8174683692024982, "learning_rate": 2.8184914841849147e-06, "loss": 0.6843, "step": 16875 }, { "epoch": 0.49271554115208316, "grad_norm": 0.9223991720786819, "learning_rate": 2.818329278183293e-06, "loss": 0.6951, "step": 16876 }, { "epoch": 0.4927447373799305, "grad_norm": 0.705698212041546, "learning_rate": 2.818167072181671e-06, "loss": 0.6489, "step": 16877 }, { "epoch": 0.4927739336077779, "grad_norm": 0.7514708264041904, "learning_rate": 2.818004866180049e-06, "loss": 0.6611, "step": 16878 }, { "epoch": 0.49280312983562524, "grad_norm": 0.7680478146743351, "learning_rate": 2.817842660178427e-06, "loss": 0.7061, "step": 16879 }, { "epoch": 0.4928323260634726, "grad_norm": 0.7193076607502351, "learning_rate": 2.8176804541768048e-06, "loss": 0.6362, "step": 16880 }, { "epoch": 0.49286152229131996, "grad_norm": 0.7078948912891107, "learning_rate": 2.8175182481751828e-06, "loss": 0.6443, "step": 16881 }, { "epoch": 0.4928907185191673, "grad_norm": 0.6667008684439214, "learning_rate": 2.8173560421735608e-06, "loss": 0.5678, "step": 16882 }, { "epoch": 0.4929199147470147, "grad_norm": 0.7882489496492611, "learning_rate": 2.8171938361719388e-06, "loss": 0.7556, "step": 16883 }, { "epoch": 0.49294911097486205, "grad_norm": 0.7165055453291149, "learning_rate": 2.8170316301703164e-06, "loss": 0.6485, "step": 16884 }, { "epoch": 0.4929783072027094, "grad_norm": 0.8017705934451907, "learning_rate": 2.8168694241686944e-06, "loss": 0.6262, "step": 16885 }, { "epoch": 0.49300750343055677, "grad_norm": 0.7799586973181396, "learning_rate": 2.8167072181670724e-06, "loss": 0.7618, "step": 16886 }, { "epoch": 0.49303669965840413, "grad_norm": 0.744244532032464, "learning_rate": 2.8165450121654504e-06, "loss": 0.7247, "step": 16887 }, { "epoch": 0.4930658958862515, "grad_norm": 0.7465108324525033, "learning_rate": 2.8163828061638284e-06, "loss": 0.6368, "step": 16888 }, { "epoch": 0.49309509211409885, "grad_norm": 0.7705616357536031, "learning_rate": 2.816220600162206e-06, "loss": 0.7214, "step": 16889 }, { "epoch": 0.4931242883419462, "grad_norm": 0.6977620012255955, "learning_rate": 2.816058394160584e-06, "loss": 0.6541, "step": 16890 }, { "epoch": 0.4931534845697936, "grad_norm": 0.6501210989522935, "learning_rate": 2.815896188158962e-06, "loss": 0.5425, "step": 16891 }, { "epoch": 0.49318268079764094, "grad_norm": 0.7170988254936379, "learning_rate": 2.81573398215734e-06, "loss": 0.5864, "step": 16892 }, { "epoch": 0.4932118770254883, "grad_norm": 0.7002464289047603, "learning_rate": 2.8155717761557176e-06, "loss": 0.6408, "step": 16893 }, { "epoch": 0.49324107325333566, "grad_norm": 0.6976138920396749, "learning_rate": 2.8154095701540956e-06, "loss": 0.5754, "step": 16894 }, { "epoch": 0.493270269481183, "grad_norm": 0.804823528773105, "learning_rate": 2.815247364152474e-06, "loss": 0.6588, "step": 16895 }, { "epoch": 0.4932994657090304, "grad_norm": 0.7499888869790564, "learning_rate": 2.815085158150852e-06, "loss": 0.7068, "step": 16896 }, { "epoch": 0.49332866193687774, "grad_norm": 0.6538749070081425, "learning_rate": 2.81492295214923e-06, "loss": 0.5368, "step": 16897 }, { "epoch": 0.4933578581647251, "grad_norm": 0.7727798754697225, "learning_rate": 2.814760746147608e-06, "loss": 0.6153, "step": 16898 }, { "epoch": 0.49338705439257247, "grad_norm": 0.6934919958232125, "learning_rate": 2.8145985401459856e-06, "loss": 0.592, "step": 16899 }, { "epoch": 0.4934162506204198, "grad_norm": 0.7682577147240502, "learning_rate": 2.8144363341443636e-06, "loss": 0.7682, "step": 16900 }, { "epoch": 0.4934454468482672, "grad_norm": 0.6990517900954338, "learning_rate": 2.8142741281427416e-06, "loss": 0.5806, "step": 16901 }, { "epoch": 0.49347464307611455, "grad_norm": 0.7325255513788088, "learning_rate": 2.8141119221411196e-06, "loss": 0.6719, "step": 16902 }, { "epoch": 0.4935038393039619, "grad_norm": 0.7558300344807884, "learning_rate": 2.8139497161394972e-06, "loss": 0.6578, "step": 16903 }, { "epoch": 0.49353303553180927, "grad_norm": 0.7453005024160084, "learning_rate": 2.8137875101378752e-06, "loss": 0.5963, "step": 16904 }, { "epoch": 0.49356223175965663, "grad_norm": 0.7508955838319681, "learning_rate": 2.8136253041362532e-06, "loss": 0.6499, "step": 16905 }, { "epoch": 0.493591427987504, "grad_norm": 0.7234430286674377, "learning_rate": 2.8134630981346312e-06, "loss": 0.6291, "step": 16906 }, { "epoch": 0.49362062421535136, "grad_norm": 0.702409248480006, "learning_rate": 2.8133008921330092e-06, "loss": 0.6034, "step": 16907 }, { "epoch": 0.4936498204431987, "grad_norm": 0.71482095688402, "learning_rate": 2.813138686131387e-06, "loss": 0.622, "step": 16908 }, { "epoch": 0.4936790166710461, "grad_norm": 0.7308769970599858, "learning_rate": 2.812976480129765e-06, "loss": 0.6418, "step": 16909 }, { "epoch": 0.49370821289889344, "grad_norm": 0.7349671379690057, "learning_rate": 2.812814274128143e-06, "loss": 0.5872, "step": 16910 }, { "epoch": 0.4937374091267408, "grad_norm": 0.6781516369158072, "learning_rate": 2.812652068126521e-06, "loss": 0.5755, "step": 16911 }, { "epoch": 0.49376660535458816, "grad_norm": 0.7400169441000638, "learning_rate": 2.8124898621248984e-06, "loss": 0.623, "step": 16912 }, { "epoch": 0.4937958015824355, "grad_norm": 0.740141840891498, "learning_rate": 2.8123276561232764e-06, "loss": 0.6717, "step": 16913 }, { "epoch": 0.4938249978102829, "grad_norm": 0.8088552678000847, "learning_rate": 2.812165450121655e-06, "loss": 0.7288, "step": 16914 }, { "epoch": 0.4938541940381303, "grad_norm": 0.7335915007439829, "learning_rate": 2.812003244120033e-06, "loss": 0.6652, "step": 16915 }, { "epoch": 0.49388339026597766, "grad_norm": 0.7264215434935365, "learning_rate": 2.811841038118411e-06, "loss": 0.6771, "step": 16916 }, { "epoch": 0.493912586493825, "grad_norm": 0.7318019323368061, "learning_rate": 2.811678832116789e-06, "loss": 0.6072, "step": 16917 }, { "epoch": 0.4939417827216724, "grad_norm": 0.7600333392404662, "learning_rate": 2.8115166261151665e-06, "loss": 0.6554, "step": 16918 }, { "epoch": 0.49397097894951975, "grad_norm": 0.7543851009749968, "learning_rate": 2.8113544201135445e-06, "loss": 0.6573, "step": 16919 }, { "epoch": 0.4940001751773671, "grad_norm": 0.7932014672967259, "learning_rate": 2.8111922141119225e-06, "loss": 0.7153, "step": 16920 }, { "epoch": 0.49402937140521447, "grad_norm": 0.7297127479134992, "learning_rate": 2.8110300081103005e-06, "loss": 0.6515, "step": 16921 }, { "epoch": 0.49405856763306183, "grad_norm": 0.7584217112292476, "learning_rate": 2.810867802108678e-06, "loss": 0.7131, "step": 16922 }, { "epoch": 0.4940877638609092, "grad_norm": 0.7269317428170835, "learning_rate": 2.810705596107056e-06, "loss": 0.659, "step": 16923 }, { "epoch": 0.49411696008875655, "grad_norm": 0.6732594098629752, "learning_rate": 2.810543390105434e-06, "loss": 0.6103, "step": 16924 }, { "epoch": 0.4941461563166039, "grad_norm": 0.7612397422263659, "learning_rate": 2.810381184103812e-06, "loss": 0.7404, "step": 16925 }, { "epoch": 0.4941753525444513, "grad_norm": 0.6800460165202982, "learning_rate": 2.81021897810219e-06, "loss": 0.6044, "step": 16926 }, { "epoch": 0.49420454877229864, "grad_norm": 0.7754857429792167, "learning_rate": 2.8100567721005677e-06, "loss": 0.6972, "step": 16927 }, { "epoch": 0.494233745000146, "grad_norm": 0.7096412968691763, "learning_rate": 2.8098945660989457e-06, "loss": 0.621, "step": 16928 }, { "epoch": 0.49426294122799336, "grad_norm": 0.6930316756886667, "learning_rate": 2.8097323600973237e-06, "loss": 0.5705, "step": 16929 }, { "epoch": 0.4942921374558407, "grad_norm": 0.6964822214574939, "learning_rate": 2.8095701540957017e-06, "loss": 0.5969, "step": 16930 }, { "epoch": 0.4943213336836881, "grad_norm": 0.7333353958335901, "learning_rate": 2.8094079480940793e-06, "loss": 0.6791, "step": 16931 }, { "epoch": 0.49435052991153544, "grad_norm": 0.7314187529579363, "learning_rate": 2.8092457420924577e-06, "loss": 0.6314, "step": 16932 }, { "epoch": 0.4943797261393828, "grad_norm": 0.738443017376195, "learning_rate": 2.8090835360908357e-06, "loss": 0.5798, "step": 16933 }, { "epoch": 0.49440892236723016, "grad_norm": 0.784298427321419, "learning_rate": 2.8089213300892137e-06, "loss": 0.7092, "step": 16934 }, { "epoch": 0.4944381185950775, "grad_norm": 0.6981976761131284, "learning_rate": 2.8087591240875917e-06, "loss": 0.6317, "step": 16935 }, { "epoch": 0.4944673148229249, "grad_norm": 0.7339877313731302, "learning_rate": 2.8085969180859697e-06, "loss": 0.6322, "step": 16936 }, { "epoch": 0.49449651105077225, "grad_norm": 0.7125482746656103, "learning_rate": 2.8084347120843473e-06, "loss": 0.6077, "step": 16937 }, { "epoch": 0.4945257072786196, "grad_norm": 0.7488954317681822, "learning_rate": 2.8082725060827253e-06, "loss": 0.6371, "step": 16938 }, { "epoch": 0.49455490350646697, "grad_norm": 0.751325148016862, "learning_rate": 2.8081103000811033e-06, "loss": 0.6581, "step": 16939 }, { "epoch": 0.49458409973431433, "grad_norm": 0.7465605634325304, "learning_rate": 2.8079480940794814e-06, "loss": 0.6406, "step": 16940 }, { "epoch": 0.4946132959621617, "grad_norm": 0.7229059020050409, "learning_rate": 2.807785888077859e-06, "loss": 0.607, "step": 16941 }, { "epoch": 0.49464249219000905, "grad_norm": 0.7549004633503401, "learning_rate": 2.807623682076237e-06, "loss": 0.6271, "step": 16942 }, { "epoch": 0.4946716884178564, "grad_norm": 0.8187392913785698, "learning_rate": 2.807461476074615e-06, "loss": 0.5921, "step": 16943 }, { "epoch": 0.4947008846457038, "grad_norm": 0.748341744481813, "learning_rate": 2.807299270072993e-06, "loss": 0.6823, "step": 16944 }, { "epoch": 0.49473008087355114, "grad_norm": 0.7601338475392215, "learning_rate": 2.807137064071371e-06, "loss": 0.6707, "step": 16945 }, { "epoch": 0.4947592771013985, "grad_norm": 0.6836546412486784, "learning_rate": 2.8069748580697485e-06, "loss": 0.6135, "step": 16946 }, { "epoch": 0.49478847332924586, "grad_norm": 0.6963369418745139, "learning_rate": 2.8068126520681266e-06, "loss": 0.5326, "step": 16947 }, { "epoch": 0.4948176695570932, "grad_norm": 0.7071822366933507, "learning_rate": 2.8066504460665046e-06, "loss": 0.6155, "step": 16948 }, { "epoch": 0.4948468657849406, "grad_norm": 0.6840109602471722, "learning_rate": 2.8064882400648826e-06, "loss": 0.6198, "step": 16949 }, { "epoch": 0.49487606201278794, "grad_norm": 0.743776044757551, "learning_rate": 2.80632603406326e-06, "loss": 0.6345, "step": 16950 }, { "epoch": 0.4949052582406353, "grad_norm": 0.771235279466862, "learning_rate": 2.8061638280616386e-06, "loss": 0.7002, "step": 16951 }, { "epoch": 0.49493445446848267, "grad_norm": 0.7400632520565371, "learning_rate": 2.8060016220600166e-06, "loss": 0.6839, "step": 16952 }, { "epoch": 0.49496365069633, "grad_norm": 0.740853479576444, "learning_rate": 2.8058394160583946e-06, "loss": 0.6922, "step": 16953 }, { "epoch": 0.4949928469241774, "grad_norm": 0.7103549577497024, "learning_rate": 2.8056772100567726e-06, "loss": 0.6061, "step": 16954 }, { "epoch": 0.49502204315202475, "grad_norm": 0.6926871603347914, "learning_rate": 2.8055150040551506e-06, "loss": 0.6122, "step": 16955 }, { "epoch": 0.4950512393798721, "grad_norm": 0.673406701488172, "learning_rate": 2.805352798053528e-06, "loss": 0.582, "step": 16956 }, { "epoch": 0.49508043560771947, "grad_norm": 0.6959700696670958, "learning_rate": 2.805190592051906e-06, "loss": 0.6697, "step": 16957 }, { "epoch": 0.49510963183556683, "grad_norm": 0.7675751856861909, "learning_rate": 2.805028386050284e-06, "loss": 0.6778, "step": 16958 }, { "epoch": 0.4951388280634142, "grad_norm": 0.6443633836680525, "learning_rate": 2.804866180048662e-06, "loss": 0.5501, "step": 16959 }, { "epoch": 0.49516802429126155, "grad_norm": 0.7262349813046715, "learning_rate": 2.80470397404704e-06, "loss": 0.6411, "step": 16960 }, { "epoch": 0.4951972205191089, "grad_norm": 0.7682628713597499, "learning_rate": 2.804541768045418e-06, "loss": 0.7058, "step": 16961 }, { "epoch": 0.4952264167469563, "grad_norm": 0.7246455381470369, "learning_rate": 2.804379562043796e-06, "loss": 0.6034, "step": 16962 }, { "epoch": 0.49525561297480364, "grad_norm": 0.7131125548175052, "learning_rate": 2.804217356042174e-06, "loss": 0.6524, "step": 16963 }, { "epoch": 0.495284809202651, "grad_norm": 0.7273937998456206, "learning_rate": 2.804055150040552e-06, "loss": 0.6904, "step": 16964 }, { "epoch": 0.49531400543049836, "grad_norm": 0.7469483409361207, "learning_rate": 2.8038929440389294e-06, "loss": 0.706, "step": 16965 }, { "epoch": 0.4953432016583457, "grad_norm": 0.672767779142827, "learning_rate": 2.8037307380373074e-06, "loss": 0.5468, "step": 16966 }, { "epoch": 0.4953723978861931, "grad_norm": 0.818792940382674, "learning_rate": 2.8035685320356854e-06, "loss": 0.7245, "step": 16967 }, { "epoch": 0.49540159411404044, "grad_norm": 0.7658555513378308, "learning_rate": 2.8034063260340634e-06, "loss": 0.699, "step": 16968 }, { "epoch": 0.4954307903418878, "grad_norm": 0.7913104526514122, "learning_rate": 2.803244120032441e-06, "loss": 0.72, "step": 16969 }, { "epoch": 0.49545998656973517, "grad_norm": 0.7825549363071903, "learning_rate": 2.8030819140308194e-06, "loss": 0.726, "step": 16970 }, { "epoch": 0.49548918279758253, "grad_norm": 0.7787940134459722, "learning_rate": 2.8029197080291974e-06, "loss": 0.7109, "step": 16971 }, { "epoch": 0.4955183790254299, "grad_norm": 0.7085272847748644, "learning_rate": 2.8027575020275755e-06, "loss": 0.5906, "step": 16972 }, { "epoch": 0.49554757525327725, "grad_norm": 0.6563479323679894, "learning_rate": 2.8025952960259535e-06, "loss": 0.5832, "step": 16973 }, { "epoch": 0.4955767714811246, "grad_norm": 0.6823353686368417, "learning_rate": 2.8024330900243315e-06, "loss": 0.5611, "step": 16974 }, { "epoch": 0.49560596770897203, "grad_norm": 0.8218185132386463, "learning_rate": 2.802270884022709e-06, "loss": 0.7823, "step": 16975 }, { "epoch": 0.4956351639368194, "grad_norm": 0.7011416120136942, "learning_rate": 2.802108678021087e-06, "loss": 0.6373, "step": 16976 }, { "epoch": 0.49566436016466675, "grad_norm": 0.7466223396346966, "learning_rate": 2.801946472019465e-06, "loss": 0.6782, "step": 16977 }, { "epoch": 0.4956935563925141, "grad_norm": 0.7486633185854896, "learning_rate": 2.801784266017843e-06, "loss": 0.6836, "step": 16978 }, { "epoch": 0.4957227526203615, "grad_norm": 0.6989849863243563, "learning_rate": 2.8016220600162207e-06, "loss": 0.622, "step": 16979 }, { "epoch": 0.49575194884820883, "grad_norm": 0.7219238429934832, "learning_rate": 2.8014598540145987e-06, "loss": 0.6966, "step": 16980 }, { "epoch": 0.4957811450760562, "grad_norm": 0.7806312220632647, "learning_rate": 2.8012976480129767e-06, "loss": 0.7184, "step": 16981 }, { "epoch": 0.49581034130390356, "grad_norm": 0.7289391382832786, "learning_rate": 2.8011354420113547e-06, "loss": 0.6448, "step": 16982 }, { "epoch": 0.4958395375317509, "grad_norm": 0.7310004959402508, "learning_rate": 2.8009732360097327e-06, "loss": 0.6622, "step": 16983 }, { "epoch": 0.4958687337595983, "grad_norm": 0.7192793590037848, "learning_rate": 2.8008110300081103e-06, "loss": 0.6146, "step": 16984 }, { "epoch": 0.49589792998744564, "grad_norm": 0.7011911134404206, "learning_rate": 2.8006488240064883e-06, "loss": 0.5796, "step": 16985 }, { "epoch": 0.495927126215293, "grad_norm": 0.7368526898537062, "learning_rate": 2.8004866180048663e-06, "loss": 0.6694, "step": 16986 }, { "epoch": 0.49595632244314036, "grad_norm": 0.6932118427855135, "learning_rate": 2.8003244120032443e-06, "loss": 0.6072, "step": 16987 }, { "epoch": 0.4959855186709877, "grad_norm": 0.7236882916178766, "learning_rate": 2.800162206001622e-06, "loss": 0.6453, "step": 16988 }, { "epoch": 0.4960147148988351, "grad_norm": 0.7083161193976579, "learning_rate": 2.8000000000000003e-06, "loss": 0.6661, "step": 16989 }, { "epoch": 0.49604391112668245, "grad_norm": 0.6862602713101206, "learning_rate": 2.7998377939983783e-06, "loss": 0.6016, "step": 16990 }, { "epoch": 0.4960731073545298, "grad_norm": 0.742701218584112, "learning_rate": 2.7996755879967563e-06, "loss": 0.6415, "step": 16991 }, { "epoch": 0.49610230358237717, "grad_norm": 0.6911281418500217, "learning_rate": 2.7995133819951343e-06, "loss": 0.6151, "step": 16992 }, { "epoch": 0.49613149981022453, "grad_norm": 0.7586893725406467, "learning_rate": 2.7993511759935123e-06, "loss": 0.6826, "step": 16993 }, { "epoch": 0.4961606960380719, "grad_norm": 0.7023613115806838, "learning_rate": 2.79918896999189e-06, "loss": 0.5883, "step": 16994 }, { "epoch": 0.49618989226591925, "grad_norm": 0.7055233919868329, "learning_rate": 2.799026763990268e-06, "loss": 0.6352, "step": 16995 }, { "epoch": 0.4962190884937666, "grad_norm": 0.7902472204965915, "learning_rate": 2.798864557988646e-06, "loss": 0.7178, "step": 16996 }, { "epoch": 0.496248284721614, "grad_norm": 0.6937364998271911, "learning_rate": 2.798702351987024e-06, "loss": 0.6113, "step": 16997 }, { "epoch": 0.49627748094946134, "grad_norm": 0.8056690180008338, "learning_rate": 2.7985401459854015e-06, "loss": 0.7408, "step": 16998 }, { "epoch": 0.4963066771773087, "grad_norm": 0.7025950427705282, "learning_rate": 2.7983779399837795e-06, "loss": 0.5707, "step": 16999 }, { "epoch": 0.49633587340515606, "grad_norm": 0.7635216436889823, "learning_rate": 2.7982157339821575e-06, "loss": 0.626, "step": 17000 }, { "epoch": 0.4963650696330034, "grad_norm": 0.7838577253534526, "learning_rate": 2.7980535279805355e-06, "loss": 0.7066, "step": 17001 }, { "epoch": 0.4963942658608508, "grad_norm": 0.685184397582456, "learning_rate": 2.7978913219789135e-06, "loss": 0.5947, "step": 17002 }, { "epoch": 0.49642346208869814, "grad_norm": 0.7161647761901381, "learning_rate": 2.797729115977291e-06, "loss": 0.6186, "step": 17003 }, { "epoch": 0.4964526583165455, "grad_norm": 0.7082218125057012, "learning_rate": 2.797566909975669e-06, "loss": 0.5897, "step": 17004 }, { "epoch": 0.49648185454439286, "grad_norm": 0.6764484829076339, "learning_rate": 2.797404703974047e-06, "loss": 0.5824, "step": 17005 }, { "epoch": 0.4965110507722402, "grad_norm": 0.7111040699424221, "learning_rate": 2.797242497972425e-06, "loss": 0.6699, "step": 17006 }, { "epoch": 0.4965402470000876, "grad_norm": 0.7734618463677183, "learning_rate": 2.7970802919708027e-06, "loss": 0.7898, "step": 17007 }, { "epoch": 0.49656944322793495, "grad_norm": 0.8342520915612782, "learning_rate": 2.796918085969181e-06, "loss": 0.7027, "step": 17008 }, { "epoch": 0.4965986394557823, "grad_norm": 0.7857508942395274, "learning_rate": 2.796755879967559e-06, "loss": 0.7407, "step": 17009 }, { "epoch": 0.49662783568362967, "grad_norm": 0.7504762750409474, "learning_rate": 2.796593673965937e-06, "loss": 0.6893, "step": 17010 }, { "epoch": 0.49665703191147703, "grad_norm": 0.7721983157190894, "learning_rate": 2.796431467964315e-06, "loss": 0.7, "step": 17011 }, { "epoch": 0.4966862281393244, "grad_norm": 0.7465643074550818, "learning_rate": 2.796269261962693e-06, "loss": 0.6679, "step": 17012 }, { "epoch": 0.49671542436717175, "grad_norm": 0.6949114674347394, "learning_rate": 2.7961070559610708e-06, "loss": 0.579, "step": 17013 }, { "epoch": 0.4967446205950191, "grad_norm": 0.7072813842378317, "learning_rate": 2.7959448499594488e-06, "loss": 0.6092, "step": 17014 }, { "epoch": 0.4967738168228665, "grad_norm": 0.721679749503914, "learning_rate": 2.7957826439578268e-06, "loss": 0.6848, "step": 17015 }, { "epoch": 0.49680301305071384, "grad_norm": 0.7188178552309692, "learning_rate": 2.7956204379562048e-06, "loss": 0.6286, "step": 17016 }, { "epoch": 0.4968322092785612, "grad_norm": 0.8051543344094291, "learning_rate": 2.7954582319545824e-06, "loss": 0.5912, "step": 17017 }, { "epoch": 0.49686140550640856, "grad_norm": 0.8029590921444295, "learning_rate": 2.7952960259529604e-06, "loss": 0.7519, "step": 17018 }, { "epoch": 0.4968906017342559, "grad_norm": 0.7386212663314763, "learning_rate": 2.7951338199513384e-06, "loss": 0.6747, "step": 17019 }, { "epoch": 0.4969197979621033, "grad_norm": 0.7314710479209913, "learning_rate": 2.7949716139497164e-06, "loss": 0.6854, "step": 17020 }, { "epoch": 0.49694899418995064, "grad_norm": 0.7636743592010843, "learning_rate": 2.7948094079480944e-06, "loss": 0.6549, "step": 17021 }, { "epoch": 0.496978190417798, "grad_norm": 0.7519535005445552, "learning_rate": 2.794647201946472e-06, "loss": 0.7291, "step": 17022 }, { "epoch": 0.49700738664564537, "grad_norm": 0.7434683879384912, "learning_rate": 2.79448499594485e-06, "loss": 0.6417, "step": 17023 }, { "epoch": 0.4970365828734927, "grad_norm": 0.8162000348235776, "learning_rate": 2.794322789943228e-06, "loss": 0.7065, "step": 17024 }, { "epoch": 0.4970657791013401, "grad_norm": 0.6790566327171005, "learning_rate": 2.794160583941606e-06, "loss": 0.5654, "step": 17025 }, { "epoch": 0.49709497532918745, "grad_norm": 0.7513036587514392, "learning_rate": 2.7939983779399836e-06, "loss": 0.7141, "step": 17026 }, { "epoch": 0.4971241715570348, "grad_norm": 0.7439227096895735, "learning_rate": 2.793836171938362e-06, "loss": 0.6958, "step": 17027 }, { "epoch": 0.49715336778488217, "grad_norm": 0.7412255193691039, "learning_rate": 2.79367396593674e-06, "loss": 0.6975, "step": 17028 }, { "epoch": 0.49718256401272953, "grad_norm": 0.7282483206314543, "learning_rate": 2.793511759935118e-06, "loss": 0.5983, "step": 17029 }, { "epoch": 0.4972117602405769, "grad_norm": 0.6703789624450811, "learning_rate": 2.793349553933496e-06, "loss": 0.53, "step": 17030 }, { "epoch": 0.49724095646842426, "grad_norm": 0.756463603193331, "learning_rate": 2.793187347931874e-06, "loss": 0.679, "step": 17031 }, { "epoch": 0.4972701526962716, "grad_norm": 0.7688864623266793, "learning_rate": 2.7930251419302516e-06, "loss": 0.694, "step": 17032 }, { "epoch": 0.497299348924119, "grad_norm": 0.6901115011658941, "learning_rate": 2.7928629359286296e-06, "loss": 0.5993, "step": 17033 }, { "epoch": 0.49732854515196634, "grad_norm": 0.6322152791891108, "learning_rate": 2.7927007299270076e-06, "loss": 0.5243, "step": 17034 }, { "epoch": 0.4973577413798137, "grad_norm": 0.7695744277053844, "learning_rate": 2.7925385239253856e-06, "loss": 0.5189, "step": 17035 }, { "epoch": 0.4973869376076611, "grad_norm": 0.7340908649135814, "learning_rate": 2.7923763179237632e-06, "loss": 0.6353, "step": 17036 }, { "epoch": 0.4974161338355085, "grad_norm": 0.7210866259080772, "learning_rate": 2.7922141119221412e-06, "loss": 0.614, "step": 17037 }, { "epoch": 0.49744533006335584, "grad_norm": 0.712692569635037, "learning_rate": 2.7920519059205192e-06, "loss": 0.635, "step": 17038 }, { "epoch": 0.4974745262912032, "grad_norm": 0.6781055784030383, "learning_rate": 2.7918896999188972e-06, "loss": 0.5637, "step": 17039 }, { "epoch": 0.49750372251905056, "grad_norm": 0.6898644794973239, "learning_rate": 2.7917274939172753e-06, "loss": 0.6048, "step": 17040 }, { "epoch": 0.4975329187468979, "grad_norm": 0.6945293997702501, "learning_rate": 2.791565287915653e-06, "loss": 0.6243, "step": 17041 }, { "epoch": 0.4975621149747453, "grad_norm": 0.8342714528312293, "learning_rate": 2.791403081914031e-06, "loss": 0.699, "step": 17042 }, { "epoch": 0.49759131120259265, "grad_norm": 0.7544020820948515, "learning_rate": 2.791240875912409e-06, "loss": 0.6731, "step": 17043 }, { "epoch": 0.49762050743044, "grad_norm": 0.7424120677862974, "learning_rate": 2.791078669910787e-06, "loss": 0.6342, "step": 17044 }, { "epoch": 0.49764970365828737, "grad_norm": 0.8312108082381368, "learning_rate": 2.7909164639091644e-06, "loss": 0.6819, "step": 17045 }, { "epoch": 0.49767889988613473, "grad_norm": 0.7423037903872434, "learning_rate": 2.790754257907543e-06, "loss": 0.6275, "step": 17046 }, { "epoch": 0.4977080961139821, "grad_norm": 0.7557257543581519, "learning_rate": 2.790592051905921e-06, "loss": 0.701, "step": 17047 }, { "epoch": 0.49773729234182945, "grad_norm": 0.7421578445273888, "learning_rate": 2.790429845904299e-06, "loss": 0.6986, "step": 17048 }, { "epoch": 0.4977664885696768, "grad_norm": 0.7149824484885814, "learning_rate": 2.790267639902677e-06, "loss": 0.6493, "step": 17049 }, { "epoch": 0.4977956847975242, "grad_norm": 0.7069853564389353, "learning_rate": 2.790105433901055e-06, "loss": 0.6278, "step": 17050 }, { "epoch": 0.49782488102537154, "grad_norm": 0.7199632941951298, "learning_rate": 2.7899432278994325e-06, "loss": 0.6423, "step": 17051 }, { "epoch": 0.4978540772532189, "grad_norm": 0.6844234423656232, "learning_rate": 2.7897810218978105e-06, "loss": 0.6111, "step": 17052 }, { "epoch": 0.49788327348106626, "grad_norm": 0.754696068645859, "learning_rate": 2.7896188158961885e-06, "loss": 0.7293, "step": 17053 }, { "epoch": 0.4979124697089136, "grad_norm": 0.7195782028605536, "learning_rate": 2.7894566098945665e-06, "loss": 0.5967, "step": 17054 }, { "epoch": 0.497941665936761, "grad_norm": 0.7523659577912137, "learning_rate": 2.789294403892944e-06, "loss": 0.732, "step": 17055 }, { "epoch": 0.49797086216460834, "grad_norm": 0.8666406678242872, "learning_rate": 2.789132197891322e-06, "loss": 0.6829, "step": 17056 }, { "epoch": 0.4980000583924557, "grad_norm": 0.7509507209821089, "learning_rate": 2.7889699918897e-06, "loss": 0.6452, "step": 17057 }, { "epoch": 0.49802925462030306, "grad_norm": 0.7977407962803773, "learning_rate": 2.788807785888078e-06, "loss": 0.5798, "step": 17058 }, { "epoch": 0.4980584508481504, "grad_norm": 0.682579733180441, "learning_rate": 2.7886455798864557e-06, "loss": 0.5738, "step": 17059 }, { "epoch": 0.4980876470759978, "grad_norm": 0.75979027039097, "learning_rate": 2.7884833738848337e-06, "loss": 0.7208, "step": 17060 }, { "epoch": 0.49811684330384515, "grad_norm": 0.787889435597027, "learning_rate": 2.7883211678832117e-06, "loss": 0.7473, "step": 17061 }, { "epoch": 0.4981460395316925, "grad_norm": 0.7085756208688754, "learning_rate": 2.7881589618815897e-06, "loss": 0.5854, "step": 17062 }, { "epoch": 0.49817523575953987, "grad_norm": 0.7032134486392597, "learning_rate": 2.7879967558799677e-06, "loss": 0.5766, "step": 17063 }, { "epoch": 0.49820443198738723, "grad_norm": 0.9344608525246566, "learning_rate": 2.7878345498783453e-06, "loss": 0.644, "step": 17064 }, { "epoch": 0.4982336282152346, "grad_norm": 0.6845094212022959, "learning_rate": 2.7876723438767237e-06, "loss": 0.5959, "step": 17065 }, { "epoch": 0.49826282444308195, "grad_norm": 0.8027787428925253, "learning_rate": 2.7875101378751017e-06, "loss": 0.753, "step": 17066 }, { "epoch": 0.4982920206709293, "grad_norm": 0.7216393722479438, "learning_rate": 2.7873479318734797e-06, "loss": 0.6106, "step": 17067 }, { "epoch": 0.4983212168987767, "grad_norm": 0.7374420667070287, "learning_rate": 2.7871857258718578e-06, "loss": 0.6336, "step": 17068 }, { "epoch": 0.49835041312662404, "grad_norm": 0.7308981416008236, "learning_rate": 2.7870235198702358e-06, "loss": 0.7205, "step": 17069 }, { "epoch": 0.4983796093544714, "grad_norm": 0.7448316386194553, "learning_rate": 2.7868613138686133e-06, "loss": 0.6377, "step": 17070 }, { "epoch": 0.49840880558231876, "grad_norm": 0.734370675686374, "learning_rate": 2.7866991078669913e-06, "loss": 0.6166, "step": 17071 }, { "epoch": 0.4984380018101661, "grad_norm": 0.735122863597735, "learning_rate": 2.7865369018653694e-06, "loss": 0.6421, "step": 17072 }, { "epoch": 0.4984671980380135, "grad_norm": 0.7475956696158602, "learning_rate": 2.7863746958637474e-06, "loss": 0.552, "step": 17073 }, { "epoch": 0.49849639426586084, "grad_norm": 0.7430589675201813, "learning_rate": 2.786212489862125e-06, "loss": 0.6801, "step": 17074 }, { "epoch": 0.4985255904937082, "grad_norm": 0.6756514673851258, "learning_rate": 2.786050283860503e-06, "loss": 0.6052, "step": 17075 }, { "epoch": 0.49855478672155557, "grad_norm": 0.7193402084588219, "learning_rate": 2.785888077858881e-06, "loss": 0.5725, "step": 17076 }, { "epoch": 0.4985839829494029, "grad_norm": 0.7508654665510073, "learning_rate": 2.785725871857259e-06, "loss": 0.7179, "step": 17077 }, { "epoch": 0.4986131791772503, "grad_norm": 0.7922627831026438, "learning_rate": 2.7855636658556365e-06, "loss": 0.7103, "step": 17078 }, { "epoch": 0.49864237540509765, "grad_norm": 0.786922894292255, "learning_rate": 2.7854014598540146e-06, "loss": 0.6511, "step": 17079 }, { "epoch": 0.498671571632945, "grad_norm": 0.7737911096183097, "learning_rate": 2.7852392538523926e-06, "loss": 0.643, "step": 17080 }, { "epoch": 0.49870076786079237, "grad_norm": 0.7646721985760919, "learning_rate": 2.7850770478507706e-06, "loss": 0.6936, "step": 17081 }, { "epoch": 0.49872996408863973, "grad_norm": 0.7657883500892407, "learning_rate": 2.7849148418491486e-06, "loss": 0.7406, "step": 17082 }, { "epoch": 0.4987591603164871, "grad_norm": 0.7029938198831819, "learning_rate": 2.784752635847526e-06, "loss": 0.6543, "step": 17083 }, { "epoch": 0.49878835654433445, "grad_norm": 0.7243821227100983, "learning_rate": 2.7845904298459046e-06, "loss": 0.6356, "step": 17084 }, { "epoch": 0.4988175527721818, "grad_norm": 0.716336013661067, "learning_rate": 2.7844282238442826e-06, "loss": 0.6099, "step": 17085 }, { "epoch": 0.4988467490000292, "grad_norm": 0.7497564075914658, "learning_rate": 2.7842660178426606e-06, "loss": 0.6995, "step": 17086 }, { "epoch": 0.49887594522787654, "grad_norm": 0.6701209896403467, "learning_rate": 2.7841038118410386e-06, "loss": 0.5457, "step": 17087 }, { "epoch": 0.4989051414557239, "grad_norm": 0.7180550385451933, "learning_rate": 2.7839416058394166e-06, "loss": 0.6307, "step": 17088 }, { "epoch": 0.49893433768357126, "grad_norm": 0.7150051446461624, "learning_rate": 2.783779399837794e-06, "loss": 0.617, "step": 17089 }, { "epoch": 0.4989635339114186, "grad_norm": 0.7666920527758936, "learning_rate": 2.783617193836172e-06, "loss": 0.6966, "step": 17090 }, { "epoch": 0.498992730139266, "grad_norm": 0.6559667998539539, "learning_rate": 2.7834549878345502e-06, "loss": 0.5701, "step": 17091 }, { "epoch": 0.49902192636711334, "grad_norm": 0.755296739162588, "learning_rate": 2.7832927818329282e-06, "loss": 0.6869, "step": 17092 }, { "epoch": 0.4990511225949607, "grad_norm": 0.6555074182428824, "learning_rate": 2.783130575831306e-06, "loss": 0.5389, "step": 17093 }, { "epoch": 0.49908031882280807, "grad_norm": 0.699900919267943, "learning_rate": 2.782968369829684e-06, "loss": 0.6281, "step": 17094 }, { "epoch": 0.49910951505065543, "grad_norm": 0.7423723481564606, "learning_rate": 2.782806163828062e-06, "loss": 0.6855, "step": 17095 }, { "epoch": 0.49913871127850284, "grad_norm": 0.6946043980132537, "learning_rate": 2.78264395782644e-06, "loss": 0.5829, "step": 17096 }, { "epoch": 0.4991679075063502, "grad_norm": 0.7039991483092595, "learning_rate": 2.7824817518248174e-06, "loss": 0.637, "step": 17097 }, { "epoch": 0.49919710373419757, "grad_norm": 0.7507323327173493, "learning_rate": 2.7823195458231954e-06, "loss": 0.6431, "step": 17098 }, { "epoch": 0.49922629996204493, "grad_norm": 0.6819614810896018, "learning_rate": 2.7821573398215734e-06, "loss": 0.6213, "step": 17099 }, { "epoch": 0.4992554961898923, "grad_norm": 0.810806896647407, "learning_rate": 2.7819951338199514e-06, "loss": 0.6556, "step": 17100 }, { "epoch": 0.49928469241773965, "grad_norm": 0.7920374196347959, "learning_rate": 2.7818329278183294e-06, "loss": 0.6111, "step": 17101 }, { "epoch": 0.499313888645587, "grad_norm": 0.702467614059367, "learning_rate": 2.781670721816708e-06, "loss": 0.6088, "step": 17102 }, { "epoch": 0.4993430848734344, "grad_norm": 0.7465378191389849, "learning_rate": 2.7815085158150854e-06, "loss": 0.6593, "step": 17103 }, { "epoch": 0.49937228110128173, "grad_norm": 0.7468750089573226, "learning_rate": 2.7813463098134635e-06, "loss": 0.6612, "step": 17104 }, { "epoch": 0.4994014773291291, "grad_norm": 0.6974468452881111, "learning_rate": 2.7811841038118415e-06, "loss": 0.5985, "step": 17105 }, { "epoch": 0.49943067355697646, "grad_norm": 1.0213510176506782, "learning_rate": 2.7810218978102195e-06, "loss": 0.6729, "step": 17106 }, { "epoch": 0.4994598697848238, "grad_norm": 0.7390967994390729, "learning_rate": 2.7808596918085975e-06, "loss": 0.6111, "step": 17107 }, { "epoch": 0.4994890660126712, "grad_norm": 0.741504496730825, "learning_rate": 2.780697485806975e-06, "loss": 0.6816, "step": 17108 }, { "epoch": 0.49951826224051854, "grad_norm": 0.725187808209799, "learning_rate": 2.780535279805353e-06, "loss": 0.6275, "step": 17109 }, { "epoch": 0.4995474584683659, "grad_norm": 0.7312530770013158, "learning_rate": 2.780373073803731e-06, "loss": 0.6371, "step": 17110 }, { "epoch": 0.49957665469621326, "grad_norm": 0.7840900438189132, "learning_rate": 2.780210867802109e-06, "loss": 0.6632, "step": 17111 }, { "epoch": 0.4996058509240606, "grad_norm": 0.7329377788663402, "learning_rate": 2.7800486618004867e-06, "loss": 0.6412, "step": 17112 }, { "epoch": 0.499635047151908, "grad_norm": 0.7374328182465012, "learning_rate": 2.7798864557988647e-06, "loss": 0.6869, "step": 17113 }, { "epoch": 0.49966424337975535, "grad_norm": 0.7168054948884431, "learning_rate": 2.7797242497972427e-06, "loss": 0.654, "step": 17114 }, { "epoch": 0.4996934396076027, "grad_norm": 0.6792952624864641, "learning_rate": 2.7795620437956207e-06, "loss": 0.5812, "step": 17115 }, { "epoch": 0.49972263583545007, "grad_norm": 0.7271824859384792, "learning_rate": 2.7793998377939983e-06, "loss": 0.6234, "step": 17116 }, { "epoch": 0.49975183206329743, "grad_norm": 0.6961115216804166, "learning_rate": 2.7792376317923763e-06, "loss": 0.5975, "step": 17117 }, { "epoch": 0.4997810282911448, "grad_norm": 0.7251051397576199, "learning_rate": 2.7790754257907543e-06, "loss": 0.681, "step": 17118 }, { "epoch": 0.49981022451899215, "grad_norm": 0.7601839726610301, "learning_rate": 2.7789132197891323e-06, "loss": 0.6523, "step": 17119 }, { "epoch": 0.4998394207468395, "grad_norm": 0.7454009105739605, "learning_rate": 2.7787510137875103e-06, "loss": 0.6861, "step": 17120 }, { "epoch": 0.4998686169746869, "grad_norm": 0.7004319217731073, "learning_rate": 2.7785888077858887e-06, "loss": 0.664, "step": 17121 }, { "epoch": 0.49989781320253424, "grad_norm": 0.8118106390746618, "learning_rate": 2.7784266017842663e-06, "loss": 0.6825, "step": 17122 }, { "epoch": 0.4999270094303816, "grad_norm": 0.7339105109135936, "learning_rate": 2.7782643957826443e-06, "loss": 0.6738, "step": 17123 }, { "epoch": 0.49995620565822896, "grad_norm": 0.7615994668767907, "learning_rate": 2.7781021897810223e-06, "loss": 0.7244, "step": 17124 }, { "epoch": 0.4999854018860763, "grad_norm": 0.7766163992797935, "learning_rate": 2.7779399837794003e-06, "loss": 0.6351, "step": 17125 }, { "epoch": 0.5000145981139237, "grad_norm": 0.6899958566240624, "learning_rate": 2.7777777777777783e-06, "loss": 0.5479, "step": 17126 }, { "epoch": 0.500043794341771, "grad_norm": 0.7556969406811532, "learning_rate": 2.777615571776156e-06, "loss": 0.6605, "step": 17127 }, { "epoch": 0.5000729905696184, "grad_norm": 0.7387139480404866, "learning_rate": 2.777453365774534e-06, "loss": 0.6703, "step": 17128 }, { "epoch": 0.5001021867974658, "grad_norm": 0.715203394581138, "learning_rate": 2.777291159772912e-06, "loss": 0.6314, "step": 17129 }, { "epoch": 0.5001313830253131, "grad_norm": 0.7548343535872271, "learning_rate": 2.77712895377129e-06, "loss": 0.6825, "step": 17130 }, { "epoch": 0.5001605792531605, "grad_norm": 0.7255340047439447, "learning_rate": 2.7769667477696675e-06, "loss": 0.6992, "step": 17131 }, { "epoch": 0.5001897754810078, "grad_norm": 0.7148261725617796, "learning_rate": 2.7768045417680455e-06, "loss": 0.639, "step": 17132 }, { "epoch": 0.5002189717088552, "grad_norm": 0.7010919537468442, "learning_rate": 2.7766423357664235e-06, "loss": 0.5902, "step": 17133 }, { "epoch": 0.5002481679367026, "grad_norm": 0.7515113680607272, "learning_rate": 2.7764801297648015e-06, "loss": 0.6401, "step": 17134 }, { "epoch": 0.5002773641645499, "grad_norm": 0.7444716456112096, "learning_rate": 2.776317923763179e-06, "loss": 0.6385, "step": 17135 }, { "epoch": 0.5003065603923973, "grad_norm": 0.8099500393424901, "learning_rate": 2.776155717761557e-06, "loss": 0.7407, "step": 17136 }, { "epoch": 0.5003357566202447, "grad_norm": 0.7085601363855633, "learning_rate": 2.775993511759935e-06, "loss": 0.659, "step": 17137 }, { "epoch": 0.500364952848092, "grad_norm": 0.7613228967441008, "learning_rate": 2.775831305758313e-06, "loss": 0.6596, "step": 17138 }, { "epoch": 0.5003941490759394, "grad_norm": 0.7264734214026816, "learning_rate": 2.775669099756691e-06, "loss": 0.646, "step": 17139 }, { "epoch": 0.5004233453037867, "grad_norm": 0.7448260758948505, "learning_rate": 2.7755068937550696e-06, "loss": 0.6356, "step": 17140 }, { "epoch": 0.5004525415316341, "grad_norm": 0.7680042056242554, "learning_rate": 2.775344687753447e-06, "loss": 0.725, "step": 17141 }, { "epoch": 0.5004817377594815, "grad_norm": 0.7096436392067128, "learning_rate": 2.775182481751825e-06, "loss": 0.6224, "step": 17142 }, { "epoch": 0.5005109339873288, "grad_norm": 0.7468659875203187, "learning_rate": 2.775020275750203e-06, "loss": 0.6521, "step": 17143 }, { "epoch": 0.5005401302151762, "grad_norm": 0.7815450937571424, "learning_rate": 2.774858069748581e-06, "loss": 0.6805, "step": 17144 }, { "epoch": 0.5005693264430235, "grad_norm": 0.7105202987359502, "learning_rate": 2.774695863746959e-06, "loss": 0.6404, "step": 17145 }, { "epoch": 0.5005985226708709, "grad_norm": 0.7112317074623336, "learning_rate": 2.7745336577453368e-06, "loss": 0.6379, "step": 17146 }, { "epoch": 0.5006277188987183, "grad_norm": 0.7250446007183171, "learning_rate": 2.7743714517437148e-06, "loss": 0.632, "step": 17147 }, { "epoch": 0.5006569151265656, "grad_norm": 0.7579559430585227, "learning_rate": 2.774209245742093e-06, "loss": 0.6717, "step": 17148 }, { "epoch": 0.500686111354413, "grad_norm": 0.7668705424715999, "learning_rate": 2.774047039740471e-06, "loss": 0.6753, "step": 17149 }, { "epoch": 0.5007153075822603, "grad_norm": 0.8428343705966153, "learning_rate": 2.7738848337388484e-06, "loss": 0.6296, "step": 17150 }, { "epoch": 0.5007445038101077, "grad_norm": 0.73629962721883, "learning_rate": 2.7737226277372264e-06, "loss": 0.6738, "step": 17151 }, { "epoch": 0.5007737000379551, "grad_norm": 0.7467833947575403, "learning_rate": 2.7735604217356044e-06, "loss": 0.7023, "step": 17152 }, { "epoch": 0.5008028962658024, "grad_norm": 0.7490858898796293, "learning_rate": 2.7733982157339824e-06, "loss": 0.7018, "step": 17153 }, { "epoch": 0.5008320924936498, "grad_norm": 0.7550112393974644, "learning_rate": 2.77323600973236e-06, "loss": 0.6368, "step": 17154 }, { "epoch": 0.5008612887214972, "grad_norm": 0.7128460429636702, "learning_rate": 2.773073803730738e-06, "loss": 0.6626, "step": 17155 }, { "epoch": 0.5008904849493445, "grad_norm": 0.7260123001199192, "learning_rate": 2.772911597729116e-06, "loss": 0.6216, "step": 17156 }, { "epoch": 0.5009196811771919, "grad_norm": 0.727880884640853, "learning_rate": 2.772749391727494e-06, "loss": 0.65, "step": 17157 }, { "epoch": 0.5009488774050392, "grad_norm": 0.7466614827759084, "learning_rate": 2.772587185725872e-06, "loss": 0.628, "step": 17158 }, { "epoch": 0.5009780736328866, "grad_norm": 0.7955739350644515, "learning_rate": 2.7724249797242504e-06, "loss": 0.7057, "step": 17159 }, { "epoch": 0.501007269860734, "grad_norm": 0.7257434671851651, "learning_rate": 2.772262773722628e-06, "loss": 0.639, "step": 17160 }, { "epoch": 0.5010364660885813, "grad_norm": 0.7813857305685947, "learning_rate": 2.772100567721006e-06, "loss": 0.6421, "step": 17161 }, { "epoch": 0.5010656623164287, "grad_norm": 0.7885535766689189, "learning_rate": 2.771938361719384e-06, "loss": 0.6726, "step": 17162 }, { "epoch": 0.501094858544276, "grad_norm": 0.7474437553216802, "learning_rate": 2.771776155717762e-06, "loss": 0.7085, "step": 17163 }, { "epoch": 0.5011240547721234, "grad_norm": 0.6842127362338557, "learning_rate": 2.77161394971614e-06, "loss": 0.5666, "step": 17164 }, { "epoch": 0.5011532509999708, "grad_norm": 0.7334103775984843, "learning_rate": 2.7714517437145176e-06, "loss": 0.6496, "step": 17165 }, { "epoch": 0.5011824472278181, "grad_norm": 0.7312653733961654, "learning_rate": 2.7712895377128956e-06, "loss": 0.6963, "step": 17166 }, { "epoch": 0.5012116434556655, "grad_norm": 0.751840385890809, "learning_rate": 2.7711273317112736e-06, "loss": 0.6651, "step": 17167 }, { "epoch": 0.5012408396835129, "grad_norm": 0.7461301843960377, "learning_rate": 2.7709651257096517e-06, "loss": 0.655, "step": 17168 }, { "epoch": 0.5012700359113602, "grad_norm": 0.7391276477895182, "learning_rate": 2.7708029197080292e-06, "loss": 0.6102, "step": 17169 }, { "epoch": 0.5012992321392076, "grad_norm": 0.7553166518109695, "learning_rate": 2.7706407137064072e-06, "loss": 0.6251, "step": 17170 }, { "epoch": 0.5013284283670549, "grad_norm": 0.7069811284106325, "learning_rate": 2.7704785077047853e-06, "loss": 0.6239, "step": 17171 }, { "epoch": 0.5013576245949023, "grad_norm": 0.8093255031426939, "learning_rate": 2.7703163017031633e-06, "loss": 0.7089, "step": 17172 }, { "epoch": 0.5013868208227497, "grad_norm": 0.7274470156104413, "learning_rate": 2.770154095701541e-06, "loss": 0.6603, "step": 17173 }, { "epoch": 0.501416017050597, "grad_norm": 0.7045289601361956, "learning_rate": 2.769991889699919e-06, "loss": 0.6024, "step": 17174 }, { "epoch": 0.5014452132784444, "grad_norm": 0.697042667142185, "learning_rate": 2.769829683698297e-06, "loss": 0.6069, "step": 17175 }, { "epoch": 0.5014744095062917, "grad_norm": 0.7252450871473499, "learning_rate": 2.769667477696675e-06, "loss": 0.6454, "step": 17176 }, { "epoch": 0.5015036057341391, "grad_norm": 0.7690122746566417, "learning_rate": 2.769505271695053e-06, "loss": 0.6967, "step": 17177 }, { "epoch": 0.5015328019619865, "grad_norm": 0.7446581645794077, "learning_rate": 2.7693430656934313e-06, "loss": 0.6721, "step": 17178 }, { "epoch": 0.5015619981898338, "grad_norm": 0.7825762728872842, "learning_rate": 2.769180859691809e-06, "loss": 0.5928, "step": 17179 }, { "epoch": 0.5015911944176812, "grad_norm": 0.765316048719458, "learning_rate": 2.769018653690187e-06, "loss": 0.6053, "step": 17180 }, { "epoch": 0.5016203906455285, "grad_norm": 0.815114660228051, "learning_rate": 2.768856447688565e-06, "loss": 0.5972, "step": 17181 }, { "epoch": 0.5016495868733759, "grad_norm": 0.7322165106980157, "learning_rate": 2.768694241686943e-06, "loss": 0.6344, "step": 17182 }, { "epoch": 0.5016787831012233, "grad_norm": 0.7160309953148899, "learning_rate": 2.7685320356853205e-06, "loss": 0.6259, "step": 17183 }, { "epoch": 0.5017079793290706, "grad_norm": 0.7503443935599543, "learning_rate": 2.7683698296836985e-06, "loss": 0.6399, "step": 17184 }, { "epoch": 0.501737175556918, "grad_norm": 0.7408261554034703, "learning_rate": 2.7682076236820765e-06, "loss": 0.6879, "step": 17185 }, { "epoch": 0.5017663717847655, "grad_norm": 0.7599617782125387, "learning_rate": 2.7680454176804545e-06, "loss": 0.676, "step": 17186 }, { "epoch": 0.5017955680126128, "grad_norm": 0.8427316210727666, "learning_rate": 2.7678832116788325e-06, "loss": 0.7345, "step": 17187 }, { "epoch": 0.5018247642404602, "grad_norm": 0.7216050728833724, "learning_rate": 2.76772100567721e-06, "loss": 0.6771, "step": 17188 }, { "epoch": 0.5018539604683075, "grad_norm": 0.7348663631500431, "learning_rate": 2.767558799675588e-06, "loss": 0.6514, "step": 17189 }, { "epoch": 0.5018831566961549, "grad_norm": 0.6741877196415831, "learning_rate": 2.767396593673966e-06, "loss": 0.5794, "step": 17190 }, { "epoch": 0.5019123529240023, "grad_norm": 0.7125286650437211, "learning_rate": 2.767234387672344e-06, "loss": 0.6134, "step": 17191 }, { "epoch": 0.5019415491518496, "grad_norm": 0.7138424300062527, "learning_rate": 2.7670721816707217e-06, "loss": 0.6455, "step": 17192 }, { "epoch": 0.501970745379697, "grad_norm": 0.6958381288188348, "learning_rate": 2.7669099756690997e-06, "loss": 0.5796, "step": 17193 }, { "epoch": 0.5019999416075444, "grad_norm": 0.7190674506974609, "learning_rate": 2.7667477696674777e-06, "loss": 0.557, "step": 17194 }, { "epoch": 0.5020291378353917, "grad_norm": 0.8213768389498111, "learning_rate": 2.7665855636658557e-06, "loss": 0.6412, "step": 17195 }, { "epoch": 0.5020583340632391, "grad_norm": 0.7366157701992737, "learning_rate": 2.7664233576642337e-06, "loss": 0.66, "step": 17196 }, { "epoch": 0.5020875302910864, "grad_norm": 0.7109493226208753, "learning_rate": 2.766261151662612e-06, "loss": 0.6062, "step": 17197 }, { "epoch": 0.5021167265189338, "grad_norm": 0.68675663696234, "learning_rate": 2.7660989456609897e-06, "loss": 0.6024, "step": 17198 }, { "epoch": 0.5021459227467812, "grad_norm": 0.7421536210640337, "learning_rate": 2.7659367396593677e-06, "loss": 0.7041, "step": 17199 }, { "epoch": 0.5021751189746285, "grad_norm": 0.7774334090162298, "learning_rate": 2.7657745336577458e-06, "loss": 0.6928, "step": 17200 }, { "epoch": 0.5022043152024759, "grad_norm": 0.7154685072138994, "learning_rate": 2.7656123276561238e-06, "loss": 0.6283, "step": 17201 }, { "epoch": 0.5022335114303232, "grad_norm": 0.7095785515560956, "learning_rate": 2.7654501216545013e-06, "loss": 0.6215, "step": 17202 }, { "epoch": 0.5022627076581706, "grad_norm": 0.6898671886124741, "learning_rate": 2.7652879156528794e-06, "loss": 0.6144, "step": 17203 }, { "epoch": 0.502291903886018, "grad_norm": 0.7816141537098285, "learning_rate": 2.7651257096512574e-06, "loss": 0.6533, "step": 17204 }, { "epoch": 0.5023211001138653, "grad_norm": 0.729309659656118, "learning_rate": 2.7649635036496354e-06, "loss": 0.5975, "step": 17205 }, { "epoch": 0.5023502963417127, "grad_norm": 0.7057307603879975, "learning_rate": 2.7648012976480134e-06, "loss": 0.6069, "step": 17206 }, { "epoch": 0.50237949256956, "grad_norm": 0.6892508020724848, "learning_rate": 2.764639091646391e-06, "loss": 0.6152, "step": 17207 }, { "epoch": 0.5024086887974074, "grad_norm": 0.7490843443899854, "learning_rate": 2.764476885644769e-06, "loss": 0.7391, "step": 17208 }, { "epoch": 0.5024378850252548, "grad_norm": 0.772270408043154, "learning_rate": 2.764314679643147e-06, "loss": 0.6524, "step": 17209 }, { "epoch": 0.5024670812531021, "grad_norm": 0.6882333569008793, "learning_rate": 2.764152473641525e-06, "loss": 0.5765, "step": 17210 }, { "epoch": 0.5024962774809495, "grad_norm": 0.8107711748833968, "learning_rate": 2.7639902676399026e-06, "loss": 0.7212, "step": 17211 }, { "epoch": 0.5025254737087969, "grad_norm": 0.7096497515296114, "learning_rate": 2.7638280616382806e-06, "loss": 0.5675, "step": 17212 }, { "epoch": 0.5025546699366442, "grad_norm": 0.7826564851322594, "learning_rate": 2.7636658556366586e-06, "loss": 0.6071, "step": 17213 }, { "epoch": 0.5025838661644916, "grad_norm": 0.7685427207015133, "learning_rate": 2.7635036496350366e-06, "loss": 0.6708, "step": 17214 }, { "epoch": 0.5026130623923389, "grad_norm": 0.6983660826114797, "learning_rate": 2.7633414436334146e-06, "loss": 0.6217, "step": 17215 }, { "epoch": 0.5026422586201863, "grad_norm": 0.8327308503081547, "learning_rate": 2.763179237631793e-06, "loss": 0.8126, "step": 17216 }, { "epoch": 0.5026714548480337, "grad_norm": 0.7364665700903472, "learning_rate": 2.7630170316301706e-06, "loss": 0.7028, "step": 17217 }, { "epoch": 0.502700651075881, "grad_norm": 0.7592007334839568, "learning_rate": 2.7628548256285486e-06, "loss": 0.663, "step": 17218 }, { "epoch": 0.5027298473037284, "grad_norm": 0.7788337232630942, "learning_rate": 2.7626926196269266e-06, "loss": 0.6851, "step": 17219 }, { "epoch": 0.5027590435315757, "grad_norm": 0.7899175359394922, "learning_rate": 2.7625304136253046e-06, "loss": 0.6679, "step": 17220 }, { "epoch": 0.5027882397594231, "grad_norm": 0.6814917153714285, "learning_rate": 2.762368207623682e-06, "loss": 0.5389, "step": 17221 }, { "epoch": 0.5028174359872705, "grad_norm": 0.7841974291321383, "learning_rate": 2.76220600162206e-06, "loss": 0.6786, "step": 17222 }, { "epoch": 0.5028466322151178, "grad_norm": 0.8105327152665015, "learning_rate": 2.7620437956204382e-06, "loss": 0.705, "step": 17223 }, { "epoch": 0.5028758284429652, "grad_norm": 0.8466883598135774, "learning_rate": 2.7618815896188162e-06, "loss": 0.6667, "step": 17224 }, { "epoch": 0.5029050246708126, "grad_norm": 0.788716968113729, "learning_rate": 2.7617193836171942e-06, "loss": 0.6388, "step": 17225 }, { "epoch": 0.5029342208986599, "grad_norm": 0.6926628542867435, "learning_rate": 2.761557177615572e-06, "loss": 0.603, "step": 17226 }, { "epoch": 0.5029634171265073, "grad_norm": 0.7649956395128039, "learning_rate": 2.76139497161395e-06, "loss": 0.6847, "step": 17227 }, { "epoch": 0.5029926133543546, "grad_norm": 0.7921538175204892, "learning_rate": 2.761232765612328e-06, "loss": 0.7177, "step": 17228 }, { "epoch": 0.503021809582202, "grad_norm": 0.7700606175808189, "learning_rate": 2.761070559610706e-06, "loss": 0.718, "step": 17229 }, { "epoch": 0.5030510058100494, "grad_norm": 0.7120030031348544, "learning_rate": 2.7609083536090834e-06, "loss": 0.6139, "step": 17230 }, { "epoch": 0.5030802020378967, "grad_norm": 0.7488007814969291, "learning_rate": 2.7607461476074614e-06, "loss": 0.6751, "step": 17231 }, { "epoch": 0.5031093982657441, "grad_norm": 0.7415206000695016, "learning_rate": 2.7605839416058394e-06, "loss": 0.6147, "step": 17232 }, { "epoch": 0.5031385944935914, "grad_norm": 0.7407478852794958, "learning_rate": 2.7604217356042174e-06, "loss": 0.6755, "step": 17233 }, { "epoch": 0.5031677907214388, "grad_norm": 0.7523729593648465, "learning_rate": 2.7602595296025954e-06, "loss": 0.6675, "step": 17234 }, { "epoch": 0.5031969869492862, "grad_norm": 0.6950618718991212, "learning_rate": 2.760097323600974e-06, "loss": 0.5982, "step": 17235 }, { "epoch": 0.5032261831771335, "grad_norm": 0.7573102017565321, "learning_rate": 2.7599351175993515e-06, "loss": 0.668, "step": 17236 }, { "epoch": 0.5032553794049809, "grad_norm": 0.6924620439460195, "learning_rate": 2.7597729115977295e-06, "loss": 0.5736, "step": 17237 }, { "epoch": 0.5032845756328282, "grad_norm": 0.7802538518650479, "learning_rate": 2.7596107055961075e-06, "loss": 0.6759, "step": 17238 }, { "epoch": 0.5033137718606756, "grad_norm": 0.7466847463108691, "learning_rate": 2.7594484995944855e-06, "loss": 0.6242, "step": 17239 }, { "epoch": 0.503342968088523, "grad_norm": 0.7250981225102576, "learning_rate": 2.759286293592863e-06, "loss": 0.6069, "step": 17240 }, { "epoch": 0.5033721643163703, "grad_norm": 0.7606378315122189, "learning_rate": 2.759124087591241e-06, "loss": 0.6877, "step": 17241 }, { "epoch": 0.5034013605442177, "grad_norm": 0.7248482066561777, "learning_rate": 2.758961881589619e-06, "loss": 0.5994, "step": 17242 }, { "epoch": 0.503430556772065, "grad_norm": 0.7130331301296765, "learning_rate": 2.758799675587997e-06, "loss": 0.6241, "step": 17243 }, { "epoch": 0.5034597529999124, "grad_norm": 0.7459239078763519, "learning_rate": 2.758637469586375e-06, "loss": 0.7261, "step": 17244 }, { "epoch": 0.5034889492277598, "grad_norm": 0.7261257722669381, "learning_rate": 2.7584752635847527e-06, "loss": 0.6708, "step": 17245 }, { "epoch": 0.5035181454556071, "grad_norm": 0.6794259912544826, "learning_rate": 2.7583130575831307e-06, "loss": 0.5684, "step": 17246 }, { "epoch": 0.5035473416834545, "grad_norm": 0.7469940076881899, "learning_rate": 2.7581508515815087e-06, "loss": 0.7094, "step": 17247 }, { "epoch": 0.5035765379113019, "grad_norm": 0.7430029986449451, "learning_rate": 2.7579886455798867e-06, "loss": 0.6576, "step": 17248 }, { "epoch": 0.5036057341391492, "grad_norm": 0.763071662834986, "learning_rate": 2.7578264395782643e-06, "loss": 0.7568, "step": 17249 }, { "epoch": 0.5036349303669966, "grad_norm": 0.6803669998737283, "learning_rate": 2.7576642335766423e-06, "loss": 0.6074, "step": 17250 }, { "epoch": 0.5036641265948439, "grad_norm": 0.7375512690518923, "learning_rate": 2.7575020275750203e-06, "loss": 0.6387, "step": 17251 }, { "epoch": 0.5036933228226913, "grad_norm": 0.7021354812262623, "learning_rate": 2.7573398215733983e-06, "loss": 0.6185, "step": 17252 }, { "epoch": 0.5037225190505387, "grad_norm": 0.6988703685408011, "learning_rate": 2.7571776155717767e-06, "loss": 0.6276, "step": 17253 }, { "epoch": 0.503751715278386, "grad_norm": 0.7114368410348119, "learning_rate": 2.7570154095701547e-06, "loss": 0.6278, "step": 17254 }, { "epoch": 0.5037809115062334, "grad_norm": 0.738972934485958, "learning_rate": 2.7568532035685323e-06, "loss": 0.6948, "step": 17255 }, { "epoch": 0.5038101077340807, "grad_norm": 0.7648312200913612, "learning_rate": 2.7566909975669103e-06, "loss": 0.6384, "step": 17256 }, { "epoch": 0.5038393039619281, "grad_norm": 0.8349258981286963, "learning_rate": 2.7565287915652883e-06, "loss": 0.7139, "step": 17257 }, { "epoch": 0.5038685001897755, "grad_norm": 0.7178732787462313, "learning_rate": 2.7563665855636663e-06, "loss": 0.6368, "step": 17258 }, { "epoch": 0.5038976964176228, "grad_norm": 0.6861973394087849, "learning_rate": 2.756204379562044e-06, "loss": 0.5939, "step": 17259 }, { "epoch": 0.5039268926454702, "grad_norm": 0.7307897854209983, "learning_rate": 2.756042173560422e-06, "loss": 0.619, "step": 17260 }, { "epoch": 0.5039560888733176, "grad_norm": 0.7729454965220934, "learning_rate": 2.7558799675588e-06, "loss": 0.7238, "step": 17261 }, { "epoch": 0.5039852851011649, "grad_norm": 0.7416078563341993, "learning_rate": 2.755717761557178e-06, "loss": 0.6444, "step": 17262 }, { "epoch": 0.5040144813290123, "grad_norm": 0.6863079930529483, "learning_rate": 2.755555555555556e-06, "loss": 0.5919, "step": 17263 }, { "epoch": 0.5040436775568596, "grad_norm": 0.7513987415428464, "learning_rate": 2.7553933495539335e-06, "loss": 0.6848, "step": 17264 }, { "epoch": 0.504072873784707, "grad_norm": 0.7358149626311966, "learning_rate": 2.7552311435523115e-06, "loss": 0.6676, "step": 17265 }, { "epoch": 0.5041020700125544, "grad_norm": 0.7207286327490116, "learning_rate": 2.7550689375506895e-06, "loss": 0.6483, "step": 17266 }, { "epoch": 0.5041312662404017, "grad_norm": 0.7283634985905085, "learning_rate": 2.7549067315490676e-06, "loss": 0.6543, "step": 17267 }, { "epoch": 0.5041604624682491, "grad_norm": 0.6792447518582347, "learning_rate": 2.754744525547445e-06, "loss": 0.5747, "step": 17268 }, { "epoch": 0.5041896586960964, "grad_norm": 0.7108921138586082, "learning_rate": 2.754582319545823e-06, "loss": 0.5935, "step": 17269 }, { "epoch": 0.5042188549239438, "grad_norm": 0.7883915330819012, "learning_rate": 2.754420113544201e-06, "loss": 0.7295, "step": 17270 }, { "epoch": 0.5042480511517912, "grad_norm": 1.2642690019978988, "learning_rate": 2.754257907542579e-06, "loss": 0.6138, "step": 17271 }, { "epoch": 0.5042772473796385, "grad_norm": 0.7703981669646603, "learning_rate": 2.7540957015409576e-06, "loss": 0.7081, "step": 17272 }, { "epoch": 0.5043064436074859, "grad_norm": 0.7227588536982147, "learning_rate": 2.7539334955393356e-06, "loss": 0.6086, "step": 17273 }, { "epoch": 0.5043356398353332, "grad_norm": 0.7543304056274251, "learning_rate": 2.753771289537713e-06, "loss": 0.6765, "step": 17274 }, { "epoch": 0.5043648360631806, "grad_norm": 0.7507739704756266, "learning_rate": 2.753609083536091e-06, "loss": 0.6638, "step": 17275 }, { "epoch": 0.504394032291028, "grad_norm": 0.7578120313655708, "learning_rate": 2.753446877534469e-06, "loss": 0.7431, "step": 17276 }, { "epoch": 0.5044232285188753, "grad_norm": 0.7157604008969792, "learning_rate": 2.753284671532847e-06, "loss": 0.6348, "step": 17277 }, { "epoch": 0.5044524247467227, "grad_norm": 0.802178161146296, "learning_rate": 2.7531224655312248e-06, "loss": 0.6839, "step": 17278 }, { "epoch": 0.50448162097457, "grad_norm": 0.7400366754320372, "learning_rate": 2.7529602595296028e-06, "loss": 0.6759, "step": 17279 }, { "epoch": 0.5045108172024174, "grad_norm": 0.7515418988608731, "learning_rate": 2.752798053527981e-06, "loss": 0.6904, "step": 17280 }, { "epoch": 0.5045400134302648, "grad_norm": 0.7355661284615274, "learning_rate": 2.752635847526359e-06, "loss": 0.6039, "step": 17281 }, { "epoch": 0.5045692096581121, "grad_norm": 0.704543976440137, "learning_rate": 2.752473641524737e-06, "loss": 0.6131, "step": 17282 }, { "epoch": 0.5045984058859595, "grad_norm": 0.7381266428619032, "learning_rate": 2.7523114355231144e-06, "loss": 0.6421, "step": 17283 }, { "epoch": 0.5046276021138069, "grad_norm": 0.7057048195448088, "learning_rate": 2.7521492295214924e-06, "loss": 0.6729, "step": 17284 }, { "epoch": 0.5046567983416542, "grad_norm": 0.6974036865169518, "learning_rate": 2.7519870235198704e-06, "loss": 0.6325, "step": 17285 }, { "epoch": 0.5046859945695016, "grad_norm": 0.6908549493422363, "learning_rate": 2.7518248175182484e-06, "loss": 0.609, "step": 17286 }, { "epoch": 0.504715190797349, "grad_norm": 0.8909422565859829, "learning_rate": 2.751662611516626e-06, "loss": 0.7088, "step": 17287 }, { "epoch": 0.5047443870251963, "grad_norm": 0.7351678396264366, "learning_rate": 2.751500405515004e-06, "loss": 0.6115, "step": 17288 }, { "epoch": 0.5047735832530437, "grad_norm": 0.7979919585903106, "learning_rate": 2.751338199513382e-06, "loss": 0.6936, "step": 17289 }, { "epoch": 0.504802779480891, "grad_norm": 0.6753289926602818, "learning_rate": 2.75117599351176e-06, "loss": 0.5841, "step": 17290 }, { "epoch": 0.5048319757087384, "grad_norm": 0.6807565026182145, "learning_rate": 2.7510137875101384e-06, "loss": 0.5907, "step": 17291 }, { "epoch": 0.5048611719365858, "grad_norm": 0.7532704707254233, "learning_rate": 2.7508515815085165e-06, "loss": 0.7175, "step": 17292 }, { "epoch": 0.5048903681644331, "grad_norm": 0.7174434458897028, "learning_rate": 2.750689375506894e-06, "loss": 0.6537, "step": 17293 }, { "epoch": 0.5049195643922805, "grad_norm": 0.7119800966077848, "learning_rate": 2.750527169505272e-06, "loss": 0.6084, "step": 17294 }, { "epoch": 0.5049487606201278, "grad_norm": 0.6939488248920417, "learning_rate": 2.75036496350365e-06, "loss": 0.5567, "step": 17295 }, { "epoch": 0.5049779568479752, "grad_norm": 0.769387855394945, "learning_rate": 2.750202757502028e-06, "loss": 0.7954, "step": 17296 }, { "epoch": 0.5050071530758226, "grad_norm": 0.7484616001138997, "learning_rate": 2.7500405515004056e-06, "loss": 0.698, "step": 17297 }, { "epoch": 0.5050363493036699, "grad_norm": 0.6862429664413353, "learning_rate": 2.7498783454987836e-06, "loss": 0.5849, "step": 17298 }, { "epoch": 0.5050655455315173, "grad_norm": 0.6550022720204234, "learning_rate": 2.7497161394971617e-06, "loss": 0.5613, "step": 17299 }, { "epoch": 0.5050947417593646, "grad_norm": 0.6811303992310417, "learning_rate": 2.7495539334955397e-06, "loss": 0.5358, "step": 17300 }, { "epoch": 0.505123937987212, "grad_norm": 0.702438215797314, "learning_rate": 2.7493917274939177e-06, "loss": 0.6342, "step": 17301 }, { "epoch": 0.5051531342150594, "grad_norm": 0.7487046345451636, "learning_rate": 2.7492295214922952e-06, "loss": 0.6569, "step": 17302 }, { "epoch": 0.5051823304429067, "grad_norm": 0.6914928279442005, "learning_rate": 2.7490673154906733e-06, "loss": 0.5867, "step": 17303 }, { "epoch": 0.5052115266707541, "grad_norm": 0.7334146571921755, "learning_rate": 2.7489051094890513e-06, "loss": 0.6696, "step": 17304 }, { "epoch": 0.5052407228986014, "grad_norm": 0.7735040364922597, "learning_rate": 2.7487429034874293e-06, "loss": 0.7463, "step": 17305 }, { "epoch": 0.5052699191264488, "grad_norm": 0.7336981525024091, "learning_rate": 2.748580697485807e-06, "loss": 0.7127, "step": 17306 }, { "epoch": 0.5052991153542963, "grad_norm": 0.7429716034650189, "learning_rate": 2.748418491484185e-06, "loss": 0.6485, "step": 17307 }, { "epoch": 0.5053283115821436, "grad_norm": 0.6947317465978229, "learning_rate": 2.748256285482563e-06, "loss": 0.5938, "step": 17308 }, { "epoch": 0.505357507809991, "grad_norm": 0.7461583467995484, "learning_rate": 2.748094079480941e-06, "loss": 0.6693, "step": 17309 }, { "epoch": 0.5053867040378384, "grad_norm": 0.7603128141291415, "learning_rate": 2.7479318734793193e-06, "loss": 0.6965, "step": 17310 }, { "epoch": 0.5054159002656857, "grad_norm": 0.7488037637484335, "learning_rate": 2.7477696674776973e-06, "loss": 0.7107, "step": 17311 }, { "epoch": 0.5054450964935331, "grad_norm": 0.670494717120095, "learning_rate": 2.747607461476075e-06, "loss": 0.5559, "step": 17312 }, { "epoch": 0.5054742927213804, "grad_norm": 0.7175011216717652, "learning_rate": 2.747445255474453e-06, "loss": 0.6001, "step": 17313 }, { "epoch": 0.5055034889492278, "grad_norm": 0.7866074303937787, "learning_rate": 2.747283049472831e-06, "loss": 0.6397, "step": 17314 }, { "epoch": 0.5055326851770752, "grad_norm": 0.715719440105846, "learning_rate": 2.747120843471209e-06, "loss": 0.6433, "step": 17315 }, { "epoch": 0.5055618814049225, "grad_norm": 0.7934506385695138, "learning_rate": 2.7469586374695865e-06, "loss": 0.7115, "step": 17316 }, { "epoch": 0.5055910776327699, "grad_norm": 0.799172117678399, "learning_rate": 2.7467964314679645e-06, "loss": 0.6621, "step": 17317 }, { "epoch": 0.5056202738606173, "grad_norm": 0.7160124030120099, "learning_rate": 2.7466342254663425e-06, "loss": 0.5969, "step": 17318 }, { "epoch": 0.5056494700884646, "grad_norm": 0.7227672567481753, "learning_rate": 2.7464720194647205e-06, "loss": 0.6402, "step": 17319 }, { "epoch": 0.505678666316312, "grad_norm": 0.8572367341825621, "learning_rate": 2.7463098134630985e-06, "loss": 0.6949, "step": 17320 }, { "epoch": 0.5057078625441593, "grad_norm": 0.7894979039760361, "learning_rate": 2.746147607461476e-06, "loss": 0.6805, "step": 17321 }, { "epoch": 0.5057370587720067, "grad_norm": 0.754471439951482, "learning_rate": 2.745985401459854e-06, "loss": 0.6518, "step": 17322 }, { "epoch": 0.5057662549998541, "grad_norm": 0.7451192431499185, "learning_rate": 2.745823195458232e-06, "loss": 0.5955, "step": 17323 }, { "epoch": 0.5057954512277014, "grad_norm": 0.7034902584278472, "learning_rate": 2.74566098945661e-06, "loss": 0.6117, "step": 17324 }, { "epoch": 0.5058246474555488, "grad_norm": 0.8304010387199143, "learning_rate": 2.7454987834549877e-06, "loss": 0.6429, "step": 17325 }, { "epoch": 0.5058538436833961, "grad_norm": 0.7522659336053967, "learning_rate": 2.7453365774533657e-06, "loss": 0.6524, "step": 17326 }, { "epoch": 0.5058830399112435, "grad_norm": 0.8051893365267695, "learning_rate": 2.7451743714517437e-06, "loss": 0.6869, "step": 17327 }, { "epoch": 0.5059122361390909, "grad_norm": 0.7495670608053114, "learning_rate": 2.7450121654501217e-06, "loss": 0.6956, "step": 17328 }, { "epoch": 0.5059414323669382, "grad_norm": 0.7405912027959539, "learning_rate": 2.7448499594485e-06, "loss": 0.7134, "step": 17329 }, { "epoch": 0.5059706285947856, "grad_norm": 0.7449944089214006, "learning_rate": 2.744687753446878e-06, "loss": 0.6878, "step": 17330 }, { "epoch": 0.505999824822633, "grad_norm": 0.7184319406940658, "learning_rate": 2.7445255474452558e-06, "loss": 0.619, "step": 17331 }, { "epoch": 0.5060290210504803, "grad_norm": 0.6924392596601955, "learning_rate": 2.7443633414436338e-06, "loss": 0.6378, "step": 17332 }, { "epoch": 0.5060582172783277, "grad_norm": 0.7162286784098244, "learning_rate": 2.7442011354420118e-06, "loss": 0.632, "step": 17333 }, { "epoch": 0.506087413506175, "grad_norm": 1.0336008892895314, "learning_rate": 2.7440389294403898e-06, "loss": 0.6828, "step": 17334 }, { "epoch": 0.5061166097340224, "grad_norm": 0.7155451702527457, "learning_rate": 2.7438767234387674e-06, "loss": 0.6009, "step": 17335 }, { "epoch": 0.5061458059618698, "grad_norm": 0.774301385781221, "learning_rate": 2.7437145174371454e-06, "loss": 0.7138, "step": 17336 }, { "epoch": 0.5061750021897171, "grad_norm": 0.7242479362318239, "learning_rate": 2.7435523114355234e-06, "loss": 0.6375, "step": 17337 }, { "epoch": 0.5062041984175645, "grad_norm": 0.7879710320649043, "learning_rate": 2.7433901054339014e-06, "loss": 0.7207, "step": 17338 }, { "epoch": 0.5062333946454118, "grad_norm": 0.684396772651667, "learning_rate": 2.7432278994322794e-06, "loss": 0.5998, "step": 17339 }, { "epoch": 0.5062625908732592, "grad_norm": 0.714156898679866, "learning_rate": 2.743065693430657e-06, "loss": 0.6082, "step": 17340 }, { "epoch": 0.5062917871011066, "grad_norm": 0.7631110885961074, "learning_rate": 2.742903487429035e-06, "loss": 0.6489, "step": 17341 }, { "epoch": 0.5063209833289539, "grad_norm": 0.7978775178618311, "learning_rate": 2.742741281427413e-06, "loss": 0.698, "step": 17342 }, { "epoch": 0.5063501795568013, "grad_norm": 0.7291898942981679, "learning_rate": 2.742579075425791e-06, "loss": 0.604, "step": 17343 }, { "epoch": 0.5063793757846486, "grad_norm": 0.6970955265065063, "learning_rate": 2.7424168694241686e-06, "loss": 0.5806, "step": 17344 }, { "epoch": 0.506408572012496, "grad_norm": 0.6550866601378751, "learning_rate": 2.7422546634225466e-06, "loss": 0.5551, "step": 17345 }, { "epoch": 0.5064377682403434, "grad_norm": 0.7160255139895932, "learning_rate": 2.7420924574209246e-06, "loss": 0.6293, "step": 17346 }, { "epoch": 0.5064669644681907, "grad_norm": 0.7531440262851893, "learning_rate": 2.7419302514193026e-06, "loss": 0.579, "step": 17347 }, { "epoch": 0.5064961606960381, "grad_norm": 0.7290395903641262, "learning_rate": 2.741768045417681e-06, "loss": 0.6702, "step": 17348 }, { "epoch": 0.5065253569238855, "grad_norm": 0.760991484616723, "learning_rate": 2.741605839416059e-06, "loss": 0.7043, "step": 17349 }, { "epoch": 0.5065545531517328, "grad_norm": 0.7068030657788082, "learning_rate": 2.7414436334144366e-06, "loss": 0.619, "step": 17350 }, { "epoch": 0.5065837493795802, "grad_norm": 0.7664270608174151, "learning_rate": 2.7412814274128146e-06, "loss": 0.7107, "step": 17351 }, { "epoch": 0.5066129456074275, "grad_norm": 0.7596548646499054, "learning_rate": 2.7411192214111926e-06, "loss": 0.6508, "step": 17352 }, { "epoch": 0.5066421418352749, "grad_norm": 0.6836217205369621, "learning_rate": 2.7409570154095706e-06, "loss": 0.5421, "step": 17353 }, { "epoch": 0.5066713380631223, "grad_norm": 0.810824822863645, "learning_rate": 2.7407948094079482e-06, "loss": 0.7073, "step": 17354 }, { "epoch": 0.5067005342909696, "grad_norm": 0.7633098839429995, "learning_rate": 2.7406326034063262e-06, "loss": 0.7009, "step": 17355 }, { "epoch": 0.506729730518817, "grad_norm": 0.7269436587560784, "learning_rate": 2.7404703974047042e-06, "loss": 0.6636, "step": 17356 }, { "epoch": 0.5067589267466643, "grad_norm": 0.6942145797541427, "learning_rate": 2.7403081914030822e-06, "loss": 0.5708, "step": 17357 }, { "epoch": 0.5067881229745117, "grad_norm": 0.7508800267279009, "learning_rate": 2.7401459854014602e-06, "loss": 0.7085, "step": 17358 }, { "epoch": 0.5068173192023591, "grad_norm": 0.7597015525520313, "learning_rate": 2.739983779399838e-06, "loss": 0.722, "step": 17359 }, { "epoch": 0.5068465154302064, "grad_norm": 0.6797246883127467, "learning_rate": 2.739821573398216e-06, "loss": 0.5467, "step": 17360 }, { "epoch": 0.5068757116580538, "grad_norm": 0.7573291321526211, "learning_rate": 2.739659367396594e-06, "loss": 0.7073, "step": 17361 }, { "epoch": 0.5069049078859011, "grad_norm": 0.7469656304836506, "learning_rate": 2.739497161394972e-06, "loss": 0.6936, "step": 17362 }, { "epoch": 0.5069341041137485, "grad_norm": 0.7502572645782679, "learning_rate": 2.7393349553933494e-06, "loss": 0.6823, "step": 17363 }, { "epoch": 0.5069633003415959, "grad_norm": 0.690278433230884, "learning_rate": 2.7391727493917274e-06, "loss": 0.6087, "step": 17364 }, { "epoch": 0.5069924965694432, "grad_norm": 0.7169786542266049, "learning_rate": 2.7390105433901054e-06, "loss": 0.6503, "step": 17365 }, { "epoch": 0.5070216927972906, "grad_norm": 0.7413416132124923, "learning_rate": 2.7388483373884834e-06, "loss": 0.6709, "step": 17366 }, { "epoch": 0.507050889025138, "grad_norm": 0.7520492899354998, "learning_rate": 2.738686131386862e-06, "loss": 0.7124, "step": 17367 }, { "epoch": 0.5070800852529853, "grad_norm": 0.7854237606121722, "learning_rate": 2.73852392538524e-06, "loss": 0.6654, "step": 17368 }, { "epoch": 0.5071092814808327, "grad_norm": 0.7130310783650953, "learning_rate": 2.7383617193836175e-06, "loss": 0.6313, "step": 17369 }, { "epoch": 0.50713847770868, "grad_norm": 0.6932655135460041, "learning_rate": 2.7381995133819955e-06, "loss": 0.6127, "step": 17370 }, { "epoch": 0.5071676739365274, "grad_norm": 0.7203637028311927, "learning_rate": 2.7380373073803735e-06, "loss": 0.6263, "step": 17371 }, { "epoch": 0.5071968701643748, "grad_norm": 0.7552277738466581, "learning_rate": 2.7378751013787515e-06, "loss": 0.6997, "step": 17372 }, { "epoch": 0.5072260663922221, "grad_norm": 0.8649793502239114, "learning_rate": 2.737712895377129e-06, "loss": 0.701, "step": 17373 }, { "epoch": 0.5072552626200695, "grad_norm": 0.7913944000286176, "learning_rate": 2.737550689375507e-06, "loss": 0.6484, "step": 17374 }, { "epoch": 0.5072844588479168, "grad_norm": 0.7725927617912073, "learning_rate": 2.737388483373885e-06, "loss": 0.6855, "step": 17375 }, { "epoch": 0.5073136550757642, "grad_norm": 0.7008905518485102, "learning_rate": 2.737226277372263e-06, "loss": 0.6287, "step": 17376 }, { "epoch": 0.5073428513036116, "grad_norm": 0.6813345992171841, "learning_rate": 2.737064071370641e-06, "loss": 0.6294, "step": 17377 }, { "epoch": 0.5073720475314589, "grad_norm": 0.7783937333171941, "learning_rate": 2.7369018653690187e-06, "loss": 0.7098, "step": 17378 }, { "epoch": 0.5074012437593063, "grad_norm": 0.7091162911022132, "learning_rate": 2.7367396593673967e-06, "loss": 0.6252, "step": 17379 }, { "epoch": 0.5074304399871536, "grad_norm": 0.7003611463416902, "learning_rate": 2.7365774533657747e-06, "loss": 0.6272, "step": 17380 }, { "epoch": 0.507459636215001, "grad_norm": 1.0352561454766314, "learning_rate": 2.7364152473641527e-06, "loss": 0.7248, "step": 17381 }, { "epoch": 0.5074888324428484, "grad_norm": 0.7202835572453934, "learning_rate": 2.7362530413625303e-06, "loss": 0.6405, "step": 17382 }, { "epoch": 0.5075180286706957, "grad_norm": 0.6791925688829381, "learning_rate": 2.7360908353609083e-06, "loss": 0.5811, "step": 17383 }, { "epoch": 0.5075472248985431, "grad_norm": 0.6900410502259154, "learning_rate": 2.7359286293592863e-06, "loss": 0.578, "step": 17384 }, { "epoch": 0.5075764211263905, "grad_norm": 0.726508573322743, "learning_rate": 2.7357664233576643e-06, "loss": 0.6729, "step": 17385 }, { "epoch": 0.5076056173542378, "grad_norm": 0.7099494063394126, "learning_rate": 2.7356042173560427e-06, "loss": 0.6058, "step": 17386 }, { "epoch": 0.5076348135820852, "grad_norm": 0.7553322610875278, "learning_rate": 2.7354420113544207e-06, "loss": 0.5931, "step": 17387 }, { "epoch": 0.5076640098099325, "grad_norm": 0.7183732708240987, "learning_rate": 2.7352798053527983e-06, "loss": 0.6694, "step": 17388 }, { "epoch": 0.5076932060377799, "grad_norm": 0.725778155397169, "learning_rate": 2.7351175993511763e-06, "loss": 0.6405, "step": 17389 }, { "epoch": 0.5077224022656273, "grad_norm": 0.7423293693345215, "learning_rate": 2.7349553933495543e-06, "loss": 0.6484, "step": 17390 }, { "epoch": 0.5077515984934746, "grad_norm": 0.7140230075027633, "learning_rate": 2.7347931873479323e-06, "loss": 0.6203, "step": 17391 }, { "epoch": 0.507780794721322, "grad_norm": 0.736885352219822, "learning_rate": 2.73463098134631e-06, "loss": 0.6096, "step": 17392 }, { "epoch": 0.5078099909491693, "grad_norm": 0.7431341556479117, "learning_rate": 2.734468775344688e-06, "loss": 0.7087, "step": 17393 }, { "epoch": 0.5078391871770167, "grad_norm": 0.7167990791648062, "learning_rate": 2.734306569343066e-06, "loss": 0.6143, "step": 17394 }, { "epoch": 0.5078683834048641, "grad_norm": 0.7106520674891019, "learning_rate": 2.734144363341444e-06, "loss": 0.6192, "step": 17395 }, { "epoch": 0.5078975796327114, "grad_norm": 0.7494851156145016, "learning_rate": 2.733982157339822e-06, "loss": 0.67, "step": 17396 }, { "epoch": 0.5079267758605588, "grad_norm": 0.7597017456957955, "learning_rate": 2.7338199513381995e-06, "loss": 0.7528, "step": 17397 }, { "epoch": 0.5079559720884061, "grad_norm": 0.6570955428750352, "learning_rate": 2.7336577453365775e-06, "loss": 0.5792, "step": 17398 }, { "epoch": 0.5079851683162535, "grad_norm": 0.7186561351596447, "learning_rate": 2.7334955393349556e-06, "loss": 0.6194, "step": 17399 }, { "epoch": 0.5080143645441009, "grad_norm": 0.709994455063839, "learning_rate": 2.7333333333333336e-06, "loss": 0.5828, "step": 17400 }, { "epoch": 0.5080435607719482, "grad_norm": 0.6723230397555053, "learning_rate": 2.733171127331711e-06, "loss": 0.5623, "step": 17401 }, { "epoch": 0.5080727569997956, "grad_norm": 0.7190172381602662, "learning_rate": 2.733008921330089e-06, "loss": 0.6714, "step": 17402 }, { "epoch": 0.508101953227643, "grad_norm": 0.7674138129091622, "learning_rate": 2.732846715328467e-06, "loss": 0.6939, "step": 17403 }, { "epoch": 0.5081311494554903, "grad_norm": 0.740821053700951, "learning_rate": 2.7326845093268456e-06, "loss": 0.6597, "step": 17404 }, { "epoch": 0.5081603456833377, "grad_norm": 0.7675238475516505, "learning_rate": 2.7325223033252236e-06, "loss": 0.6854, "step": 17405 }, { "epoch": 0.508189541911185, "grad_norm": 0.6971631323299131, "learning_rate": 2.7323600973236016e-06, "loss": 0.6357, "step": 17406 }, { "epoch": 0.5082187381390324, "grad_norm": 0.6526236602652646, "learning_rate": 2.732197891321979e-06, "loss": 0.5431, "step": 17407 }, { "epoch": 0.5082479343668798, "grad_norm": 0.7246510104948063, "learning_rate": 2.732035685320357e-06, "loss": 0.6461, "step": 17408 }, { "epoch": 0.5082771305947271, "grad_norm": 0.7187757735521924, "learning_rate": 2.731873479318735e-06, "loss": 0.6353, "step": 17409 }, { "epoch": 0.5083063268225745, "grad_norm": 1.3792703860709017, "learning_rate": 2.731711273317113e-06, "loss": 0.7554, "step": 17410 }, { "epoch": 0.5083355230504218, "grad_norm": 0.7923018206377934, "learning_rate": 2.731549067315491e-06, "loss": 0.695, "step": 17411 }, { "epoch": 0.5083647192782692, "grad_norm": 0.8723889952765097, "learning_rate": 2.731386861313869e-06, "loss": 0.7894, "step": 17412 }, { "epoch": 0.5083939155061166, "grad_norm": 0.7691384933615459, "learning_rate": 2.731224655312247e-06, "loss": 0.7458, "step": 17413 }, { "epoch": 0.5084231117339639, "grad_norm": 0.7055747483224483, "learning_rate": 2.731062449310625e-06, "loss": 0.6038, "step": 17414 }, { "epoch": 0.5084523079618113, "grad_norm": 0.6899569708722804, "learning_rate": 2.730900243309003e-06, "loss": 0.6263, "step": 17415 }, { "epoch": 0.5084815041896587, "grad_norm": 0.7229188553530066, "learning_rate": 2.7307380373073804e-06, "loss": 0.6484, "step": 17416 }, { "epoch": 0.508510700417506, "grad_norm": 0.7275433797780082, "learning_rate": 2.7305758313057584e-06, "loss": 0.6371, "step": 17417 }, { "epoch": 0.5085398966453534, "grad_norm": 0.7399178018383701, "learning_rate": 2.7304136253041364e-06, "loss": 0.6091, "step": 17418 }, { "epoch": 0.5085690928732007, "grad_norm": 0.7841124817107492, "learning_rate": 2.7302514193025144e-06, "loss": 0.6934, "step": 17419 }, { "epoch": 0.5085982891010481, "grad_norm": 0.7074526979788933, "learning_rate": 2.730089213300892e-06, "loss": 0.6092, "step": 17420 }, { "epoch": 0.5086274853288955, "grad_norm": 0.7496251539423809, "learning_rate": 2.72992700729927e-06, "loss": 0.6969, "step": 17421 }, { "epoch": 0.5086566815567428, "grad_norm": 0.7619927142738293, "learning_rate": 2.729764801297648e-06, "loss": 0.7087, "step": 17422 }, { "epoch": 0.5086858777845902, "grad_norm": 0.7595618995082845, "learning_rate": 2.7296025952960264e-06, "loss": 0.621, "step": 17423 }, { "epoch": 0.5087150740124375, "grad_norm": 0.6754551193863123, "learning_rate": 2.7294403892944045e-06, "loss": 0.6029, "step": 17424 }, { "epoch": 0.5087442702402849, "grad_norm": 0.7725637676813415, "learning_rate": 2.7292781832927825e-06, "loss": 0.7118, "step": 17425 }, { "epoch": 0.5087734664681323, "grad_norm": 0.7233978278906251, "learning_rate": 2.72911597729116e-06, "loss": 0.5829, "step": 17426 }, { "epoch": 0.5088026626959796, "grad_norm": 0.7214141751124102, "learning_rate": 2.728953771289538e-06, "loss": 0.6317, "step": 17427 }, { "epoch": 0.5088318589238271, "grad_norm": 0.7572262907250418, "learning_rate": 2.728791565287916e-06, "loss": 0.6344, "step": 17428 }, { "epoch": 0.5088610551516745, "grad_norm": 0.7325548437736372, "learning_rate": 2.728629359286294e-06, "loss": 0.6602, "step": 17429 }, { "epoch": 0.5088902513795218, "grad_norm": 0.7312068678376412, "learning_rate": 2.7284671532846716e-06, "loss": 0.6332, "step": 17430 }, { "epoch": 0.5089194476073692, "grad_norm": 0.7339686816456895, "learning_rate": 2.7283049472830497e-06, "loss": 0.6641, "step": 17431 }, { "epoch": 0.5089486438352165, "grad_norm": 0.769771819173121, "learning_rate": 2.7281427412814277e-06, "loss": 0.7147, "step": 17432 }, { "epoch": 0.5089778400630639, "grad_norm": 0.7968812718222037, "learning_rate": 2.7279805352798057e-06, "loss": 0.7173, "step": 17433 }, { "epoch": 0.5090070362909113, "grad_norm": 0.7201368744644236, "learning_rate": 2.7278183292781833e-06, "loss": 0.6658, "step": 17434 }, { "epoch": 0.5090362325187586, "grad_norm": 0.7045735338461852, "learning_rate": 2.7276561232765613e-06, "loss": 0.6373, "step": 17435 }, { "epoch": 0.509065428746606, "grad_norm": 0.7034188782488778, "learning_rate": 2.7274939172749393e-06, "loss": 0.6406, "step": 17436 }, { "epoch": 0.5090946249744533, "grad_norm": 0.8115285306889528, "learning_rate": 2.7273317112733173e-06, "loss": 0.7338, "step": 17437 }, { "epoch": 0.5091238212023007, "grad_norm": 0.718921807243604, "learning_rate": 2.7271695052716953e-06, "loss": 0.6455, "step": 17438 }, { "epoch": 0.5091530174301481, "grad_norm": 0.7602234467910629, "learning_rate": 2.727007299270073e-06, "loss": 0.7235, "step": 17439 }, { "epoch": 0.5091822136579954, "grad_norm": 0.7578046155843164, "learning_rate": 2.726845093268451e-06, "loss": 0.5827, "step": 17440 }, { "epoch": 0.5092114098858428, "grad_norm": 0.6649531037924002, "learning_rate": 2.726682887266829e-06, "loss": 0.5568, "step": 17441 }, { "epoch": 0.5092406061136902, "grad_norm": 0.8331674092315812, "learning_rate": 2.7265206812652073e-06, "loss": 0.6842, "step": 17442 }, { "epoch": 0.5092698023415375, "grad_norm": 0.7078916010292934, "learning_rate": 2.7263584752635853e-06, "loss": 0.6483, "step": 17443 }, { "epoch": 0.5092989985693849, "grad_norm": 0.7125349287221246, "learning_rate": 2.7261962692619633e-06, "loss": 0.6022, "step": 17444 }, { "epoch": 0.5093281947972322, "grad_norm": 0.7184567280254068, "learning_rate": 2.726034063260341e-06, "loss": 0.6313, "step": 17445 }, { "epoch": 0.5093573910250796, "grad_norm": 0.7424834908001057, "learning_rate": 2.725871857258719e-06, "loss": 0.6427, "step": 17446 }, { "epoch": 0.509386587252927, "grad_norm": 0.7677439273789942, "learning_rate": 2.725709651257097e-06, "loss": 0.6469, "step": 17447 }, { "epoch": 0.5094157834807743, "grad_norm": 0.7423632327826606, "learning_rate": 2.725547445255475e-06, "loss": 0.7036, "step": 17448 }, { "epoch": 0.5094449797086217, "grad_norm": 0.7007152475062104, "learning_rate": 2.7253852392538525e-06, "loss": 0.588, "step": 17449 }, { "epoch": 0.509474175936469, "grad_norm": 0.7793600456261163, "learning_rate": 2.7252230332522305e-06, "loss": 0.7184, "step": 17450 }, { "epoch": 0.5095033721643164, "grad_norm": 0.7496063856819903, "learning_rate": 2.7250608272506085e-06, "loss": 0.6366, "step": 17451 }, { "epoch": 0.5095325683921638, "grad_norm": 0.7412566514902617, "learning_rate": 2.7248986212489865e-06, "loss": 0.6876, "step": 17452 }, { "epoch": 0.5095617646200111, "grad_norm": 0.710352821984041, "learning_rate": 2.724736415247364e-06, "loss": 0.6378, "step": 17453 }, { "epoch": 0.5095909608478585, "grad_norm": 0.7200158748530143, "learning_rate": 2.724574209245742e-06, "loss": 0.5955, "step": 17454 }, { "epoch": 0.5096201570757058, "grad_norm": 0.7705631882658506, "learning_rate": 2.72441200324412e-06, "loss": 0.7119, "step": 17455 }, { "epoch": 0.5096493533035532, "grad_norm": 0.7450195506397321, "learning_rate": 2.724249797242498e-06, "loss": 0.694, "step": 17456 }, { "epoch": 0.5096785495314006, "grad_norm": 0.7234342546122465, "learning_rate": 2.724087591240876e-06, "loss": 0.6093, "step": 17457 }, { "epoch": 0.5097077457592479, "grad_norm": 0.6703877213819852, "learning_rate": 2.7239253852392537e-06, "loss": 0.5462, "step": 17458 }, { "epoch": 0.5097369419870953, "grad_norm": 0.862674780497516, "learning_rate": 2.7237631792376317e-06, "loss": 0.739, "step": 17459 }, { "epoch": 0.5097661382149427, "grad_norm": 0.6682054911534574, "learning_rate": 2.7236009732360097e-06, "loss": 0.5911, "step": 17460 }, { "epoch": 0.50979533444279, "grad_norm": 0.7692407054686847, "learning_rate": 2.723438767234388e-06, "loss": 0.6665, "step": 17461 }, { "epoch": 0.5098245306706374, "grad_norm": 0.7601391379089755, "learning_rate": 2.723276561232766e-06, "loss": 0.6946, "step": 17462 }, { "epoch": 0.5098537268984847, "grad_norm": 0.7689141228919866, "learning_rate": 2.723114355231144e-06, "loss": 0.7041, "step": 17463 }, { "epoch": 0.5098829231263321, "grad_norm": 0.8155077338240526, "learning_rate": 2.7229521492295218e-06, "loss": 0.6802, "step": 17464 }, { "epoch": 0.5099121193541795, "grad_norm": 0.7388579802030736, "learning_rate": 2.7227899432278998e-06, "loss": 0.639, "step": 17465 }, { "epoch": 0.5099413155820268, "grad_norm": 0.7533344790296718, "learning_rate": 2.7226277372262778e-06, "loss": 0.6793, "step": 17466 }, { "epoch": 0.5099705118098742, "grad_norm": 0.7048171785701809, "learning_rate": 2.7224655312246558e-06, "loss": 0.6213, "step": 17467 }, { "epoch": 0.5099997080377215, "grad_norm": 0.7334939203758342, "learning_rate": 2.7223033252230334e-06, "loss": 0.7148, "step": 17468 }, { "epoch": 0.5100289042655689, "grad_norm": 0.699920517051568, "learning_rate": 2.7221411192214114e-06, "loss": 0.6147, "step": 17469 }, { "epoch": 0.5100581004934163, "grad_norm": 0.7210207005423956, "learning_rate": 2.7219789132197894e-06, "loss": 0.655, "step": 17470 }, { "epoch": 0.5100872967212636, "grad_norm": 0.7433365369584403, "learning_rate": 2.7218167072181674e-06, "loss": 0.6878, "step": 17471 }, { "epoch": 0.510116492949111, "grad_norm": 0.7658147774547234, "learning_rate": 2.721654501216545e-06, "loss": 0.709, "step": 17472 }, { "epoch": 0.5101456891769584, "grad_norm": 0.7767962110032973, "learning_rate": 2.721492295214923e-06, "loss": 0.7023, "step": 17473 }, { "epoch": 0.5101748854048057, "grad_norm": 0.7159766347132271, "learning_rate": 2.721330089213301e-06, "loss": 0.6859, "step": 17474 }, { "epoch": 0.5102040816326531, "grad_norm": 0.797605103718692, "learning_rate": 2.721167883211679e-06, "loss": 0.7362, "step": 17475 }, { "epoch": 0.5102332778605004, "grad_norm": 0.6985708399778205, "learning_rate": 2.721005677210057e-06, "loss": 0.608, "step": 17476 }, { "epoch": 0.5102624740883478, "grad_norm": 0.7179683477824306, "learning_rate": 2.7208434712084346e-06, "loss": 0.6199, "step": 17477 }, { "epoch": 0.5102916703161952, "grad_norm": 0.6914866133216585, "learning_rate": 2.7206812652068126e-06, "loss": 0.641, "step": 17478 }, { "epoch": 0.5103208665440425, "grad_norm": 0.7274638323511172, "learning_rate": 2.7205190592051906e-06, "loss": 0.6112, "step": 17479 }, { "epoch": 0.5103500627718899, "grad_norm": 0.6877857618439169, "learning_rate": 2.720356853203569e-06, "loss": 0.6138, "step": 17480 }, { "epoch": 0.5103792589997372, "grad_norm": 0.7106991702087154, "learning_rate": 2.720194647201947e-06, "loss": 0.5986, "step": 17481 }, { "epoch": 0.5104084552275846, "grad_norm": 0.6981096940930769, "learning_rate": 2.720032441200325e-06, "loss": 0.6062, "step": 17482 }, { "epoch": 0.510437651455432, "grad_norm": 0.7509778895528985, "learning_rate": 2.7198702351987026e-06, "loss": 0.6046, "step": 17483 }, { "epoch": 0.5104668476832793, "grad_norm": 0.7670388998270061, "learning_rate": 2.7197080291970806e-06, "loss": 0.7254, "step": 17484 }, { "epoch": 0.5104960439111267, "grad_norm": 0.7945099407478144, "learning_rate": 2.7195458231954586e-06, "loss": 0.7151, "step": 17485 }, { "epoch": 0.510525240138974, "grad_norm": 0.733687665993145, "learning_rate": 2.7193836171938366e-06, "loss": 0.6654, "step": 17486 }, { "epoch": 0.5105544363668214, "grad_norm": 0.6916342639269251, "learning_rate": 2.7192214111922142e-06, "loss": 0.5989, "step": 17487 }, { "epoch": 0.5105836325946688, "grad_norm": 0.7741734090959504, "learning_rate": 2.7190592051905922e-06, "loss": 0.6634, "step": 17488 }, { "epoch": 0.5106128288225161, "grad_norm": 0.7967969277885889, "learning_rate": 2.7188969991889702e-06, "loss": 0.8231, "step": 17489 }, { "epoch": 0.5106420250503635, "grad_norm": 0.6989816473206086, "learning_rate": 2.7187347931873482e-06, "loss": 0.651, "step": 17490 }, { "epoch": 0.5106712212782109, "grad_norm": 0.7830677464082043, "learning_rate": 2.718572587185726e-06, "loss": 0.6122, "step": 17491 }, { "epoch": 0.5107004175060582, "grad_norm": 0.7208056729804102, "learning_rate": 2.718410381184104e-06, "loss": 0.6313, "step": 17492 }, { "epoch": 0.5107296137339056, "grad_norm": 0.7199544142398231, "learning_rate": 2.718248175182482e-06, "loss": 0.6891, "step": 17493 }, { "epoch": 0.5107588099617529, "grad_norm": 0.7283103282724873, "learning_rate": 2.71808596918086e-06, "loss": 0.6626, "step": 17494 }, { "epoch": 0.5107880061896003, "grad_norm": 0.6945502365439978, "learning_rate": 2.717923763179238e-06, "loss": 0.6126, "step": 17495 }, { "epoch": 0.5108172024174477, "grad_norm": 0.7579523228865417, "learning_rate": 2.7177615571776154e-06, "loss": 0.7379, "step": 17496 }, { "epoch": 0.510846398645295, "grad_norm": 0.6892678916273941, "learning_rate": 2.7175993511759934e-06, "loss": 0.5949, "step": 17497 }, { "epoch": 0.5108755948731424, "grad_norm": 0.7353328564600248, "learning_rate": 2.7174371451743715e-06, "loss": 0.6646, "step": 17498 }, { "epoch": 0.5109047911009897, "grad_norm": 0.6987861694824198, "learning_rate": 2.71727493917275e-06, "loss": 0.6422, "step": 17499 }, { "epoch": 0.5109339873288371, "grad_norm": 0.7274318240814444, "learning_rate": 2.717112733171128e-06, "loss": 0.6619, "step": 17500 }, { "epoch": 0.5109631835566845, "grad_norm": 0.669747496852898, "learning_rate": 2.716950527169506e-06, "loss": 0.567, "step": 17501 }, { "epoch": 0.5109923797845318, "grad_norm": 0.7133148599109788, "learning_rate": 2.7167883211678835e-06, "loss": 0.6506, "step": 17502 }, { "epoch": 0.5110215760123792, "grad_norm": 0.7715389772998063, "learning_rate": 2.7166261151662615e-06, "loss": 0.7592, "step": 17503 }, { "epoch": 0.5110507722402265, "grad_norm": 0.6487601176159619, "learning_rate": 2.7164639091646395e-06, "loss": 0.5772, "step": 17504 }, { "epoch": 0.5110799684680739, "grad_norm": 0.7160046540112126, "learning_rate": 2.7163017031630175e-06, "loss": 0.6498, "step": 17505 }, { "epoch": 0.5111091646959213, "grad_norm": 0.7209754212513577, "learning_rate": 2.716139497161395e-06, "loss": 0.618, "step": 17506 }, { "epoch": 0.5111383609237686, "grad_norm": 0.7435547524590717, "learning_rate": 2.715977291159773e-06, "loss": 0.6707, "step": 17507 }, { "epoch": 0.511167557151616, "grad_norm": 0.6816725025722549, "learning_rate": 2.715815085158151e-06, "loss": 0.6281, "step": 17508 }, { "epoch": 0.5111967533794634, "grad_norm": 0.7299054537915196, "learning_rate": 2.715652879156529e-06, "loss": 0.679, "step": 17509 }, { "epoch": 0.5112259496073107, "grad_norm": 0.7009162368532409, "learning_rate": 2.7154906731549067e-06, "loss": 0.6144, "step": 17510 }, { "epoch": 0.5112551458351581, "grad_norm": 0.7028768976743763, "learning_rate": 2.7153284671532847e-06, "loss": 0.6405, "step": 17511 }, { "epoch": 0.5112843420630054, "grad_norm": 0.6470874397117321, "learning_rate": 2.7151662611516627e-06, "loss": 0.5654, "step": 17512 }, { "epoch": 0.5113135382908528, "grad_norm": 0.6848236693495183, "learning_rate": 2.7150040551500407e-06, "loss": 0.5868, "step": 17513 }, { "epoch": 0.5113427345187002, "grad_norm": 0.6952879610791387, "learning_rate": 2.7148418491484187e-06, "loss": 0.6052, "step": 17514 }, { "epoch": 0.5113719307465475, "grad_norm": 0.65958927125932, "learning_rate": 2.7146796431467963e-06, "loss": 0.5468, "step": 17515 }, { "epoch": 0.5114011269743949, "grad_norm": 0.71404332566045, "learning_rate": 2.7145174371451743e-06, "loss": 0.6111, "step": 17516 }, { "epoch": 0.5114303232022422, "grad_norm": 0.7108943534748373, "learning_rate": 2.7143552311435523e-06, "loss": 0.5787, "step": 17517 }, { "epoch": 0.5114595194300896, "grad_norm": 0.7176746677385062, "learning_rate": 2.7141930251419307e-06, "loss": 0.6182, "step": 17518 }, { "epoch": 0.511488715657937, "grad_norm": 0.6531532761218043, "learning_rate": 2.7140308191403087e-06, "loss": 0.5486, "step": 17519 }, { "epoch": 0.5115179118857843, "grad_norm": 0.6893547295599307, "learning_rate": 2.7138686131386868e-06, "loss": 0.625, "step": 17520 }, { "epoch": 0.5115471081136317, "grad_norm": 0.7793788112612747, "learning_rate": 2.7137064071370643e-06, "loss": 0.7498, "step": 17521 }, { "epoch": 0.511576304341479, "grad_norm": 0.7152700517974196, "learning_rate": 2.7135442011354423e-06, "loss": 0.6406, "step": 17522 }, { "epoch": 0.5116055005693264, "grad_norm": 0.7684911187486626, "learning_rate": 2.7133819951338204e-06, "loss": 0.6785, "step": 17523 }, { "epoch": 0.5116346967971738, "grad_norm": 0.895014033512249, "learning_rate": 2.7132197891321984e-06, "loss": 0.6885, "step": 17524 }, { "epoch": 0.5116638930250211, "grad_norm": 0.6755695880985807, "learning_rate": 2.713057583130576e-06, "loss": 0.5975, "step": 17525 }, { "epoch": 0.5116930892528685, "grad_norm": 0.6837552180402217, "learning_rate": 2.712895377128954e-06, "loss": 0.6085, "step": 17526 }, { "epoch": 0.5117222854807159, "grad_norm": 0.7456287395518654, "learning_rate": 2.712733171127332e-06, "loss": 0.734, "step": 17527 }, { "epoch": 0.5117514817085632, "grad_norm": 0.7105903236657816, "learning_rate": 2.71257096512571e-06, "loss": 0.6385, "step": 17528 }, { "epoch": 0.5117806779364106, "grad_norm": 0.7285082664370225, "learning_rate": 2.7124087591240875e-06, "loss": 0.6534, "step": 17529 }, { "epoch": 0.5118098741642579, "grad_norm": 0.687626347391626, "learning_rate": 2.7122465531224656e-06, "loss": 0.6194, "step": 17530 }, { "epoch": 0.5118390703921053, "grad_norm": 0.7531486265285254, "learning_rate": 2.7120843471208436e-06, "loss": 0.6815, "step": 17531 }, { "epoch": 0.5118682666199527, "grad_norm": 0.69076884498514, "learning_rate": 2.7119221411192216e-06, "loss": 0.5528, "step": 17532 }, { "epoch": 0.5118974628478, "grad_norm": 0.7213860169080255, "learning_rate": 2.7117599351175996e-06, "loss": 0.6448, "step": 17533 }, { "epoch": 0.5119266590756474, "grad_norm": 0.7566377290288764, "learning_rate": 2.711597729115977e-06, "loss": 0.6665, "step": 17534 }, { "epoch": 0.5119558553034947, "grad_norm": 0.7590115000589047, "learning_rate": 2.711435523114355e-06, "loss": 0.6746, "step": 17535 }, { "epoch": 0.5119850515313421, "grad_norm": 0.7527399952613025, "learning_rate": 2.711273317112733e-06, "loss": 0.7249, "step": 17536 }, { "epoch": 0.5120142477591895, "grad_norm": 0.7528439933331306, "learning_rate": 2.7111111111111116e-06, "loss": 0.6679, "step": 17537 }, { "epoch": 0.5120434439870368, "grad_norm": 0.7559201320397136, "learning_rate": 2.7109489051094896e-06, "loss": 0.6792, "step": 17538 }, { "epoch": 0.5120726402148842, "grad_norm": 0.713253265733396, "learning_rate": 2.7107866991078676e-06, "loss": 0.5968, "step": 17539 }, { "epoch": 0.5121018364427316, "grad_norm": 0.7253819823446638, "learning_rate": 2.710624493106245e-06, "loss": 0.6377, "step": 17540 }, { "epoch": 0.5121310326705789, "grad_norm": 0.6513280796994643, "learning_rate": 2.710462287104623e-06, "loss": 0.5341, "step": 17541 }, { "epoch": 0.5121602288984263, "grad_norm": 0.7873773077495466, "learning_rate": 2.710300081103001e-06, "loss": 0.687, "step": 17542 }, { "epoch": 0.5121894251262736, "grad_norm": 0.7102926185757843, "learning_rate": 2.7101378751013792e-06, "loss": 0.6401, "step": 17543 }, { "epoch": 0.512218621354121, "grad_norm": 0.777087384743426, "learning_rate": 2.709975669099757e-06, "loss": 0.6807, "step": 17544 }, { "epoch": 0.5122478175819684, "grad_norm": 0.7163078285998735, "learning_rate": 2.709813463098135e-06, "loss": 0.617, "step": 17545 }, { "epoch": 0.5122770138098157, "grad_norm": 0.7373717452748487, "learning_rate": 2.709651257096513e-06, "loss": 0.6797, "step": 17546 }, { "epoch": 0.5123062100376631, "grad_norm": 0.713544110533389, "learning_rate": 2.709489051094891e-06, "loss": 0.6065, "step": 17547 }, { "epoch": 0.5123354062655106, "grad_norm": 0.7604157330619407, "learning_rate": 2.7093268450932684e-06, "loss": 0.6889, "step": 17548 }, { "epoch": 0.5123646024933579, "grad_norm": 0.7793751431397923, "learning_rate": 2.7091646390916464e-06, "loss": 0.6266, "step": 17549 }, { "epoch": 0.5123937987212053, "grad_norm": 0.7654482844596366, "learning_rate": 2.7090024330900244e-06, "loss": 0.6627, "step": 17550 }, { "epoch": 0.5124229949490526, "grad_norm": 0.6553847222559103, "learning_rate": 2.7088402270884024e-06, "loss": 0.5247, "step": 17551 }, { "epoch": 0.5124521911769, "grad_norm": 0.7457692756310138, "learning_rate": 2.7086780210867804e-06, "loss": 0.7101, "step": 17552 }, { "epoch": 0.5124813874047474, "grad_norm": 0.7940916014391267, "learning_rate": 2.708515815085158e-06, "loss": 0.6803, "step": 17553 }, { "epoch": 0.5125105836325947, "grad_norm": 0.7297030365763847, "learning_rate": 2.708353609083536e-06, "loss": 0.6665, "step": 17554 }, { "epoch": 0.5125397798604421, "grad_norm": 0.8015381136311196, "learning_rate": 2.7081914030819145e-06, "loss": 0.6521, "step": 17555 }, { "epoch": 0.5125689760882894, "grad_norm": 0.7179789139163656, "learning_rate": 2.7080291970802925e-06, "loss": 0.6261, "step": 17556 }, { "epoch": 0.5125981723161368, "grad_norm": 0.7692813549227068, "learning_rate": 2.7078669910786705e-06, "loss": 0.6627, "step": 17557 }, { "epoch": 0.5126273685439842, "grad_norm": 0.695418991183472, "learning_rate": 2.707704785077048e-06, "loss": 0.5981, "step": 17558 }, { "epoch": 0.5126565647718315, "grad_norm": 0.7014620172670479, "learning_rate": 2.707542579075426e-06, "loss": 0.6111, "step": 17559 }, { "epoch": 0.5126857609996789, "grad_norm": 0.7266040863596895, "learning_rate": 2.707380373073804e-06, "loss": 0.6836, "step": 17560 }, { "epoch": 0.5127149572275262, "grad_norm": 0.6797873411393569, "learning_rate": 2.707218167072182e-06, "loss": 0.5914, "step": 17561 }, { "epoch": 0.5127441534553736, "grad_norm": 0.6959492374074604, "learning_rate": 2.70705596107056e-06, "loss": 0.594, "step": 17562 }, { "epoch": 0.512773349683221, "grad_norm": 0.7155293184552559, "learning_rate": 2.7068937550689377e-06, "loss": 0.5583, "step": 17563 }, { "epoch": 0.5128025459110683, "grad_norm": 0.7289921663048172, "learning_rate": 2.7067315490673157e-06, "loss": 0.6797, "step": 17564 }, { "epoch": 0.5128317421389157, "grad_norm": 0.7479449979657065, "learning_rate": 2.7065693430656937e-06, "loss": 0.6592, "step": 17565 }, { "epoch": 0.512860938366763, "grad_norm": 0.7069500383444892, "learning_rate": 2.7064071370640717e-06, "loss": 0.6375, "step": 17566 }, { "epoch": 0.5128901345946104, "grad_norm": 0.7564524492866038, "learning_rate": 2.7062449310624493e-06, "loss": 0.7223, "step": 17567 }, { "epoch": 0.5129193308224578, "grad_norm": 0.7029391446633535, "learning_rate": 2.7060827250608273e-06, "loss": 0.6184, "step": 17568 }, { "epoch": 0.5129485270503051, "grad_norm": 0.7362309186031692, "learning_rate": 2.7059205190592053e-06, "loss": 0.6542, "step": 17569 }, { "epoch": 0.5129777232781525, "grad_norm": 0.7374564107998242, "learning_rate": 2.7057583130575833e-06, "loss": 0.6748, "step": 17570 }, { "epoch": 0.5130069195059999, "grad_norm": 0.7387524816396824, "learning_rate": 2.7055961070559613e-06, "loss": 0.6152, "step": 17571 }, { "epoch": 0.5130361157338472, "grad_norm": 0.6814040053330906, "learning_rate": 2.705433901054339e-06, "loss": 0.6014, "step": 17572 }, { "epoch": 0.5130653119616946, "grad_norm": 0.6656947722683138, "learning_rate": 2.705271695052717e-06, "loss": 0.5609, "step": 17573 }, { "epoch": 0.5130945081895419, "grad_norm": 0.7608770458217856, "learning_rate": 2.7051094890510953e-06, "loss": 0.7144, "step": 17574 }, { "epoch": 0.5131237044173893, "grad_norm": 0.7435193567967442, "learning_rate": 2.7049472830494733e-06, "loss": 0.6753, "step": 17575 }, { "epoch": 0.5131529006452367, "grad_norm": 0.7740696027675247, "learning_rate": 2.7047850770478513e-06, "loss": 0.6781, "step": 17576 }, { "epoch": 0.513182096873084, "grad_norm": 0.6902087039327102, "learning_rate": 2.704622871046229e-06, "loss": 0.6366, "step": 17577 }, { "epoch": 0.5132112931009314, "grad_norm": 0.7168269533504743, "learning_rate": 2.704460665044607e-06, "loss": 0.6597, "step": 17578 }, { "epoch": 0.5132404893287787, "grad_norm": 0.6523697177739801, "learning_rate": 2.704298459042985e-06, "loss": 0.5509, "step": 17579 }, { "epoch": 0.5132696855566261, "grad_norm": 0.7265602011341112, "learning_rate": 2.704136253041363e-06, "loss": 0.6741, "step": 17580 }, { "epoch": 0.5132988817844735, "grad_norm": 0.7320006946446062, "learning_rate": 2.703974047039741e-06, "loss": 0.712, "step": 17581 }, { "epoch": 0.5133280780123208, "grad_norm": 0.7083620098487616, "learning_rate": 2.7038118410381185e-06, "loss": 0.5948, "step": 17582 }, { "epoch": 0.5133572742401682, "grad_norm": 0.7423163416434649, "learning_rate": 2.7036496350364965e-06, "loss": 0.6012, "step": 17583 }, { "epoch": 0.5133864704680156, "grad_norm": 0.6988145552451689, "learning_rate": 2.7034874290348745e-06, "loss": 0.6188, "step": 17584 }, { "epoch": 0.5134156666958629, "grad_norm": 0.7364419634614618, "learning_rate": 2.7033252230332525e-06, "loss": 0.6517, "step": 17585 }, { "epoch": 0.5134448629237103, "grad_norm": 0.6947736415175746, "learning_rate": 2.70316301703163e-06, "loss": 0.6045, "step": 17586 }, { "epoch": 0.5134740591515576, "grad_norm": 0.7934372731567171, "learning_rate": 2.703000811030008e-06, "loss": 0.6804, "step": 17587 }, { "epoch": 0.513503255379405, "grad_norm": 0.6758311730573399, "learning_rate": 2.702838605028386e-06, "loss": 0.5779, "step": 17588 }, { "epoch": 0.5135324516072524, "grad_norm": 0.7063677172926556, "learning_rate": 2.702676399026764e-06, "loss": 0.6439, "step": 17589 }, { "epoch": 0.5135616478350997, "grad_norm": 0.7753230567301316, "learning_rate": 2.702514193025142e-06, "loss": 0.6721, "step": 17590 }, { "epoch": 0.5135908440629471, "grad_norm": 0.7844552647097414, "learning_rate": 2.7023519870235197e-06, "loss": 0.7262, "step": 17591 }, { "epoch": 0.5136200402907944, "grad_norm": 1.0951156032233909, "learning_rate": 2.7021897810218977e-06, "loss": 0.609, "step": 17592 }, { "epoch": 0.5136492365186418, "grad_norm": 0.727576105580227, "learning_rate": 2.702027575020276e-06, "loss": 0.6458, "step": 17593 }, { "epoch": 0.5136784327464892, "grad_norm": 0.7777375201622687, "learning_rate": 2.701865369018654e-06, "loss": 0.6711, "step": 17594 }, { "epoch": 0.5137076289743365, "grad_norm": 0.7182242943796017, "learning_rate": 2.701703163017032e-06, "loss": 0.6278, "step": 17595 }, { "epoch": 0.5137368252021839, "grad_norm": 0.7465727656438316, "learning_rate": 2.7015409570154098e-06, "loss": 0.6274, "step": 17596 }, { "epoch": 0.5137660214300313, "grad_norm": 0.7159494802011208, "learning_rate": 2.7013787510137878e-06, "loss": 0.6076, "step": 17597 }, { "epoch": 0.5137952176578786, "grad_norm": 0.7484853229151182, "learning_rate": 2.7012165450121658e-06, "loss": 0.7015, "step": 17598 }, { "epoch": 0.513824413885726, "grad_norm": 0.6894217621357646, "learning_rate": 2.7010543390105438e-06, "loss": 0.5715, "step": 17599 }, { "epoch": 0.5138536101135733, "grad_norm": 0.6557071938080467, "learning_rate": 2.700892133008922e-06, "loss": 0.5595, "step": 17600 }, { "epoch": 0.5138828063414207, "grad_norm": 0.7451641656846293, "learning_rate": 2.7007299270072994e-06, "loss": 0.6436, "step": 17601 }, { "epoch": 0.5139120025692681, "grad_norm": 0.8232586830531202, "learning_rate": 2.7005677210056774e-06, "loss": 0.7525, "step": 17602 }, { "epoch": 0.5139411987971154, "grad_norm": 0.6934274982782958, "learning_rate": 2.7004055150040554e-06, "loss": 0.598, "step": 17603 }, { "epoch": 0.5139703950249628, "grad_norm": 0.7975119089353282, "learning_rate": 2.7002433090024334e-06, "loss": 0.7231, "step": 17604 }, { "epoch": 0.5139995912528101, "grad_norm": 0.6950107822915291, "learning_rate": 2.700081103000811e-06, "loss": 0.5769, "step": 17605 }, { "epoch": 0.5140287874806575, "grad_norm": 0.7246459428692901, "learning_rate": 2.699918896999189e-06, "loss": 0.6403, "step": 17606 }, { "epoch": 0.5140579837085049, "grad_norm": 0.7362707772193448, "learning_rate": 2.699756690997567e-06, "loss": 0.625, "step": 17607 }, { "epoch": 0.5140871799363522, "grad_norm": 0.7056341154607475, "learning_rate": 2.699594484995945e-06, "loss": 0.585, "step": 17608 }, { "epoch": 0.5141163761641996, "grad_norm": 0.7124402496149115, "learning_rate": 2.699432278994323e-06, "loss": 0.6036, "step": 17609 }, { "epoch": 0.514145572392047, "grad_norm": 0.7413134362460283, "learning_rate": 2.6992700729927006e-06, "loss": 0.6426, "step": 17610 }, { "epoch": 0.5141747686198943, "grad_norm": 0.7992539113497685, "learning_rate": 2.6991078669910786e-06, "loss": 0.6826, "step": 17611 }, { "epoch": 0.5142039648477417, "grad_norm": 0.7469851029998774, "learning_rate": 2.698945660989457e-06, "loss": 0.6732, "step": 17612 }, { "epoch": 0.514233161075589, "grad_norm": 0.7356610505098775, "learning_rate": 2.698783454987835e-06, "loss": 0.5719, "step": 17613 }, { "epoch": 0.5142623573034364, "grad_norm": 0.7594618748214792, "learning_rate": 2.698621248986213e-06, "loss": 0.6872, "step": 17614 }, { "epoch": 0.5142915535312838, "grad_norm": 0.7523222575741452, "learning_rate": 2.6984590429845906e-06, "loss": 0.6568, "step": 17615 }, { "epoch": 0.5143207497591311, "grad_norm": 0.6870902609575753, "learning_rate": 2.6982968369829686e-06, "loss": 0.6223, "step": 17616 }, { "epoch": 0.5143499459869785, "grad_norm": 0.7479291356970436, "learning_rate": 2.6981346309813466e-06, "loss": 0.6345, "step": 17617 }, { "epoch": 0.5143791422148258, "grad_norm": 0.7021756392264845, "learning_rate": 2.6979724249797246e-06, "loss": 0.6064, "step": 17618 }, { "epoch": 0.5144083384426732, "grad_norm": 0.740305020939116, "learning_rate": 2.6978102189781027e-06, "loss": 0.6636, "step": 17619 }, { "epoch": 0.5144375346705206, "grad_norm": 0.6662893956786102, "learning_rate": 2.6976480129764802e-06, "loss": 0.5701, "step": 17620 }, { "epoch": 0.5144667308983679, "grad_norm": 0.7421947015602913, "learning_rate": 2.6974858069748582e-06, "loss": 0.7567, "step": 17621 }, { "epoch": 0.5144959271262153, "grad_norm": 0.6865257948207427, "learning_rate": 2.6973236009732362e-06, "loss": 0.5899, "step": 17622 }, { "epoch": 0.5145251233540626, "grad_norm": 0.723792603253982, "learning_rate": 2.6971613949716143e-06, "loss": 0.6525, "step": 17623 }, { "epoch": 0.51455431958191, "grad_norm": 0.8014265692596246, "learning_rate": 2.696999188969992e-06, "loss": 0.6225, "step": 17624 }, { "epoch": 0.5145835158097574, "grad_norm": 0.8081493432108449, "learning_rate": 2.69683698296837e-06, "loss": 0.7692, "step": 17625 }, { "epoch": 0.5146127120376047, "grad_norm": 0.7081756318714275, "learning_rate": 2.696674776966748e-06, "loss": 0.5694, "step": 17626 }, { "epoch": 0.5146419082654521, "grad_norm": 0.7678923045773001, "learning_rate": 2.696512570965126e-06, "loss": 0.616, "step": 17627 }, { "epoch": 0.5146711044932994, "grad_norm": 0.7348368232329405, "learning_rate": 2.696350364963504e-06, "loss": 0.625, "step": 17628 }, { "epoch": 0.5147003007211468, "grad_norm": 0.702400947507153, "learning_rate": 2.6961881589618814e-06, "loss": 0.5966, "step": 17629 }, { "epoch": 0.5147294969489942, "grad_norm": 0.7044747937831022, "learning_rate": 2.6960259529602595e-06, "loss": 0.65, "step": 17630 }, { "epoch": 0.5147586931768415, "grad_norm": 0.7200329409016926, "learning_rate": 2.695863746958638e-06, "loss": 0.6594, "step": 17631 }, { "epoch": 0.5147878894046889, "grad_norm": 0.7048830062562709, "learning_rate": 2.695701540957016e-06, "loss": 0.5888, "step": 17632 }, { "epoch": 0.5148170856325363, "grad_norm": 0.6915428329607093, "learning_rate": 2.695539334955394e-06, "loss": 0.6017, "step": 17633 }, { "epoch": 0.5148462818603836, "grad_norm": 0.7076210914754179, "learning_rate": 2.6953771289537715e-06, "loss": 0.6115, "step": 17634 }, { "epoch": 0.514875478088231, "grad_norm": 0.7181136404727168, "learning_rate": 2.6952149229521495e-06, "loss": 0.628, "step": 17635 }, { "epoch": 0.5149046743160783, "grad_norm": 0.7098697163762248, "learning_rate": 2.6950527169505275e-06, "loss": 0.6191, "step": 17636 }, { "epoch": 0.5149338705439257, "grad_norm": 0.7291836104457756, "learning_rate": 2.6948905109489055e-06, "loss": 0.6538, "step": 17637 }, { "epoch": 0.5149630667717731, "grad_norm": 0.7229225386253403, "learning_rate": 2.6947283049472835e-06, "loss": 0.6279, "step": 17638 }, { "epoch": 0.5149922629996204, "grad_norm": 0.7483780800700667, "learning_rate": 2.694566098945661e-06, "loss": 0.7006, "step": 17639 }, { "epoch": 0.5150214592274678, "grad_norm": 0.7429270237351836, "learning_rate": 2.694403892944039e-06, "loss": 0.6622, "step": 17640 }, { "epoch": 0.5150506554553151, "grad_norm": 0.7364962868293896, "learning_rate": 2.694241686942417e-06, "loss": 0.6838, "step": 17641 }, { "epoch": 0.5150798516831625, "grad_norm": 0.777739245426361, "learning_rate": 2.694079480940795e-06, "loss": 0.6618, "step": 17642 }, { "epoch": 0.5151090479110099, "grad_norm": 0.7390582748379466, "learning_rate": 2.6939172749391727e-06, "loss": 0.6565, "step": 17643 }, { "epoch": 0.5151382441388572, "grad_norm": 0.6607449519578219, "learning_rate": 2.6937550689375507e-06, "loss": 0.5781, "step": 17644 }, { "epoch": 0.5151674403667046, "grad_norm": 0.7227771468942346, "learning_rate": 2.6935928629359287e-06, "loss": 0.6109, "step": 17645 }, { "epoch": 0.515196636594552, "grad_norm": 0.7066225131747214, "learning_rate": 2.6934306569343067e-06, "loss": 0.6017, "step": 17646 }, { "epoch": 0.5152258328223993, "grad_norm": 0.6628505270939151, "learning_rate": 2.6932684509326847e-06, "loss": 0.5933, "step": 17647 }, { "epoch": 0.5152550290502467, "grad_norm": 0.7472164634818763, "learning_rate": 2.6931062449310623e-06, "loss": 0.6134, "step": 17648 }, { "epoch": 0.515284225278094, "grad_norm": 0.7409466777566254, "learning_rate": 2.6929440389294403e-06, "loss": 0.6857, "step": 17649 }, { "epoch": 0.5153134215059414, "grad_norm": 0.7426028371326853, "learning_rate": 2.6927818329278187e-06, "loss": 0.6641, "step": 17650 }, { "epoch": 0.5153426177337888, "grad_norm": 0.750256137309561, "learning_rate": 2.6926196269261968e-06, "loss": 0.6517, "step": 17651 }, { "epoch": 0.5153718139616361, "grad_norm": 0.6984976980947285, "learning_rate": 2.6924574209245748e-06, "loss": 0.6013, "step": 17652 }, { "epoch": 0.5154010101894835, "grad_norm": 0.6641114403627861, "learning_rate": 2.6922952149229523e-06, "loss": 0.5345, "step": 17653 }, { "epoch": 0.5154302064173308, "grad_norm": 0.7262480711357756, "learning_rate": 2.6921330089213303e-06, "loss": 0.683, "step": 17654 }, { "epoch": 0.5154594026451782, "grad_norm": 0.7950269488969145, "learning_rate": 2.6919708029197084e-06, "loss": 0.5901, "step": 17655 }, { "epoch": 0.5154885988730256, "grad_norm": 0.7770487486800736, "learning_rate": 2.6918085969180864e-06, "loss": 0.7058, "step": 17656 }, { "epoch": 0.5155177951008729, "grad_norm": 0.7788532151408352, "learning_rate": 2.6916463909164644e-06, "loss": 0.6787, "step": 17657 }, { "epoch": 0.5155469913287203, "grad_norm": 0.7180259344665644, "learning_rate": 2.691484184914842e-06, "loss": 0.6619, "step": 17658 }, { "epoch": 0.5155761875565676, "grad_norm": 0.7043175932080443, "learning_rate": 2.69132197891322e-06, "loss": 0.6367, "step": 17659 }, { "epoch": 0.515605383784415, "grad_norm": 0.774465322544456, "learning_rate": 2.691159772911598e-06, "loss": 0.7403, "step": 17660 }, { "epoch": 0.5156345800122624, "grad_norm": 0.7089548519571318, "learning_rate": 2.690997566909976e-06, "loss": 0.5996, "step": 17661 }, { "epoch": 0.5156637762401097, "grad_norm": 0.7210326997196964, "learning_rate": 2.6908353609083536e-06, "loss": 0.6734, "step": 17662 }, { "epoch": 0.5156929724679571, "grad_norm": 0.6839910631324883, "learning_rate": 2.6906731549067316e-06, "loss": 0.609, "step": 17663 }, { "epoch": 0.5157221686958045, "grad_norm": 0.8145323866635013, "learning_rate": 2.6905109489051096e-06, "loss": 0.6791, "step": 17664 }, { "epoch": 0.5157513649236518, "grad_norm": 0.746975292974843, "learning_rate": 2.6903487429034876e-06, "loss": 0.6508, "step": 17665 }, { "epoch": 0.5157805611514992, "grad_norm": 0.7092696788712046, "learning_rate": 2.6901865369018656e-06, "loss": 0.6218, "step": 17666 }, { "epoch": 0.5158097573793465, "grad_norm": 0.76863731954156, "learning_rate": 2.690024330900243e-06, "loss": 0.6427, "step": 17667 }, { "epoch": 0.5158389536071939, "grad_norm": 0.7622398035724184, "learning_rate": 2.689862124898621e-06, "loss": 0.7602, "step": 17668 }, { "epoch": 0.5158681498350414, "grad_norm": 0.7022543583383629, "learning_rate": 2.6896999188969996e-06, "loss": 0.6085, "step": 17669 }, { "epoch": 0.5158973460628887, "grad_norm": 0.7285120879615491, "learning_rate": 2.6895377128953776e-06, "loss": 0.6428, "step": 17670 }, { "epoch": 0.5159265422907361, "grad_norm": 0.7419632992552482, "learning_rate": 2.6893755068937556e-06, "loss": 0.6006, "step": 17671 }, { "epoch": 0.5159557385185835, "grad_norm": 0.6849370262559096, "learning_rate": 2.689213300892133e-06, "loss": 0.5572, "step": 17672 }, { "epoch": 0.5159849347464308, "grad_norm": 0.804601909541206, "learning_rate": 2.689051094890511e-06, "loss": 0.7443, "step": 17673 }, { "epoch": 0.5160141309742782, "grad_norm": 0.7419104243595335, "learning_rate": 2.6888888888888892e-06, "loss": 0.6957, "step": 17674 }, { "epoch": 0.5160433272021255, "grad_norm": 0.7592598989840966, "learning_rate": 2.6887266828872672e-06, "loss": 0.7041, "step": 17675 }, { "epoch": 0.5160725234299729, "grad_norm": 0.6743521182945387, "learning_rate": 2.6885644768856452e-06, "loss": 0.5908, "step": 17676 }, { "epoch": 0.5161017196578203, "grad_norm": 0.6949272633245335, "learning_rate": 2.688402270884023e-06, "loss": 0.576, "step": 17677 }, { "epoch": 0.5161309158856676, "grad_norm": 0.7236319192218691, "learning_rate": 2.688240064882401e-06, "loss": 0.6103, "step": 17678 }, { "epoch": 0.516160112113515, "grad_norm": 0.7741061126903241, "learning_rate": 2.688077858880779e-06, "loss": 0.7285, "step": 17679 }, { "epoch": 0.5161893083413623, "grad_norm": 0.7562781816004732, "learning_rate": 2.687915652879157e-06, "loss": 0.7086, "step": 17680 }, { "epoch": 0.5162185045692097, "grad_norm": 0.685414668670076, "learning_rate": 2.6877534468775344e-06, "loss": 0.574, "step": 17681 }, { "epoch": 0.5162477007970571, "grad_norm": 0.665229806388238, "learning_rate": 2.6875912408759124e-06, "loss": 0.5993, "step": 17682 }, { "epoch": 0.5162768970249044, "grad_norm": 0.6855580805818507, "learning_rate": 2.6874290348742904e-06, "loss": 0.5953, "step": 17683 }, { "epoch": 0.5163060932527518, "grad_norm": 0.7013835407483472, "learning_rate": 2.6872668288726684e-06, "loss": 0.6124, "step": 17684 }, { "epoch": 0.5163352894805991, "grad_norm": 0.699431637106106, "learning_rate": 2.6871046228710464e-06, "loss": 0.6599, "step": 17685 }, { "epoch": 0.5163644857084465, "grad_norm": 0.7340189056207457, "learning_rate": 2.686942416869424e-06, "loss": 0.6404, "step": 17686 }, { "epoch": 0.5163936819362939, "grad_norm": 0.712870596188047, "learning_rate": 2.686780210867802e-06, "loss": 0.6544, "step": 17687 }, { "epoch": 0.5164228781641412, "grad_norm": 0.7398473878588736, "learning_rate": 2.6866180048661805e-06, "loss": 0.6858, "step": 17688 }, { "epoch": 0.5164520743919886, "grad_norm": 0.6945503161833871, "learning_rate": 2.6864557988645585e-06, "loss": 0.5973, "step": 17689 }, { "epoch": 0.516481270619836, "grad_norm": 0.726848152621568, "learning_rate": 2.6862935928629365e-06, "loss": 0.6605, "step": 17690 }, { "epoch": 0.5165104668476833, "grad_norm": 0.7832155677074595, "learning_rate": 2.686131386861314e-06, "loss": 0.6947, "step": 17691 }, { "epoch": 0.5165396630755307, "grad_norm": 0.6332276759216201, "learning_rate": 2.685969180859692e-06, "loss": 0.5123, "step": 17692 }, { "epoch": 0.516568859303378, "grad_norm": 0.6986791858089956, "learning_rate": 2.68580697485807e-06, "loss": 0.6038, "step": 17693 }, { "epoch": 0.5165980555312254, "grad_norm": 0.6966657570236183, "learning_rate": 2.685644768856448e-06, "loss": 0.6027, "step": 17694 }, { "epoch": 0.5166272517590728, "grad_norm": 0.7267658173849049, "learning_rate": 2.685482562854826e-06, "loss": 0.6546, "step": 17695 }, { "epoch": 0.5166564479869201, "grad_norm": 0.747599661532569, "learning_rate": 2.6853203568532037e-06, "loss": 0.6484, "step": 17696 }, { "epoch": 0.5166856442147675, "grad_norm": 0.8000829596963224, "learning_rate": 2.6851581508515817e-06, "loss": 0.7145, "step": 17697 }, { "epoch": 0.5167148404426148, "grad_norm": 0.7055853095468857, "learning_rate": 2.6849959448499597e-06, "loss": 0.6425, "step": 17698 }, { "epoch": 0.5167440366704622, "grad_norm": 0.712864529588757, "learning_rate": 2.6848337388483377e-06, "loss": 0.6272, "step": 17699 }, { "epoch": 0.5167732328983096, "grad_norm": 0.7775881133386132, "learning_rate": 2.6846715328467153e-06, "loss": 0.7324, "step": 17700 }, { "epoch": 0.5168024291261569, "grad_norm": 0.7473500892164541, "learning_rate": 2.6845093268450933e-06, "loss": 0.7144, "step": 17701 }, { "epoch": 0.5168316253540043, "grad_norm": 0.7046684690504484, "learning_rate": 2.6843471208434713e-06, "loss": 0.6299, "step": 17702 }, { "epoch": 0.5168608215818516, "grad_norm": 0.8740430796402746, "learning_rate": 2.6841849148418493e-06, "loss": 0.6551, "step": 17703 }, { "epoch": 0.516890017809699, "grad_norm": 0.7838627824740069, "learning_rate": 2.684022708840227e-06, "loss": 0.7295, "step": 17704 }, { "epoch": 0.5169192140375464, "grad_norm": 0.8010463918015595, "learning_rate": 2.683860502838605e-06, "loss": 0.7256, "step": 17705 }, { "epoch": 0.5169484102653937, "grad_norm": 0.696205062176967, "learning_rate": 2.6836982968369833e-06, "loss": 0.6264, "step": 17706 }, { "epoch": 0.5169776064932411, "grad_norm": 0.7117342016936554, "learning_rate": 2.6835360908353613e-06, "loss": 0.6371, "step": 17707 }, { "epoch": 0.5170068027210885, "grad_norm": 0.7366749326799464, "learning_rate": 2.6833738848337393e-06, "loss": 0.6395, "step": 17708 }, { "epoch": 0.5170359989489358, "grad_norm": 0.7211604299496829, "learning_rate": 2.6832116788321173e-06, "loss": 0.6183, "step": 17709 }, { "epoch": 0.5170651951767832, "grad_norm": 0.734666367941943, "learning_rate": 2.683049472830495e-06, "loss": 0.665, "step": 17710 }, { "epoch": 0.5170943914046305, "grad_norm": 0.7458094883999464, "learning_rate": 2.682887266828873e-06, "loss": 0.615, "step": 17711 }, { "epoch": 0.5171235876324779, "grad_norm": 0.7170519894940248, "learning_rate": 2.682725060827251e-06, "loss": 0.5859, "step": 17712 }, { "epoch": 0.5171527838603253, "grad_norm": 0.787449722863672, "learning_rate": 2.682562854825629e-06, "loss": 0.7177, "step": 17713 }, { "epoch": 0.5171819800881726, "grad_norm": 0.6720229511759611, "learning_rate": 2.682400648824007e-06, "loss": 0.5815, "step": 17714 }, { "epoch": 0.51721117631602, "grad_norm": 0.7046916404369884, "learning_rate": 2.6822384428223845e-06, "loss": 0.6489, "step": 17715 }, { "epoch": 0.5172403725438673, "grad_norm": 0.7138261957998815, "learning_rate": 2.6820762368207625e-06, "loss": 0.6227, "step": 17716 }, { "epoch": 0.5172695687717147, "grad_norm": 0.7412646710746625, "learning_rate": 2.6819140308191405e-06, "loss": 0.6533, "step": 17717 }, { "epoch": 0.5172987649995621, "grad_norm": 0.7870511660884095, "learning_rate": 2.6817518248175185e-06, "loss": 0.7483, "step": 17718 }, { "epoch": 0.5173279612274094, "grad_norm": 0.7484734859975891, "learning_rate": 2.681589618815896e-06, "loss": 0.6956, "step": 17719 }, { "epoch": 0.5173571574552568, "grad_norm": 0.7225568847963543, "learning_rate": 2.681427412814274e-06, "loss": 0.6419, "step": 17720 }, { "epoch": 0.5173863536831041, "grad_norm": 0.6906176857546393, "learning_rate": 2.681265206812652e-06, "loss": 0.5829, "step": 17721 }, { "epoch": 0.5174155499109515, "grad_norm": 0.7050465055621661, "learning_rate": 2.68110300081103e-06, "loss": 0.6081, "step": 17722 }, { "epoch": 0.5174447461387989, "grad_norm": 0.7184367403138169, "learning_rate": 2.6809407948094077e-06, "loss": 0.6126, "step": 17723 }, { "epoch": 0.5174739423666462, "grad_norm": 0.6775911657163781, "learning_rate": 2.6807785888077857e-06, "loss": 0.593, "step": 17724 }, { "epoch": 0.5175031385944936, "grad_norm": 0.7247907136538453, "learning_rate": 2.680616382806164e-06, "loss": 0.6463, "step": 17725 }, { "epoch": 0.517532334822341, "grad_norm": 0.7578427149049196, "learning_rate": 2.680454176804542e-06, "loss": 0.6315, "step": 17726 }, { "epoch": 0.5175615310501883, "grad_norm": 0.7399730653212199, "learning_rate": 2.68029197080292e-06, "loss": 0.5927, "step": 17727 }, { "epoch": 0.5175907272780357, "grad_norm": 0.7748520774735007, "learning_rate": 2.680129764801298e-06, "loss": 0.7452, "step": 17728 }, { "epoch": 0.517619923505883, "grad_norm": 0.748597441836445, "learning_rate": 2.6799675587996758e-06, "loss": 0.6826, "step": 17729 }, { "epoch": 0.5176491197337304, "grad_norm": 0.7655030583639362, "learning_rate": 2.6798053527980538e-06, "loss": 0.6336, "step": 17730 }, { "epoch": 0.5176783159615778, "grad_norm": 0.6743751793736487, "learning_rate": 2.679643146796432e-06, "loss": 0.5952, "step": 17731 }, { "epoch": 0.5177075121894251, "grad_norm": 0.7188437337735928, "learning_rate": 2.67948094079481e-06, "loss": 0.6708, "step": 17732 }, { "epoch": 0.5177367084172725, "grad_norm": 0.7002570779680385, "learning_rate": 2.679318734793188e-06, "loss": 0.5819, "step": 17733 }, { "epoch": 0.5177659046451198, "grad_norm": 0.7275824796233902, "learning_rate": 2.6791565287915654e-06, "loss": 0.6729, "step": 17734 }, { "epoch": 0.5177951008729672, "grad_norm": 0.7051883789899462, "learning_rate": 2.6789943227899434e-06, "loss": 0.6347, "step": 17735 }, { "epoch": 0.5178242971008146, "grad_norm": 0.7581003356407006, "learning_rate": 2.6788321167883214e-06, "loss": 0.7198, "step": 17736 }, { "epoch": 0.5178534933286619, "grad_norm": 0.687375816742277, "learning_rate": 2.6786699107866994e-06, "loss": 0.613, "step": 17737 }, { "epoch": 0.5178826895565093, "grad_norm": 0.76078047690054, "learning_rate": 2.678507704785077e-06, "loss": 0.6812, "step": 17738 }, { "epoch": 0.5179118857843567, "grad_norm": 0.758563032972786, "learning_rate": 2.678345498783455e-06, "loss": 0.6792, "step": 17739 }, { "epoch": 0.517941082012204, "grad_norm": 0.7521918881180802, "learning_rate": 2.678183292781833e-06, "loss": 0.684, "step": 17740 }, { "epoch": 0.5179702782400514, "grad_norm": 0.7489573309066795, "learning_rate": 2.678021086780211e-06, "loss": 0.7041, "step": 17741 }, { "epoch": 0.5179994744678987, "grad_norm": 0.7197049200902328, "learning_rate": 2.6778588807785886e-06, "loss": 0.6793, "step": 17742 }, { "epoch": 0.5180286706957461, "grad_norm": 0.6876549778332679, "learning_rate": 2.6776966747769666e-06, "loss": 0.5836, "step": 17743 }, { "epoch": 0.5180578669235935, "grad_norm": 0.8114241180885019, "learning_rate": 2.677534468775345e-06, "loss": 0.6835, "step": 17744 }, { "epoch": 0.5180870631514408, "grad_norm": 0.6719395837027026, "learning_rate": 2.677372262773723e-06, "loss": 0.5804, "step": 17745 }, { "epoch": 0.5181162593792882, "grad_norm": 0.7400425505432333, "learning_rate": 2.677210056772101e-06, "loss": 0.662, "step": 17746 }, { "epoch": 0.5181454556071355, "grad_norm": 0.769892137590643, "learning_rate": 2.677047850770479e-06, "loss": 0.6447, "step": 17747 }, { "epoch": 0.5181746518349829, "grad_norm": 0.7830555624395401, "learning_rate": 2.6768856447688566e-06, "loss": 0.7205, "step": 17748 }, { "epoch": 0.5182038480628303, "grad_norm": 0.7163207298652049, "learning_rate": 2.6767234387672346e-06, "loss": 0.6255, "step": 17749 }, { "epoch": 0.5182330442906776, "grad_norm": 0.708351677836549, "learning_rate": 2.6765612327656126e-06, "loss": 0.519, "step": 17750 }, { "epoch": 0.518262240518525, "grad_norm": 0.7345296287734174, "learning_rate": 2.6763990267639907e-06, "loss": 0.6672, "step": 17751 }, { "epoch": 0.5182914367463723, "grad_norm": 0.7540424453140324, "learning_rate": 2.6762368207623687e-06, "loss": 0.6229, "step": 17752 }, { "epoch": 0.5183206329742197, "grad_norm": 0.7034223838340992, "learning_rate": 2.6760746147607462e-06, "loss": 0.6066, "step": 17753 }, { "epoch": 0.5183498292020671, "grad_norm": 0.7488275772042549, "learning_rate": 2.6759124087591243e-06, "loss": 0.7483, "step": 17754 }, { "epoch": 0.5183790254299144, "grad_norm": 0.7500255393929599, "learning_rate": 2.6757502027575023e-06, "loss": 0.7199, "step": 17755 }, { "epoch": 0.5184082216577618, "grad_norm": 0.7623589034614837, "learning_rate": 2.6755879967558803e-06, "loss": 0.6487, "step": 17756 }, { "epoch": 0.5184374178856092, "grad_norm": 0.7304646062142298, "learning_rate": 2.675425790754258e-06, "loss": 0.6492, "step": 17757 }, { "epoch": 0.5184666141134565, "grad_norm": 0.7622489767888654, "learning_rate": 2.675263584752636e-06, "loss": 0.6851, "step": 17758 }, { "epoch": 0.5184958103413039, "grad_norm": 0.7267661534950303, "learning_rate": 2.675101378751014e-06, "loss": 0.6515, "step": 17759 }, { "epoch": 0.5185250065691512, "grad_norm": 0.7408155496018747, "learning_rate": 2.674939172749392e-06, "loss": 0.7049, "step": 17760 }, { "epoch": 0.5185542027969986, "grad_norm": 0.7347549697280098, "learning_rate": 2.6747769667477695e-06, "loss": 0.6454, "step": 17761 }, { "epoch": 0.518583399024846, "grad_norm": 0.8393778728615509, "learning_rate": 2.6746147607461475e-06, "loss": 0.638, "step": 17762 }, { "epoch": 0.5186125952526933, "grad_norm": 0.7368095599619999, "learning_rate": 2.674452554744526e-06, "loss": 0.686, "step": 17763 }, { "epoch": 0.5186417914805407, "grad_norm": 0.7198902215698599, "learning_rate": 2.674290348742904e-06, "loss": 0.6516, "step": 17764 }, { "epoch": 0.518670987708388, "grad_norm": 0.6960575612244687, "learning_rate": 2.674128142741282e-06, "loss": 0.5868, "step": 17765 }, { "epoch": 0.5187001839362354, "grad_norm": 0.8013022430933553, "learning_rate": 2.67396593673966e-06, "loss": 0.777, "step": 17766 }, { "epoch": 0.5187293801640828, "grad_norm": 0.7658253343847877, "learning_rate": 2.6738037307380375e-06, "loss": 0.5259, "step": 17767 }, { "epoch": 0.5187585763919301, "grad_norm": 0.7616629695758421, "learning_rate": 2.6736415247364155e-06, "loss": 0.7153, "step": 17768 }, { "epoch": 0.5187877726197775, "grad_norm": 0.7130197691898849, "learning_rate": 2.6734793187347935e-06, "loss": 0.6458, "step": 17769 }, { "epoch": 0.5188169688476248, "grad_norm": 0.7420278222562282, "learning_rate": 2.6733171127331715e-06, "loss": 0.6887, "step": 17770 }, { "epoch": 0.5188461650754722, "grad_norm": 0.7575689873657092, "learning_rate": 2.6731549067315495e-06, "loss": 0.6845, "step": 17771 }, { "epoch": 0.5188753613033196, "grad_norm": 0.8111632399771955, "learning_rate": 2.672992700729927e-06, "loss": 0.7338, "step": 17772 }, { "epoch": 0.5189045575311669, "grad_norm": 0.7190198853118298, "learning_rate": 2.672830494728305e-06, "loss": 0.6347, "step": 17773 }, { "epoch": 0.5189337537590143, "grad_norm": 0.7183566919459442, "learning_rate": 2.672668288726683e-06, "loss": 0.6312, "step": 17774 }, { "epoch": 0.5189629499868617, "grad_norm": 0.7735859055169264, "learning_rate": 2.672506082725061e-06, "loss": 0.7097, "step": 17775 }, { "epoch": 0.518992146214709, "grad_norm": 0.7480207068727664, "learning_rate": 2.6723438767234387e-06, "loss": 0.6592, "step": 17776 }, { "epoch": 0.5190213424425564, "grad_norm": 0.7650962965221098, "learning_rate": 2.6721816707218167e-06, "loss": 0.732, "step": 17777 }, { "epoch": 0.5190505386704037, "grad_norm": 0.8187571491709424, "learning_rate": 2.6720194647201947e-06, "loss": 0.6604, "step": 17778 }, { "epoch": 0.5190797348982511, "grad_norm": 0.7159451484606963, "learning_rate": 2.6718572587185727e-06, "loss": 0.6401, "step": 17779 }, { "epoch": 0.5191089311260985, "grad_norm": 0.6799579997017715, "learning_rate": 2.6716950527169503e-06, "loss": 0.5716, "step": 17780 }, { "epoch": 0.5191381273539458, "grad_norm": 0.7523438415306395, "learning_rate": 2.6715328467153283e-06, "loss": 0.6743, "step": 17781 }, { "epoch": 0.5191673235817932, "grad_norm": 0.7293029974897929, "learning_rate": 2.6713706407137067e-06, "loss": 0.6526, "step": 17782 }, { "epoch": 0.5191965198096405, "grad_norm": 0.7000543707370149, "learning_rate": 2.6712084347120848e-06, "loss": 0.5826, "step": 17783 }, { "epoch": 0.5192257160374879, "grad_norm": 0.7494200901603069, "learning_rate": 2.6710462287104628e-06, "loss": 0.5828, "step": 17784 }, { "epoch": 0.5192549122653353, "grad_norm": 0.7323441268603658, "learning_rate": 2.6708840227088408e-06, "loss": 0.6652, "step": 17785 }, { "epoch": 0.5192841084931826, "grad_norm": 0.7785639808607456, "learning_rate": 2.6707218167072184e-06, "loss": 0.7135, "step": 17786 }, { "epoch": 0.51931330472103, "grad_norm": 0.7450004165199778, "learning_rate": 2.6705596107055964e-06, "loss": 0.6382, "step": 17787 }, { "epoch": 0.5193425009488774, "grad_norm": 0.8002171409933984, "learning_rate": 2.6703974047039744e-06, "loss": 0.7979, "step": 17788 }, { "epoch": 0.5193716971767248, "grad_norm": 0.7315574267135481, "learning_rate": 2.6702351987023524e-06, "loss": 0.6454, "step": 17789 }, { "epoch": 0.5194008934045722, "grad_norm": 0.6857794820593213, "learning_rate": 2.6700729927007304e-06, "loss": 0.5806, "step": 17790 }, { "epoch": 0.5194300896324195, "grad_norm": 0.7967768519303337, "learning_rate": 2.669910786699108e-06, "loss": 0.7514, "step": 17791 }, { "epoch": 0.5194592858602669, "grad_norm": 0.7136493203102118, "learning_rate": 2.669748580697486e-06, "loss": 0.6759, "step": 17792 }, { "epoch": 0.5194884820881143, "grad_norm": 0.7238831216715429, "learning_rate": 2.669586374695864e-06, "loss": 0.6306, "step": 17793 }, { "epoch": 0.5195176783159616, "grad_norm": 0.7844695268983228, "learning_rate": 2.669424168694242e-06, "loss": 0.7668, "step": 17794 }, { "epoch": 0.519546874543809, "grad_norm": 0.7554787944771839, "learning_rate": 2.6692619626926196e-06, "loss": 0.7109, "step": 17795 }, { "epoch": 0.5195760707716564, "grad_norm": 0.7297655855885615, "learning_rate": 2.6690997566909976e-06, "loss": 0.6953, "step": 17796 }, { "epoch": 0.5196052669995037, "grad_norm": 0.8018495510442429, "learning_rate": 2.6689375506893756e-06, "loss": 0.7429, "step": 17797 }, { "epoch": 0.5196344632273511, "grad_norm": 0.7523867882363, "learning_rate": 2.6687753446877536e-06, "loss": 0.653, "step": 17798 }, { "epoch": 0.5196636594551984, "grad_norm": 0.7216771883051798, "learning_rate": 2.668613138686131e-06, "loss": 0.6082, "step": 17799 }, { "epoch": 0.5196928556830458, "grad_norm": 0.7047328037182551, "learning_rate": 2.668450932684509e-06, "loss": 0.635, "step": 17800 }, { "epoch": 0.5197220519108932, "grad_norm": 0.8251352359108096, "learning_rate": 2.6682887266828876e-06, "loss": 0.6557, "step": 17801 }, { "epoch": 0.5197512481387405, "grad_norm": 0.8192838556714664, "learning_rate": 2.6681265206812656e-06, "loss": 0.68, "step": 17802 }, { "epoch": 0.5197804443665879, "grad_norm": 0.7608753765837925, "learning_rate": 2.6679643146796436e-06, "loss": 0.6853, "step": 17803 }, { "epoch": 0.5198096405944352, "grad_norm": 0.7120537410267713, "learning_rate": 2.6678021086780216e-06, "loss": 0.6069, "step": 17804 }, { "epoch": 0.5198388368222826, "grad_norm": 0.7912783602747208, "learning_rate": 2.667639902676399e-06, "loss": 0.7249, "step": 17805 }, { "epoch": 0.51986803305013, "grad_norm": 0.722683634613694, "learning_rate": 2.6674776966747772e-06, "loss": 0.6556, "step": 17806 }, { "epoch": 0.5198972292779773, "grad_norm": 0.716881101457078, "learning_rate": 2.6673154906731552e-06, "loss": 0.6415, "step": 17807 }, { "epoch": 0.5199264255058247, "grad_norm": 0.7355399459925817, "learning_rate": 2.6671532846715332e-06, "loss": 0.6061, "step": 17808 }, { "epoch": 0.519955621733672, "grad_norm": 0.7285660322873131, "learning_rate": 2.6669910786699112e-06, "loss": 0.6212, "step": 17809 }, { "epoch": 0.5199848179615194, "grad_norm": 0.690798744086858, "learning_rate": 2.666828872668289e-06, "loss": 0.621, "step": 17810 }, { "epoch": 0.5200140141893668, "grad_norm": 0.7157843138217014, "learning_rate": 2.666666666666667e-06, "loss": 0.561, "step": 17811 }, { "epoch": 0.5200432104172141, "grad_norm": 0.7566394641297771, "learning_rate": 2.666504460665045e-06, "loss": 0.6414, "step": 17812 }, { "epoch": 0.5200724066450615, "grad_norm": 0.7814393202758129, "learning_rate": 2.666342254663423e-06, "loss": 0.6745, "step": 17813 }, { "epoch": 0.5201016028729089, "grad_norm": 0.7623186110197093, "learning_rate": 2.6661800486618004e-06, "loss": 0.6842, "step": 17814 }, { "epoch": 0.5201307991007562, "grad_norm": 0.733313975985865, "learning_rate": 2.6660178426601784e-06, "loss": 0.6666, "step": 17815 }, { "epoch": 0.5201599953286036, "grad_norm": 0.7308758036201608, "learning_rate": 2.6658556366585564e-06, "loss": 0.6301, "step": 17816 }, { "epoch": 0.5201891915564509, "grad_norm": 0.7727822302689571, "learning_rate": 2.6656934306569344e-06, "loss": 0.631, "step": 17817 }, { "epoch": 0.5202183877842983, "grad_norm": 0.8089132388317685, "learning_rate": 2.665531224655312e-06, "loss": 0.7023, "step": 17818 }, { "epoch": 0.5202475840121457, "grad_norm": 0.7071535699389946, "learning_rate": 2.66536901865369e-06, "loss": 0.6735, "step": 17819 }, { "epoch": 0.520276780239993, "grad_norm": 0.7788658507870365, "learning_rate": 2.6652068126520685e-06, "loss": 0.7018, "step": 17820 }, { "epoch": 0.5203059764678404, "grad_norm": 0.6825072860417865, "learning_rate": 2.6650446066504465e-06, "loss": 0.6143, "step": 17821 }, { "epoch": 0.5203351726956877, "grad_norm": 0.7279928081859178, "learning_rate": 2.6648824006488245e-06, "loss": 0.665, "step": 17822 }, { "epoch": 0.5203643689235351, "grad_norm": 0.7145762728479017, "learning_rate": 2.6647201946472025e-06, "loss": 0.6495, "step": 17823 }, { "epoch": 0.5203935651513825, "grad_norm": 0.7071296263598674, "learning_rate": 2.66455798864558e-06, "loss": 0.645, "step": 17824 }, { "epoch": 0.5204227613792298, "grad_norm": 0.6834078742510397, "learning_rate": 2.664395782643958e-06, "loss": 0.5876, "step": 17825 }, { "epoch": 0.5204519576070772, "grad_norm": 0.7616759953903415, "learning_rate": 2.664233576642336e-06, "loss": 0.707, "step": 17826 }, { "epoch": 0.5204811538349245, "grad_norm": 0.7476357814035373, "learning_rate": 2.664071370640714e-06, "loss": 0.6614, "step": 17827 }, { "epoch": 0.5205103500627719, "grad_norm": 0.7441444720203195, "learning_rate": 2.6639091646390917e-06, "loss": 0.7161, "step": 17828 }, { "epoch": 0.5205395462906193, "grad_norm": 0.7512184252577654, "learning_rate": 2.6637469586374697e-06, "loss": 0.6784, "step": 17829 }, { "epoch": 0.5205687425184666, "grad_norm": 0.6657792166879444, "learning_rate": 2.6635847526358477e-06, "loss": 0.5588, "step": 17830 }, { "epoch": 0.520597938746314, "grad_norm": 0.7365750453031998, "learning_rate": 2.6634225466342257e-06, "loss": 0.6918, "step": 17831 }, { "epoch": 0.5206271349741614, "grad_norm": 0.7443546129630253, "learning_rate": 2.6632603406326037e-06, "loss": 0.6867, "step": 17832 }, { "epoch": 0.5206563312020087, "grad_norm": 0.7286020075575586, "learning_rate": 2.6630981346309813e-06, "loss": 0.5821, "step": 17833 }, { "epoch": 0.5206855274298561, "grad_norm": 0.7256069834962116, "learning_rate": 2.6629359286293593e-06, "loss": 0.6352, "step": 17834 }, { "epoch": 0.5207147236577034, "grad_norm": 0.7823018272017562, "learning_rate": 2.6627737226277373e-06, "loss": 0.7023, "step": 17835 }, { "epoch": 0.5207439198855508, "grad_norm": 0.7343516021372264, "learning_rate": 2.6626115166261153e-06, "loss": 0.6367, "step": 17836 }, { "epoch": 0.5207731161133982, "grad_norm": 0.8083767370273071, "learning_rate": 2.662449310624493e-06, "loss": 0.7011, "step": 17837 }, { "epoch": 0.5208023123412455, "grad_norm": 0.777629255972666, "learning_rate": 2.662287104622871e-06, "loss": 0.7029, "step": 17838 }, { "epoch": 0.5208315085690929, "grad_norm": 0.6987834762391951, "learning_rate": 2.6621248986212493e-06, "loss": 0.608, "step": 17839 }, { "epoch": 0.5208607047969402, "grad_norm": 0.7084079392915601, "learning_rate": 2.6619626926196273e-06, "loss": 0.6594, "step": 17840 }, { "epoch": 0.5208899010247876, "grad_norm": 0.6868780353693362, "learning_rate": 2.6618004866180053e-06, "loss": 0.5971, "step": 17841 }, { "epoch": 0.520919097252635, "grad_norm": 0.8214396450445549, "learning_rate": 2.6616382806163833e-06, "loss": 0.6356, "step": 17842 }, { "epoch": 0.5209482934804823, "grad_norm": 0.6927748317741297, "learning_rate": 2.661476074614761e-06, "loss": 0.6186, "step": 17843 }, { "epoch": 0.5209774897083297, "grad_norm": 0.7857370644832957, "learning_rate": 2.661313868613139e-06, "loss": 0.7521, "step": 17844 }, { "epoch": 0.521006685936177, "grad_norm": 0.7060298171694213, "learning_rate": 2.661151662611517e-06, "loss": 0.6566, "step": 17845 }, { "epoch": 0.5210358821640244, "grad_norm": 0.7353094879902154, "learning_rate": 2.660989456609895e-06, "loss": 0.6698, "step": 17846 }, { "epoch": 0.5210650783918718, "grad_norm": 0.7333400736266492, "learning_rate": 2.6608272506082725e-06, "loss": 0.6597, "step": 17847 }, { "epoch": 0.5210942746197191, "grad_norm": 0.7083641999393001, "learning_rate": 2.6606650446066505e-06, "loss": 0.5977, "step": 17848 }, { "epoch": 0.5211234708475665, "grad_norm": 0.7274856633226678, "learning_rate": 2.6605028386050285e-06, "loss": 0.6659, "step": 17849 }, { "epoch": 0.5211526670754139, "grad_norm": 0.7217230523629611, "learning_rate": 2.6603406326034066e-06, "loss": 0.6136, "step": 17850 }, { "epoch": 0.5211818633032612, "grad_norm": 0.7018069729988468, "learning_rate": 2.6601784266017846e-06, "loss": 0.607, "step": 17851 }, { "epoch": 0.5212110595311086, "grad_norm": 0.7171607191340684, "learning_rate": 2.660016220600162e-06, "loss": 0.5744, "step": 17852 }, { "epoch": 0.5212402557589559, "grad_norm": 0.7090409471476063, "learning_rate": 2.65985401459854e-06, "loss": 0.5954, "step": 17853 }, { "epoch": 0.5212694519868033, "grad_norm": 0.7358743921184524, "learning_rate": 2.659691808596918e-06, "loss": 0.6792, "step": 17854 }, { "epoch": 0.5212986482146507, "grad_norm": 0.6900851276394109, "learning_rate": 2.659529602595296e-06, "loss": 0.6471, "step": 17855 }, { "epoch": 0.521327844442498, "grad_norm": 0.6756889853312649, "learning_rate": 2.6593673965936737e-06, "loss": 0.5775, "step": 17856 }, { "epoch": 0.5213570406703454, "grad_norm": 0.7423553151659097, "learning_rate": 2.6592051905920526e-06, "loss": 0.6256, "step": 17857 }, { "epoch": 0.5213862368981927, "grad_norm": 0.7064359544809391, "learning_rate": 2.65904298459043e-06, "loss": 0.6404, "step": 17858 }, { "epoch": 0.5214154331260401, "grad_norm": 0.7430479613509616, "learning_rate": 2.658880778588808e-06, "loss": 0.707, "step": 17859 }, { "epoch": 0.5214446293538875, "grad_norm": 0.748759507215963, "learning_rate": 2.658718572587186e-06, "loss": 0.7, "step": 17860 }, { "epoch": 0.5214738255817348, "grad_norm": 0.7845697333764569, "learning_rate": 2.658556366585564e-06, "loss": 0.7409, "step": 17861 }, { "epoch": 0.5215030218095822, "grad_norm": 0.7663041809287263, "learning_rate": 2.6583941605839418e-06, "loss": 0.7361, "step": 17862 }, { "epoch": 0.5215322180374296, "grad_norm": 0.7018421468633549, "learning_rate": 2.65823195458232e-06, "loss": 0.5683, "step": 17863 }, { "epoch": 0.5215614142652769, "grad_norm": 0.7808972379829645, "learning_rate": 2.658069748580698e-06, "loss": 0.6887, "step": 17864 }, { "epoch": 0.5215906104931243, "grad_norm": 0.7203565728321957, "learning_rate": 2.657907542579076e-06, "loss": 0.6925, "step": 17865 }, { "epoch": 0.5216198067209716, "grad_norm": 0.7433194098147454, "learning_rate": 2.6577453365774534e-06, "loss": 0.6733, "step": 17866 }, { "epoch": 0.521649002948819, "grad_norm": 0.7343406722345273, "learning_rate": 2.6575831305758314e-06, "loss": 0.6932, "step": 17867 }, { "epoch": 0.5216781991766664, "grad_norm": 0.6666766824947314, "learning_rate": 2.6574209245742094e-06, "loss": 0.5457, "step": 17868 }, { "epoch": 0.5217073954045137, "grad_norm": 0.7426397166428358, "learning_rate": 2.6572587185725874e-06, "loss": 0.6808, "step": 17869 }, { "epoch": 0.5217365916323611, "grad_norm": 0.7226120159135127, "learning_rate": 2.6570965125709654e-06, "loss": 0.6113, "step": 17870 }, { "epoch": 0.5217657878602084, "grad_norm": 0.7322960171951215, "learning_rate": 2.656934306569343e-06, "loss": 0.6031, "step": 17871 }, { "epoch": 0.5217949840880558, "grad_norm": 0.7659958285542369, "learning_rate": 2.656772100567721e-06, "loss": 0.7101, "step": 17872 }, { "epoch": 0.5218241803159032, "grad_norm": 0.6477451217573938, "learning_rate": 2.656609894566099e-06, "loss": 0.5188, "step": 17873 }, { "epoch": 0.5218533765437505, "grad_norm": 0.7477132098486788, "learning_rate": 2.656447688564477e-06, "loss": 0.6977, "step": 17874 }, { "epoch": 0.5218825727715979, "grad_norm": 0.7602566630282878, "learning_rate": 2.6562854825628546e-06, "loss": 0.7202, "step": 17875 }, { "epoch": 0.5219117689994452, "grad_norm": 0.7140805420108162, "learning_rate": 2.6561232765612335e-06, "loss": 0.6546, "step": 17876 }, { "epoch": 0.5219409652272926, "grad_norm": 0.8342196702178458, "learning_rate": 2.655961070559611e-06, "loss": 0.8283, "step": 17877 }, { "epoch": 0.52197016145514, "grad_norm": 0.6644056960517176, "learning_rate": 2.655798864557989e-06, "loss": 0.5663, "step": 17878 }, { "epoch": 0.5219993576829873, "grad_norm": 0.7104629544230407, "learning_rate": 2.655636658556367e-06, "loss": 0.6516, "step": 17879 }, { "epoch": 0.5220285539108347, "grad_norm": 0.8093801787415673, "learning_rate": 2.655474452554745e-06, "loss": 0.6422, "step": 17880 }, { "epoch": 0.522057750138682, "grad_norm": 0.8416175443014574, "learning_rate": 2.6553122465531226e-06, "loss": 0.6244, "step": 17881 }, { "epoch": 0.5220869463665294, "grad_norm": 0.7709874637715577, "learning_rate": 2.6551500405515007e-06, "loss": 0.6784, "step": 17882 }, { "epoch": 0.5221161425943768, "grad_norm": 0.6907916559925253, "learning_rate": 2.6549878345498787e-06, "loss": 0.6013, "step": 17883 }, { "epoch": 0.5221453388222241, "grad_norm": 0.6684361699871572, "learning_rate": 2.6548256285482567e-06, "loss": 0.596, "step": 17884 }, { "epoch": 0.5221745350500715, "grad_norm": 0.705232938878512, "learning_rate": 2.6546634225466342e-06, "loss": 0.685, "step": 17885 }, { "epoch": 0.5222037312779189, "grad_norm": 0.7557140148278321, "learning_rate": 2.6545012165450123e-06, "loss": 0.5888, "step": 17886 }, { "epoch": 0.5222329275057662, "grad_norm": 0.697020786002631, "learning_rate": 2.6543390105433903e-06, "loss": 0.6236, "step": 17887 }, { "epoch": 0.5222621237336136, "grad_norm": 0.6948007342045616, "learning_rate": 2.6541768045417683e-06, "loss": 0.6192, "step": 17888 }, { "epoch": 0.5222913199614609, "grad_norm": 0.7767274897214199, "learning_rate": 2.6540145985401463e-06, "loss": 0.6933, "step": 17889 }, { "epoch": 0.5223205161893083, "grad_norm": 0.7365739846093102, "learning_rate": 2.653852392538524e-06, "loss": 0.632, "step": 17890 }, { "epoch": 0.5223497124171557, "grad_norm": 0.7658492237485987, "learning_rate": 2.653690186536902e-06, "loss": 0.6241, "step": 17891 }, { "epoch": 0.522378908645003, "grad_norm": 0.7388084494190634, "learning_rate": 2.65352798053528e-06, "loss": 0.6626, "step": 17892 }, { "epoch": 0.5224081048728504, "grad_norm": 0.7678139710305807, "learning_rate": 2.653365774533658e-06, "loss": 0.6594, "step": 17893 }, { "epoch": 0.5224373011006977, "grad_norm": 0.6585982131936232, "learning_rate": 2.6532035685320355e-06, "loss": 0.5671, "step": 17894 }, { "epoch": 0.5224664973285451, "grad_norm": 0.792450939509372, "learning_rate": 2.6530413625304143e-06, "loss": 0.7019, "step": 17895 }, { "epoch": 0.5224956935563925, "grad_norm": 0.705943228167806, "learning_rate": 2.652879156528792e-06, "loss": 0.6264, "step": 17896 }, { "epoch": 0.5225248897842398, "grad_norm": 0.7487353794723469, "learning_rate": 2.65271695052717e-06, "loss": 0.6668, "step": 17897 }, { "epoch": 0.5225540860120872, "grad_norm": 0.7723410282371468, "learning_rate": 2.652554744525548e-06, "loss": 0.6624, "step": 17898 }, { "epoch": 0.5225832822399346, "grad_norm": 0.8504476766447111, "learning_rate": 2.652392538523926e-06, "loss": 0.7034, "step": 17899 }, { "epoch": 0.5226124784677819, "grad_norm": 0.7777900345903465, "learning_rate": 2.6522303325223035e-06, "loss": 0.656, "step": 17900 }, { "epoch": 0.5226416746956293, "grad_norm": 0.8258429505240502, "learning_rate": 2.6520681265206815e-06, "loss": 0.6094, "step": 17901 }, { "epoch": 0.5226708709234766, "grad_norm": 0.7144616148241346, "learning_rate": 2.6519059205190595e-06, "loss": 0.6413, "step": 17902 }, { "epoch": 0.522700067151324, "grad_norm": 0.7368972374807334, "learning_rate": 2.6517437145174375e-06, "loss": 0.5811, "step": 17903 }, { "epoch": 0.5227292633791714, "grad_norm": 0.6864441725371173, "learning_rate": 2.651581508515815e-06, "loss": 0.6235, "step": 17904 }, { "epoch": 0.5227584596070187, "grad_norm": 0.724421637007436, "learning_rate": 2.651419302514193e-06, "loss": 0.6245, "step": 17905 }, { "epoch": 0.5227876558348661, "grad_norm": 0.7253246420390452, "learning_rate": 2.651257096512571e-06, "loss": 0.6307, "step": 17906 }, { "epoch": 0.5228168520627134, "grad_norm": 0.7448884456337561, "learning_rate": 2.651094890510949e-06, "loss": 0.656, "step": 17907 }, { "epoch": 0.5228460482905608, "grad_norm": 0.7998119455177186, "learning_rate": 2.650932684509327e-06, "loss": 0.6899, "step": 17908 }, { "epoch": 0.5228752445184082, "grad_norm": 0.7053392857571746, "learning_rate": 2.6507704785077047e-06, "loss": 0.6143, "step": 17909 }, { "epoch": 0.5229044407462556, "grad_norm": 0.7604534736969363, "learning_rate": 2.6506082725060827e-06, "loss": 0.6943, "step": 17910 }, { "epoch": 0.522933636974103, "grad_norm": 0.7023565714297794, "learning_rate": 2.6504460665044607e-06, "loss": 0.6057, "step": 17911 }, { "epoch": 0.5229628332019504, "grad_norm": 0.6828359502132817, "learning_rate": 2.6502838605028387e-06, "loss": 0.5809, "step": 17912 }, { "epoch": 0.5229920294297977, "grad_norm": 0.6635882808579904, "learning_rate": 2.6501216545012163e-06, "loss": 0.5289, "step": 17913 }, { "epoch": 0.5230212256576451, "grad_norm": 0.7367788039812464, "learning_rate": 2.649959448499595e-06, "loss": 0.6424, "step": 17914 }, { "epoch": 0.5230504218854924, "grad_norm": 0.8004977721645348, "learning_rate": 2.6497972424979728e-06, "loss": 0.8261, "step": 17915 }, { "epoch": 0.5230796181133398, "grad_norm": 0.7116082369451863, "learning_rate": 2.6496350364963508e-06, "loss": 0.6461, "step": 17916 }, { "epoch": 0.5231088143411872, "grad_norm": 0.6836398756514738, "learning_rate": 2.6494728304947288e-06, "loss": 0.5692, "step": 17917 }, { "epoch": 0.5231380105690345, "grad_norm": 0.7228784822439708, "learning_rate": 2.6493106244931068e-06, "loss": 0.6277, "step": 17918 }, { "epoch": 0.5231672067968819, "grad_norm": 0.69879429827774, "learning_rate": 2.6491484184914844e-06, "loss": 0.6066, "step": 17919 }, { "epoch": 0.5231964030247293, "grad_norm": 0.8029061481453212, "learning_rate": 2.6489862124898624e-06, "loss": 0.6928, "step": 17920 }, { "epoch": 0.5232255992525766, "grad_norm": 0.7586504133608144, "learning_rate": 2.6488240064882404e-06, "loss": 0.685, "step": 17921 }, { "epoch": 0.523254795480424, "grad_norm": 0.6617030775430354, "learning_rate": 2.6486618004866184e-06, "loss": 0.5539, "step": 17922 }, { "epoch": 0.5232839917082713, "grad_norm": 0.8824328495876241, "learning_rate": 2.648499594484996e-06, "loss": 0.6933, "step": 17923 }, { "epoch": 0.5233131879361187, "grad_norm": 0.7369997574628185, "learning_rate": 2.648337388483374e-06, "loss": 0.6204, "step": 17924 }, { "epoch": 0.5233423841639661, "grad_norm": 0.7460569093830175, "learning_rate": 2.648175182481752e-06, "loss": 0.6654, "step": 17925 }, { "epoch": 0.5233715803918134, "grad_norm": 0.7883311158719816, "learning_rate": 2.64801297648013e-06, "loss": 0.6167, "step": 17926 }, { "epoch": 0.5234007766196608, "grad_norm": 0.6855011499110206, "learning_rate": 2.647850770478508e-06, "loss": 0.5757, "step": 17927 }, { "epoch": 0.5234299728475081, "grad_norm": 0.7130273419115891, "learning_rate": 2.6476885644768856e-06, "loss": 0.6466, "step": 17928 }, { "epoch": 0.5234591690753555, "grad_norm": 0.7322905154810915, "learning_rate": 2.6475263584752636e-06, "loss": 0.6882, "step": 17929 }, { "epoch": 0.5234883653032029, "grad_norm": 0.716217299955142, "learning_rate": 2.6473641524736416e-06, "loss": 0.6494, "step": 17930 }, { "epoch": 0.5235175615310502, "grad_norm": 0.7379253117274343, "learning_rate": 2.6472019464720196e-06, "loss": 0.6654, "step": 17931 }, { "epoch": 0.5235467577588976, "grad_norm": 0.7445421437683423, "learning_rate": 2.647039740470397e-06, "loss": 0.71, "step": 17932 }, { "epoch": 0.523575953986745, "grad_norm": 0.7198159436818766, "learning_rate": 2.6468775344687756e-06, "loss": 0.6348, "step": 17933 }, { "epoch": 0.5236051502145923, "grad_norm": 0.7086366515307986, "learning_rate": 2.6467153284671536e-06, "loss": 0.5887, "step": 17934 }, { "epoch": 0.5236343464424397, "grad_norm": 0.7321524815370296, "learning_rate": 2.6465531224655316e-06, "loss": 0.6828, "step": 17935 }, { "epoch": 0.523663542670287, "grad_norm": 0.7638923264460393, "learning_rate": 2.6463909164639096e-06, "loss": 0.7464, "step": 17936 }, { "epoch": 0.5236927388981344, "grad_norm": 0.7458076816543755, "learning_rate": 2.6462287104622876e-06, "loss": 0.6905, "step": 17937 }, { "epoch": 0.5237219351259818, "grad_norm": 0.7526548756197615, "learning_rate": 2.6460665044606652e-06, "loss": 0.6875, "step": 17938 }, { "epoch": 0.5237511313538291, "grad_norm": 0.7240354102412822, "learning_rate": 2.6459042984590432e-06, "loss": 0.6078, "step": 17939 }, { "epoch": 0.5237803275816765, "grad_norm": 0.8538809867017855, "learning_rate": 2.6457420924574212e-06, "loss": 0.7061, "step": 17940 }, { "epoch": 0.5238095238095238, "grad_norm": 0.7861169229462284, "learning_rate": 2.6455798864557992e-06, "loss": 0.7369, "step": 17941 }, { "epoch": 0.5238387200373712, "grad_norm": 0.6993549907600821, "learning_rate": 2.645417680454177e-06, "loss": 0.589, "step": 17942 }, { "epoch": 0.5238679162652186, "grad_norm": 0.6831951983088322, "learning_rate": 2.645255474452555e-06, "loss": 0.5569, "step": 17943 }, { "epoch": 0.5238971124930659, "grad_norm": 0.6967700668129048, "learning_rate": 2.645093268450933e-06, "loss": 0.6356, "step": 17944 }, { "epoch": 0.5239263087209133, "grad_norm": 0.7112699354253882, "learning_rate": 2.644931062449311e-06, "loss": 0.6332, "step": 17945 }, { "epoch": 0.5239555049487606, "grad_norm": 0.7299819802003925, "learning_rate": 2.644768856447689e-06, "loss": 0.6961, "step": 17946 }, { "epoch": 0.523984701176608, "grad_norm": 0.7130361778288413, "learning_rate": 2.6446066504460664e-06, "loss": 0.6376, "step": 17947 }, { "epoch": 0.5240138974044554, "grad_norm": 0.7557126416851272, "learning_rate": 2.6444444444444444e-06, "loss": 0.725, "step": 17948 }, { "epoch": 0.5240430936323027, "grad_norm": 0.7076265760917282, "learning_rate": 2.6442822384428224e-06, "loss": 0.6336, "step": 17949 }, { "epoch": 0.5240722898601501, "grad_norm": 0.7409887398323016, "learning_rate": 2.6441200324412005e-06, "loss": 0.6805, "step": 17950 }, { "epoch": 0.5241014860879974, "grad_norm": 0.8945981015658662, "learning_rate": 2.643957826439578e-06, "loss": 0.7306, "step": 17951 }, { "epoch": 0.5241306823158448, "grad_norm": 0.7894433399817793, "learning_rate": 2.6437956204379565e-06, "loss": 0.7633, "step": 17952 }, { "epoch": 0.5241598785436922, "grad_norm": 0.7398718611531978, "learning_rate": 2.6436334144363345e-06, "loss": 0.665, "step": 17953 }, { "epoch": 0.5241890747715395, "grad_norm": 0.7403659647941875, "learning_rate": 2.6434712084347125e-06, "loss": 0.6564, "step": 17954 }, { "epoch": 0.5242182709993869, "grad_norm": 0.6786846181376796, "learning_rate": 2.6433090024330905e-06, "loss": 0.5721, "step": 17955 }, { "epoch": 0.5242474672272343, "grad_norm": 0.7206508463839136, "learning_rate": 2.6431467964314685e-06, "loss": 0.6294, "step": 17956 }, { "epoch": 0.5242766634550816, "grad_norm": 0.7345333365524975, "learning_rate": 2.642984590429846e-06, "loss": 0.6645, "step": 17957 }, { "epoch": 0.524305859682929, "grad_norm": 0.870735099473812, "learning_rate": 2.642822384428224e-06, "loss": 0.7186, "step": 17958 }, { "epoch": 0.5243350559107763, "grad_norm": 0.7368554165895741, "learning_rate": 2.642660178426602e-06, "loss": 0.7167, "step": 17959 }, { "epoch": 0.5243642521386237, "grad_norm": 0.6858261033792049, "learning_rate": 2.64249797242498e-06, "loss": 0.6202, "step": 17960 }, { "epoch": 0.5243934483664711, "grad_norm": 0.7166134452801977, "learning_rate": 2.6423357664233577e-06, "loss": 0.6529, "step": 17961 }, { "epoch": 0.5244226445943184, "grad_norm": 0.788431023398722, "learning_rate": 2.6421735604217357e-06, "loss": 0.6944, "step": 17962 }, { "epoch": 0.5244518408221658, "grad_norm": 0.6936106569074529, "learning_rate": 2.6420113544201137e-06, "loss": 0.6322, "step": 17963 }, { "epoch": 0.5244810370500131, "grad_norm": 0.6845954739262129, "learning_rate": 2.6418491484184917e-06, "loss": 0.5484, "step": 17964 }, { "epoch": 0.5245102332778605, "grad_norm": 0.7323579748432761, "learning_rate": 2.6416869424168697e-06, "loss": 0.6344, "step": 17965 }, { "epoch": 0.5245394295057079, "grad_norm": 0.7188685658185541, "learning_rate": 2.6415247364152473e-06, "loss": 0.6581, "step": 17966 }, { "epoch": 0.5245686257335552, "grad_norm": 0.7235936709139879, "learning_rate": 2.6413625304136253e-06, "loss": 0.6445, "step": 17967 }, { "epoch": 0.5245978219614026, "grad_norm": 0.6783164311161327, "learning_rate": 2.6412003244120033e-06, "loss": 0.6187, "step": 17968 }, { "epoch": 0.52462701818925, "grad_norm": 0.7123027164873144, "learning_rate": 2.6410381184103813e-06, "loss": 0.5955, "step": 17969 }, { "epoch": 0.5246562144170973, "grad_norm": 0.8185601262767355, "learning_rate": 2.640875912408759e-06, "loss": 0.7144, "step": 17970 }, { "epoch": 0.5246854106449447, "grad_norm": 0.7508790655962886, "learning_rate": 2.6407137064071373e-06, "loss": 0.6725, "step": 17971 }, { "epoch": 0.524714606872792, "grad_norm": 0.7618113616674855, "learning_rate": 2.6405515004055153e-06, "loss": 0.6703, "step": 17972 }, { "epoch": 0.5247438031006394, "grad_norm": 0.7779240944258345, "learning_rate": 2.6403892944038933e-06, "loss": 0.6242, "step": 17973 }, { "epoch": 0.5247729993284868, "grad_norm": 0.7323894447141256, "learning_rate": 2.6402270884022713e-06, "loss": 0.6472, "step": 17974 }, { "epoch": 0.5248021955563341, "grad_norm": 0.7117805555599126, "learning_rate": 2.6400648824006494e-06, "loss": 0.6436, "step": 17975 }, { "epoch": 0.5248313917841815, "grad_norm": 0.7221230883291717, "learning_rate": 2.639902676399027e-06, "loss": 0.6371, "step": 17976 }, { "epoch": 0.5248605880120288, "grad_norm": 0.6676215894148507, "learning_rate": 2.639740470397405e-06, "loss": 0.5415, "step": 17977 }, { "epoch": 0.5248897842398762, "grad_norm": 0.8122066974074882, "learning_rate": 2.639578264395783e-06, "loss": 0.7527, "step": 17978 }, { "epoch": 0.5249189804677236, "grad_norm": 0.6810873637380619, "learning_rate": 2.639416058394161e-06, "loss": 0.601, "step": 17979 }, { "epoch": 0.5249481766955709, "grad_norm": 0.7209806277957931, "learning_rate": 2.6392538523925385e-06, "loss": 0.681, "step": 17980 }, { "epoch": 0.5249773729234183, "grad_norm": 0.7527641406412101, "learning_rate": 2.6390916463909165e-06, "loss": 0.6939, "step": 17981 }, { "epoch": 0.5250065691512656, "grad_norm": 0.720805718358143, "learning_rate": 2.6389294403892946e-06, "loss": 0.6406, "step": 17982 }, { "epoch": 0.525035765379113, "grad_norm": 0.743752549805746, "learning_rate": 2.6387672343876726e-06, "loss": 0.6795, "step": 17983 }, { "epoch": 0.5250649616069604, "grad_norm": 0.7391174739341431, "learning_rate": 2.6386050283860506e-06, "loss": 0.6651, "step": 17984 }, { "epoch": 0.5250941578348077, "grad_norm": 0.6815930951550643, "learning_rate": 2.638442822384428e-06, "loss": 0.5636, "step": 17985 }, { "epoch": 0.5251233540626551, "grad_norm": 0.7499506849979419, "learning_rate": 2.638280616382806e-06, "loss": 0.7144, "step": 17986 }, { "epoch": 0.5251525502905025, "grad_norm": 0.6920733534890794, "learning_rate": 2.638118410381184e-06, "loss": 0.6517, "step": 17987 }, { "epoch": 0.5251817465183498, "grad_norm": 0.7492888799071318, "learning_rate": 2.637956204379562e-06, "loss": 0.7433, "step": 17988 }, { "epoch": 0.5252109427461972, "grad_norm": 0.7368170586761527, "learning_rate": 2.6377939983779398e-06, "loss": 0.6617, "step": 17989 }, { "epoch": 0.5252401389740445, "grad_norm": 0.7603464724771277, "learning_rate": 2.637631792376318e-06, "loss": 0.6594, "step": 17990 }, { "epoch": 0.5252693352018919, "grad_norm": 0.7418666933661031, "learning_rate": 2.637469586374696e-06, "loss": 0.6803, "step": 17991 }, { "epoch": 0.5252985314297393, "grad_norm": 0.7442215357047718, "learning_rate": 2.637307380373074e-06, "loss": 0.6648, "step": 17992 }, { "epoch": 0.5253277276575866, "grad_norm": 0.7310409719920702, "learning_rate": 2.637145174371452e-06, "loss": 0.6344, "step": 17993 }, { "epoch": 0.525356923885434, "grad_norm": 0.8936877181233183, "learning_rate": 2.6369829683698302e-06, "loss": 0.7345, "step": 17994 }, { "epoch": 0.5253861201132813, "grad_norm": 0.7213509804382074, "learning_rate": 2.636820762368208e-06, "loss": 0.6298, "step": 17995 }, { "epoch": 0.5254153163411287, "grad_norm": 0.7169152954309381, "learning_rate": 2.636658556366586e-06, "loss": 0.6413, "step": 17996 }, { "epoch": 0.5254445125689761, "grad_norm": 0.7155299388900254, "learning_rate": 2.636496350364964e-06, "loss": 0.6506, "step": 17997 }, { "epoch": 0.5254737087968234, "grad_norm": 0.7587992851234913, "learning_rate": 2.636334144363342e-06, "loss": 0.6943, "step": 17998 }, { "epoch": 0.5255029050246708, "grad_norm": 0.6833997578822281, "learning_rate": 2.6361719383617194e-06, "loss": 0.5817, "step": 17999 }, { "epoch": 0.5255321012525181, "grad_norm": 0.7400754234210984, "learning_rate": 2.6360097323600974e-06, "loss": 0.6728, "step": 18000 }, { "epoch": 0.5255612974803655, "grad_norm": 0.7486113447315169, "learning_rate": 2.6358475263584754e-06, "loss": 0.6849, "step": 18001 }, { "epoch": 0.5255904937082129, "grad_norm": 0.7371017287438163, "learning_rate": 2.6356853203568534e-06, "loss": 0.6549, "step": 18002 }, { "epoch": 0.5256196899360602, "grad_norm": 0.6833223912583481, "learning_rate": 2.6355231143552314e-06, "loss": 0.5601, "step": 18003 }, { "epoch": 0.5256488861639076, "grad_norm": 0.714823015756583, "learning_rate": 2.635360908353609e-06, "loss": 0.6257, "step": 18004 }, { "epoch": 0.525678082391755, "grad_norm": 0.6917118401872271, "learning_rate": 2.635198702351987e-06, "loss": 0.6003, "step": 18005 }, { "epoch": 0.5257072786196023, "grad_norm": 0.8018955724211257, "learning_rate": 2.635036496350365e-06, "loss": 0.7539, "step": 18006 }, { "epoch": 0.5257364748474497, "grad_norm": 0.7808423623725927, "learning_rate": 2.634874290348743e-06, "loss": 0.7929, "step": 18007 }, { "epoch": 0.525765671075297, "grad_norm": 0.7042473755350821, "learning_rate": 2.6347120843471215e-06, "loss": 0.5969, "step": 18008 }, { "epoch": 0.5257948673031444, "grad_norm": 0.725359036011518, "learning_rate": 2.634549878345499e-06, "loss": 0.6814, "step": 18009 }, { "epoch": 0.5258240635309918, "grad_norm": 0.805112029440417, "learning_rate": 2.634387672343877e-06, "loss": 0.7461, "step": 18010 }, { "epoch": 0.5258532597588391, "grad_norm": 0.680729944924449, "learning_rate": 2.634225466342255e-06, "loss": 0.5567, "step": 18011 }, { "epoch": 0.5258824559866865, "grad_norm": 0.6499856511113548, "learning_rate": 2.634063260340633e-06, "loss": 0.5328, "step": 18012 }, { "epoch": 0.5259116522145338, "grad_norm": 0.7598660128185523, "learning_rate": 2.633901054339011e-06, "loss": 0.7163, "step": 18013 }, { "epoch": 0.5259408484423812, "grad_norm": 0.7425206933548678, "learning_rate": 2.6337388483373887e-06, "loss": 0.5872, "step": 18014 }, { "epoch": 0.5259700446702286, "grad_norm": 0.7508802545308442, "learning_rate": 2.6335766423357667e-06, "loss": 0.6836, "step": 18015 }, { "epoch": 0.5259992408980759, "grad_norm": 0.7119143737957431, "learning_rate": 2.6334144363341447e-06, "loss": 0.6642, "step": 18016 }, { "epoch": 0.5260284371259233, "grad_norm": 0.7363402641954622, "learning_rate": 2.6332522303325227e-06, "loss": 0.6465, "step": 18017 }, { "epoch": 0.5260576333537706, "grad_norm": 0.7415439171889027, "learning_rate": 2.6330900243309003e-06, "loss": 0.6501, "step": 18018 }, { "epoch": 0.526086829581618, "grad_norm": 0.7443353247394832, "learning_rate": 2.6329278183292783e-06, "loss": 0.6678, "step": 18019 }, { "epoch": 0.5261160258094654, "grad_norm": 0.7179823330105952, "learning_rate": 2.6327656123276563e-06, "loss": 0.6155, "step": 18020 }, { "epoch": 0.5261452220373127, "grad_norm": 0.7427675244774982, "learning_rate": 2.6326034063260343e-06, "loss": 0.679, "step": 18021 }, { "epoch": 0.5261744182651601, "grad_norm": 0.6603354323547556, "learning_rate": 2.6324412003244123e-06, "loss": 0.5251, "step": 18022 }, { "epoch": 0.5262036144930075, "grad_norm": 0.6909711489164947, "learning_rate": 2.63227899432279e-06, "loss": 0.607, "step": 18023 }, { "epoch": 0.5262328107208548, "grad_norm": 0.7472495419535049, "learning_rate": 2.632116788321168e-06, "loss": 0.6373, "step": 18024 }, { "epoch": 0.5262620069487022, "grad_norm": 0.7058017200537576, "learning_rate": 2.631954582319546e-06, "loss": 0.6046, "step": 18025 }, { "epoch": 0.5262912031765495, "grad_norm": 0.721431004494722, "learning_rate": 2.631792376317924e-06, "loss": 0.6629, "step": 18026 }, { "epoch": 0.5263203994043969, "grad_norm": 0.6781372769644173, "learning_rate": 2.6316301703163023e-06, "loss": 0.5657, "step": 18027 }, { "epoch": 0.5263495956322443, "grad_norm": 0.677284085398688, "learning_rate": 2.63146796431468e-06, "loss": 0.5329, "step": 18028 }, { "epoch": 0.5263787918600916, "grad_norm": 0.7098651383111545, "learning_rate": 2.631305758313058e-06, "loss": 0.6258, "step": 18029 }, { "epoch": 0.526407988087939, "grad_norm": 0.7036272725638355, "learning_rate": 2.631143552311436e-06, "loss": 0.6086, "step": 18030 }, { "epoch": 0.5264371843157865, "grad_norm": 0.7112424070102014, "learning_rate": 2.630981346309814e-06, "loss": 0.6207, "step": 18031 }, { "epoch": 0.5264663805436338, "grad_norm": 0.7002570014081775, "learning_rate": 2.630819140308192e-06, "loss": 0.6494, "step": 18032 }, { "epoch": 0.5264955767714812, "grad_norm": 0.7128756231585207, "learning_rate": 2.6306569343065695e-06, "loss": 0.6031, "step": 18033 }, { "epoch": 0.5265247729993285, "grad_norm": 0.69034403275675, "learning_rate": 2.6304947283049475e-06, "loss": 0.6335, "step": 18034 }, { "epoch": 0.5265539692271759, "grad_norm": 0.7978188633238475, "learning_rate": 2.6303325223033255e-06, "loss": 0.7594, "step": 18035 }, { "epoch": 0.5265831654550233, "grad_norm": 0.8058875488479285, "learning_rate": 2.6301703163017035e-06, "loss": 0.7492, "step": 18036 }, { "epoch": 0.5266123616828706, "grad_norm": 0.7892790196516895, "learning_rate": 2.630008110300081e-06, "loss": 0.762, "step": 18037 }, { "epoch": 0.526641557910718, "grad_norm": 0.7498597246856918, "learning_rate": 2.629845904298459e-06, "loss": 0.65, "step": 18038 }, { "epoch": 0.5266707541385653, "grad_norm": 0.7557703888723261, "learning_rate": 2.629683698296837e-06, "loss": 0.6969, "step": 18039 }, { "epoch": 0.5266999503664127, "grad_norm": 0.6754074154708442, "learning_rate": 2.629521492295215e-06, "loss": 0.5381, "step": 18040 }, { "epoch": 0.5267291465942601, "grad_norm": 0.7052309923363268, "learning_rate": 2.629359286293593e-06, "loss": 0.6019, "step": 18041 }, { "epoch": 0.5267583428221074, "grad_norm": 0.679922700496378, "learning_rate": 2.6291970802919707e-06, "loss": 0.5798, "step": 18042 }, { "epoch": 0.5267875390499548, "grad_norm": 0.8006277064186853, "learning_rate": 2.6290348742903487e-06, "loss": 0.5827, "step": 18043 }, { "epoch": 0.5268167352778022, "grad_norm": 0.6915828277084921, "learning_rate": 2.6288726682887267e-06, "loss": 0.5988, "step": 18044 }, { "epoch": 0.5268459315056495, "grad_norm": 0.7407928568860144, "learning_rate": 2.6287104622871047e-06, "loss": 0.6964, "step": 18045 }, { "epoch": 0.5268751277334969, "grad_norm": 0.8574710531900195, "learning_rate": 2.628548256285483e-06, "loss": 0.7439, "step": 18046 }, { "epoch": 0.5269043239613442, "grad_norm": 0.6929055721052876, "learning_rate": 2.6283860502838608e-06, "loss": 0.5825, "step": 18047 }, { "epoch": 0.5269335201891916, "grad_norm": 0.7428799185023616, "learning_rate": 2.6282238442822388e-06, "loss": 0.5901, "step": 18048 }, { "epoch": 0.526962716417039, "grad_norm": 0.8186227038905813, "learning_rate": 2.6280616382806168e-06, "loss": 0.672, "step": 18049 }, { "epoch": 0.5269919126448863, "grad_norm": 0.7255924777960745, "learning_rate": 2.6278994322789948e-06, "loss": 0.6747, "step": 18050 }, { "epoch": 0.5270211088727337, "grad_norm": 0.6944641152219239, "learning_rate": 2.627737226277373e-06, "loss": 0.6274, "step": 18051 }, { "epoch": 0.527050305100581, "grad_norm": 0.7701303106081416, "learning_rate": 2.6275750202757504e-06, "loss": 0.7147, "step": 18052 }, { "epoch": 0.5270795013284284, "grad_norm": 0.7459035980461737, "learning_rate": 2.6274128142741284e-06, "loss": 0.6854, "step": 18053 }, { "epoch": 0.5271086975562758, "grad_norm": 0.8247165883190435, "learning_rate": 2.6272506082725064e-06, "loss": 0.7724, "step": 18054 }, { "epoch": 0.5271378937841231, "grad_norm": 0.6974922222615538, "learning_rate": 2.6270884022708844e-06, "loss": 0.605, "step": 18055 }, { "epoch": 0.5271670900119705, "grad_norm": 0.7106684789506165, "learning_rate": 2.626926196269262e-06, "loss": 0.6116, "step": 18056 }, { "epoch": 0.5271962862398178, "grad_norm": 0.7743692864985149, "learning_rate": 2.62676399026764e-06, "loss": 0.7154, "step": 18057 }, { "epoch": 0.5272254824676652, "grad_norm": 0.7027812168343303, "learning_rate": 2.626601784266018e-06, "loss": 0.5693, "step": 18058 }, { "epoch": 0.5272546786955126, "grad_norm": 0.7055290755566841, "learning_rate": 2.626439578264396e-06, "loss": 0.5792, "step": 18059 }, { "epoch": 0.5272838749233599, "grad_norm": 0.7168808658055265, "learning_rate": 2.626277372262774e-06, "loss": 0.6123, "step": 18060 }, { "epoch": 0.5273130711512073, "grad_norm": 0.650659446397661, "learning_rate": 2.6261151662611516e-06, "loss": 0.5417, "step": 18061 }, { "epoch": 0.5273422673790547, "grad_norm": 0.7328472778264261, "learning_rate": 2.6259529602595296e-06, "loss": 0.6587, "step": 18062 }, { "epoch": 0.527371463606902, "grad_norm": 0.7088404543412852, "learning_rate": 2.6257907542579076e-06, "loss": 0.6298, "step": 18063 }, { "epoch": 0.5274006598347494, "grad_norm": 0.7464454620181978, "learning_rate": 2.6256285482562856e-06, "loss": 0.6573, "step": 18064 }, { "epoch": 0.5274298560625967, "grad_norm": 0.744297771096945, "learning_rate": 2.625466342254664e-06, "loss": 0.6163, "step": 18065 }, { "epoch": 0.5274590522904441, "grad_norm": 0.7321275713296005, "learning_rate": 2.6253041362530416e-06, "loss": 0.6478, "step": 18066 }, { "epoch": 0.5274882485182915, "grad_norm": 0.7985649516490213, "learning_rate": 2.6251419302514196e-06, "loss": 0.6298, "step": 18067 }, { "epoch": 0.5275174447461388, "grad_norm": 0.7194608691901974, "learning_rate": 2.6249797242497976e-06, "loss": 0.6405, "step": 18068 }, { "epoch": 0.5275466409739862, "grad_norm": 0.7103459284463679, "learning_rate": 2.6248175182481756e-06, "loss": 0.5928, "step": 18069 }, { "epoch": 0.5275758372018335, "grad_norm": 0.6909159779306602, "learning_rate": 2.6246553122465536e-06, "loss": 0.5593, "step": 18070 }, { "epoch": 0.5276050334296809, "grad_norm": 0.6831709670283618, "learning_rate": 2.6244931062449312e-06, "loss": 0.5758, "step": 18071 }, { "epoch": 0.5276342296575283, "grad_norm": 0.6663196314299231, "learning_rate": 2.6243309002433092e-06, "loss": 0.5717, "step": 18072 }, { "epoch": 0.5276634258853756, "grad_norm": 0.6795509172409412, "learning_rate": 2.6241686942416872e-06, "loss": 0.6053, "step": 18073 }, { "epoch": 0.527692622113223, "grad_norm": 0.7591259020297776, "learning_rate": 2.6240064882400653e-06, "loss": 0.6797, "step": 18074 }, { "epoch": 0.5277218183410703, "grad_norm": 0.7664141963772224, "learning_rate": 2.623844282238443e-06, "loss": 0.724, "step": 18075 }, { "epoch": 0.5277510145689177, "grad_norm": 0.6999539674907675, "learning_rate": 2.623682076236821e-06, "loss": 0.6001, "step": 18076 }, { "epoch": 0.5277802107967651, "grad_norm": 0.7797091943295402, "learning_rate": 2.623519870235199e-06, "loss": 0.6717, "step": 18077 }, { "epoch": 0.5278094070246124, "grad_norm": 0.6973082037067752, "learning_rate": 2.623357664233577e-06, "loss": 0.5626, "step": 18078 }, { "epoch": 0.5278386032524598, "grad_norm": 0.7591690200736847, "learning_rate": 2.6231954582319544e-06, "loss": 0.6934, "step": 18079 }, { "epoch": 0.5278677994803072, "grad_norm": 0.6997778736501792, "learning_rate": 2.6230332522303324e-06, "loss": 0.6036, "step": 18080 }, { "epoch": 0.5278969957081545, "grad_norm": 0.7075921647430792, "learning_rate": 2.6228710462287105e-06, "loss": 0.6212, "step": 18081 }, { "epoch": 0.5279261919360019, "grad_norm": 0.823923700981535, "learning_rate": 2.6227088402270885e-06, "loss": 0.6969, "step": 18082 }, { "epoch": 0.5279553881638492, "grad_norm": 0.6960544336014077, "learning_rate": 2.6225466342254665e-06, "loss": 0.6262, "step": 18083 }, { "epoch": 0.5279845843916966, "grad_norm": 0.8014368337673712, "learning_rate": 2.622384428223845e-06, "loss": 0.7197, "step": 18084 }, { "epoch": 0.528013780619544, "grad_norm": 0.7088421104822585, "learning_rate": 2.6222222222222225e-06, "loss": 0.5908, "step": 18085 }, { "epoch": 0.5280429768473913, "grad_norm": 0.7225748250238033, "learning_rate": 2.6220600162206005e-06, "loss": 0.6196, "step": 18086 }, { "epoch": 0.5280721730752387, "grad_norm": 0.700466158513678, "learning_rate": 2.6218978102189785e-06, "loss": 0.6223, "step": 18087 }, { "epoch": 0.528101369303086, "grad_norm": 0.7407206978359896, "learning_rate": 2.6217356042173565e-06, "loss": 0.5816, "step": 18088 }, { "epoch": 0.5281305655309334, "grad_norm": 0.7506016320022664, "learning_rate": 2.6215733982157345e-06, "loss": 0.691, "step": 18089 }, { "epoch": 0.5281597617587808, "grad_norm": 0.7123215959023321, "learning_rate": 2.621411192214112e-06, "loss": 0.6446, "step": 18090 }, { "epoch": 0.5281889579866281, "grad_norm": 0.7657405859562756, "learning_rate": 2.62124898621249e-06, "loss": 0.6888, "step": 18091 }, { "epoch": 0.5282181542144755, "grad_norm": 0.7197824792939859, "learning_rate": 2.621086780210868e-06, "loss": 0.6353, "step": 18092 }, { "epoch": 0.5282473504423228, "grad_norm": 0.797398229989292, "learning_rate": 2.620924574209246e-06, "loss": 0.7242, "step": 18093 }, { "epoch": 0.5282765466701702, "grad_norm": 0.7780892355452816, "learning_rate": 2.6207623682076237e-06, "loss": 0.7007, "step": 18094 }, { "epoch": 0.5283057428980176, "grad_norm": 0.7550237567025704, "learning_rate": 2.6206001622060017e-06, "loss": 0.6912, "step": 18095 }, { "epoch": 0.5283349391258649, "grad_norm": 0.7316247352802923, "learning_rate": 2.6204379562043797e-06, "loss": 0.6929, "step": 18096 }, { "epoch": 0.5283641353537123, "grad_norm": 0.7735419355139569, "learning_rate": 2.6202757502027577e-06, "loss": 0.7399, "step": 18097 }, { "epoch": 0.5283933315815597, "grad_norm": 0.7297971371295212, "learning_rate": 2.6201135442011353e-06, "loss": 0.684, "step": 18098 }, { "epoch": 0.528422527809407, "grad_norm": 0.75812302878238, "learning_rate": 2.6199513381995133e-06, "loss": 0.679, "step": 18099 }, { "epoch": 0.5284517240372544, "grad_norm": 0.6972333533308397, "learning_rate": 2.6197891321978913e-06, "loss": 0.6139, "step": 18100 }, { "epoch": 0.5284809202651017, "grad_norm": 1.1320545854653605, "learning_rate": 2.6196269261962693e-06, "loss": 0.6806, "step": 18101 }, { "epoch": 0.5285101164929491, "grad_norm": 0.7553898436260013, "learning_rate": 2.6194647201946473e-06, "loss": 0.694, "step": 18102 }, { "epoch": 0.5285393127207965, "grad_norm": 0.795006019144056, "learning_rate": 2.6193025141930258e-06, "loss": 0.6158, "step": 18103 }, { "epoch": 0.5285685089486438, "grad_norm": 0.6966940320151239, "learning_rate": 2.6191403081914033e-06, "loss": 0.6075, "step": 18104 }, { "epoch": 0.5285977051764912, "grad_norm": 0.7320275788876497, "learning_rate": 2.6189781021897813e-06, "loss": 0.6687, "step": 18105 }, { "epoch": 0.5286269014043385, "grad_norm": 0.6892198328811812, "learning_rate": 2.6188158961881594e-06, "loss": 0.5746, "step": 18106 }, { "epoch": 0.5286560976321859, "grad_norm": 0.7110481150944645, "learning_rate": 2.6186536901865374e-06, "loss": 0.5832, "step": 18107 }, { "epoch": 0.5286852938600333, "grad_norm": 0.7284853255586056, "learning_rate": 2.6184914841849154e-06, "loss": 0.6533, "step": 18108 }, { "epoch": 0.5287144900878806, "grad_norm": 0.7325311991296775, "learning_rate": 2.618329278183293e-06, "loss": 0.5915, "step": 18109 }, { "epoch": 0.528743686315728, "grad_norm": 0.7322284337692212, "learning_rate": 2.618167072181671e-06, "loss": 0.6929, "step": 18110 }, { "epoch": 0.5287728825435754, "grad_norm": 0.7929201044163683, "learning_rate": 2.618004866180049e-06, "loss": 0.7541, "step": 18111 }, { "epoch": 0.5288020787714227, "grad_norm": 0.709937105981137, "learning_rate": 2.617842660178427e-06, "loss": 0.6199, "step": 18112 }, { "epoch": 0.5288312749992701, "grad_norm": 0.7448343891495072, "learning_rate": 2.6176804541768046e-06, "loss": 0.6798, "step": 18113 }, { "epoch": 0.5288604712271174, "grad_norm": 0.7403208074801108, "learning_rate": 2.6175182481751826e-06, "loss": 0.6605, "step": 18114 }, { "epoch": 0.5288896674549648, "grad_norm": 0.783880253262982, "learning_rate": 2.6173560421735606e-06, "loss": 0.7315, "step": 18115 }, { "epoch": 0.5289188636828122, "grad_norm": 0.7198369284686295, "learning_rate": 2.6171938361719386e-06, "loss": 0.656, "step": 18116 }, { "epoch": 0.5289480599106595, "grad_norm": 0.7345082272567517, "learning_rate": 2.617031630170316e-06, "loss": 0.6233, "step": 18117 }, { "epoch": 0.5289772561385069, "grad_norm": 0.7415631586496517, "learning_rate": 2.616869424168694e-06, "loss": 0.6152, "step": 18118 }, { "epoch": 0.5290064523663542, "grad_norm": 0.6882846674153645, "learning_rate": 2.616707218167072e-06, "loss": 0.6181, "step": 18119 }, { "epoch": 0.5290356485942016, "grad_norm": 0.7608660438448118, "learning_rate": 2.61654501216545e-06, "loss": 0.641, "step": 18120 }, { "epoch": 0.529064844822049, "grad_norm": 0.7414960780217225, "learning_rate": 2.616382806163828e-06, "loss": 0.6985, "step": 18121 }, { "epoch": 0.5290940410498963, "grad_norm": 0.6735437512788183, "learning_rate": 2.6162206001622066e-06, "loss": 0.5862, "step": 18122 }, { "epoch": 0.5291232372777437, "grad_norm": 0.7092036124250377, "learning_rate": 2.616058394160584e-06, "loss": 0.5888, "step": 18123 }, { "epoch": 0.529152433505591, "grad_norm": 0.7307272193922821, "learning_rate": 2.615896188158962e-06, "loss": 0.6452, "step": 18124 }, { "epoch": 0.5291816297334384, "grad_norm": 0.7040086909209333, "learning_rate": 2.61573398215734e-06, "loss": 0.6282, "step": 18125 }, { "epoch": 0.5292108259612858, "grad_norm": 0.9185751048354257, "learning_rate": 2.6155717761557182e-06, "loss": 0.5927, "step": 18126 }, { "epoch": 0.5292400221891331, "grad_norm": 0.746286977691394, "learning_rate": 2.6154095701540962e-06, "loss": 0.651, "step": 18127 }, { "epoch": 0.5292692184169805, "grad_norm": 0.7260149888045123, "learning_rate": 2.615247364152474e-06, "loss": 0.6764, "step": 18128 }, { "epoch": 0.5292984146448279, "grad_norm": 0.8279743428959253, "learning_rate": 2.615085158150852e-06, "loss": 0.735, "step": 18129 }, { "epoch": 0.5293276108726752, "grad_norm": 0.8804073325685622, "learning_rate": 2.61492295214923e-06, "loss": 0.6461, "step": 18130 }, { "epoch": 0.5293568071005226, "grad_norm": 0.7080199412975567, "learning_rate": 2.614760746147608e-06, "loss": 0.5958, "step": 18131 }, { "epoch": 0.5293860033283699, "grad_norm": 0.8065019062226989, "learning_rate": 2.6145985401459854e-06, "loss": 0.721, "step": 18132 }, { "epoch": 0.5294151995562173, "grad_norm": 0.7181861621825169, "learning_rate": 2.6144363341443634e-06, "loss": 0.6277, "step": 18133 }, { "epoch": 0.5294443957840647, "grad_norm": 0.6919731716350858, "learning_rate": 2.6142741281427414e-06, "loss": 0.5599, "step": 18134 }, { "epoch": 0.529473592011912, "grad_norm": 0.7256695454287407, "learning_rate": 2.6141119221411194e-06, "loss": 0.6264, "step": 18135 }, { "epoch": 0.5295027882397594, "grad_norm": 0.6831646607585855, "learning_rate": 2.613949716139497e-06, "loss": 0.5933, "step": 18136 }, { "epoch": 0.5295319844676067, "grad_norm": 0.7249531761490557, "learning_rate": 2.613787510137875e-06, "loss": 0.6132, "step": 18137 }, { "epoch": 0.5295611806954541, "grad_norm": 0.7556670200835478, "learning_rate": 2.613625304136253e-06, "loss": 0.6978, "step": 18138 }, { "epoch": 0.5295903769233015, "grad_norm": 0.7088825970055211, "learning_rate": 2.613463098134631e-06, "loss": 0.6311, "step": 18139 }, { "epoch": 0.5296195731511488, "grad_norm": 0.7542719500710968, "learning_rate": 2.613300892133009e-06, "loss": 0.7246, "step": 18140 }, { "epoch": 0.5296487693789962, "grad_norm": 0.6826973534147246, "learning_rate": 2.6131386861313875e-06, "loss": 0.552, "step": 18141 }, { "epoch": 0.5296779656068435, "grad_norm": 0.8265569710744968, "learning_rate": 2.612976480129765e-06, "loss": 0.5684, "step": 18142 }, { "epoch": 0.5297071618346909, "grad_norm": 0.7273682718697438, "learning_rate": 2.612814274128143e-06, "loss": 0.6573, "step": 18143 }, { "epoch": 0.5297363580625383, "grad_norm": 0.7166758763034696, "learning_rate": 2.612652068126521e-06, "loss": 0.5921, "step": 18144 }, { "epoch": 0.5297655542903856, "grad_norm": 0.7264618843194504, "learning_rate": 2.612489862124899e-06, "loss": 0.647, "step": 18145 }, { "epoch": 0.529794750518233, "grad_norm": 0.7447559404830199, "learning_rate": 2.612327656123277e-06, "loss": 0.6736, "step": 18146 }, { "epoch": 0.5298239467460804, "grad_norm": 0.7489025301041818, "learning_rate": 2.6121654501216547e-06, "loss": 0.706, "step": 18147 }, { "epoch": 0.5298531429739277, "grad_norm": 0.7203560851242841, "learning_rate": 2.6120032441200327e-06, "loss": 0.6775, "step": 18148 }, { "epoch": 0.5298823392017751, "grad_norm": 0.8745579071593043, "learning_rate": 2.6118410381184107e-06, "loss": 0.6121, "step": 18149 }, { "epoch": 0.5299115354296224, "grad_norm": 0.7049261613989454, "learning_rate": 2.6116788321167887e-06, "loss": 0.6393, "step": 18150 }, { "epoch": 0.5299407316574699, "grad_norm": 0.6937367118361188, "learning_rate": 2.6115166261151663e-06, "loss": 0.6188, "step": 18151 }, { "epoch": 0.5299699278853173, "grad_norm": 0.7287500933160371, "learning_rate": 2.6113544201135443e-06, "loss": 0.6199, "step": 18152 }, { "epoch": 0.5299991241131646, "grad_norm": 0.739498251731085, "learning_rate": 2.6111922141119223e-06, "loss": 0.6562, "step": 18153 }, { "epoch": 0.530028320341012, "grad_norm": 0.7285672838337394, "learning_rate": 2.6110300081103003e-06, "loss": 0.6617, "step": 18154 }, { "epoch": 0.5300575165688594, "grad_norm": 0.6554730968941933, "learning_rate": 2.610867802108678e-06, "loss": 0.535, "step": 18155 }, { "epoch": 0.5300867127967067, "grad_norm": 0.7026422923778473, "learning_rate": 2.610705596107056e-06, "loss": 0.5842, "step": 18156 }, { "epoch": 0.5301159090245541, "grad_norm": 0.6957902484349682, "learning_rate": 2.610543390105434e-06, "loss": 0.6298, "step": 18157 }, { "epoch": 0.5301451052524014, "grad_norm": 0.7534108064700706, "learning_rate": 2.610381184103812e-06, "loss": 0.6719, "step": 18158 }, { "epoch": 0.5301743014802488, "grad_norm": 0.7223896710339421, "learning_rate": 2.6102189781021903e-06, "loss": 0.6845, "step": 18159 }, { "epoch": 0.5302034977080962, "grad_norm": 0.7041261427412988, "learning_rate": 2.6100567721005683e-06, "loss": 0.5893, "step": 18160 }, { "epoch": 0.5302326939359435, "grad_norm": 0.7128635181542146, "learning_rate": 2.609894566098946e-06, "loss": 0.6433, "step": 18161 }, { "epoch": 0.5302618901637909, "grad_norm": 0.6903375477773468, "learning_rate": 2.609732360097324e-06, "loss": 0.6071, "step": 18162 }, { "epoch": 0.5302910863916382, "grad_norm": 0.7641090293632709, "learning_rate": 2.609570154095702e-06, "loss": 0.7238, "step": 18163 }, { "epoch": 0.5303202826194856, "grad_norm": 0.7833390415223697, "learning_rate": 2.60940794809408e-06, "loss": 0.7351, "step": 18164 }, { "epoch": 0.530349478847333, "grad_norm": 0.6906722296425315, "learning_rate": 2.609245742092458e-06, "loss": 0.5918, "step": 18165 }, { "epoch": 0.5303786750751803, "grad_norm": 0.7529163036940931, "learning_rate": 2.6090835360908355e-06, "loss": 0.6946, "step": 18166 }, { "epoch": 0.5304078713030277, "grad_norm": 0.7571277824134182, "learning_rate": 2.6089213300892135e-06, "loss": 0.6752, "step": 18167 }, { "epoch": 0.530437067530875, "grad_norm": 0.7640132788835676, "learning_rate": 2.6087591240875915e-06, "loss": 0.6355, "step": 18168 }, { "epoch": 0.5304662637587224, "grad_norm": 0.7791948241410283, "learning_rate": 2.6085969180859695e-06, "loss": 0.7301, "step": 18169 }, { "epoch": 0.5304954599865698, "grad_norm": 0.7193102521985333, "learning_rate": 2.608434712084347e-06, "loss": 0.6265, "step": 18170 }, { "epoch": 0.5305246562144171, "grad_norm": 0.7303340135298497, "learning_rate": 2.608272506082725e-06, "loss": 0.6506, "step": 18171 }, { "epoch": 0.5305538524422645, "grad_norm": 0.6807197492864161, "learning_rate": 2.608110300081103e-06, "loss": 0.5548, "step": 18172 }, { "epoch": 0.5305830486701119, "grad_norm": 0.6879151890314916, "learning_rate": 2.607948094079481e-06, "loss": 0.5155, "step": 18173 }, { "epoch": 0.5306122448979592, "grad_norm": 0.64245407093642, "learning_rate": 2.6077858880778587e-06, "loss": 0.5318, "step": 18174 }, { "epoch": 0.5306414411258066, "grad_norm": 0.7416897942200036, "learning_rate": 2.6076236820762367e-06, "loss": 0.6806, "step": 18175 }, { "epoch": 0.5306706373536539, "grad_norm": 0.7169422615597724, "learning_rate": 2.6074614760746147e-06, "loss": 0.6296, "step": 18176 }, { "epoch": 0.5306998335815013, "grad_norm": 0.780554426530272, "learning_rate": 2.6072992700729928e-06, "loss": 0.6854, "step": 18177 }, { "epoch": 0.5307290298093487, "grad_norm": 0.713503102030779, "learning_rate": 2.607137064071371e-06, "loss": 0.5864, "step": 18178 }, { "epoch": 0.530758226037196, "grad_norm": 0.7553027265644449, "learning_rate": 2.606974858069749e-06, "loss": 0.6726, "step": 18179 }, { "epoch": 0.5307874222650434, "grad_norm": 0.7094749307061853, "learning_rate": 2.6068126520681268e-06, "loss": 0.6223, "step": 18180 }, { "epoch": 0.5308166184928907, "grad_norm": 0.696787011963698, "learning_rate": 2.6066504460665048e-06, "loss": 0.5591, "step": 18181 }, { "epoch": 0.5308458147207381, "grad_norm": 0.819750668777626, "learning_rate": 2.6064882400648828e-06, "loss": 0.7401, "step": 18182 }, { "epoch": 0.5308750109485855, "grad_norm": 0.8207983715146362, "learning_rate": 2.606326034063261e-06, "loss": 0.6704, "step": 18183 }, { "epoch": 0.5309042071764328, "grad_norm": 0.6914602070525427, "learning_rate": 2.606163828061639e-06, "loss": 0.6102, "step": 18184 }, { "epoch": 0.5309334034042802, "grad_norm": 0.6735681930250024, "learning_rate": 2.6060016220600164e-06, "loss": 0.5605, "step": 18185 }, { "epoch": 0.5309625996321276, "grad_norm": 0.6975682165545365, "learning_rate": 2.6058394160583944e-06, "loss": 0.618, "step": 18186 }, { "epoch": 0.5309917958599749, "grad_norm": 0.7119702075469524, "learning_rate": 2.6056772100567724e-06, "loss": 0.6307, "step": 18187 }, { "epoch": 0.5310209920878223, "grad_norm": 0.7466690536884155, "learning_rate": 2.6055150040551504e-06, "loss": 0.6816, "step": 18188 }, { "epoch": 0.5310501883156696, "grad_norm": 0.7343483100712759, "learning_rate": 2.605352798053528e-06, "loss": 0.6603, "step": 18189 }, { "epoch": 0.531079384543517, "grad_norm": 0.7224960104430562, "learning_rate": 2.605190592051906e-06, "loss": 0.6125, "step": 18190 }, { "epoch": 0.5311085807713644, "grad_norm": 1.2335959547855484, "learning_rate": 2.605028386050284e-06, "loss": 0.6585, "step": 18191 }, { "epoch": 0.5311377769992117, "grad_norm": 0.7296951316386135, "learning_rate": 2.604866180048662e-06, "loss": 0.6934, "step": 18192 }, { "epoch": 0.5311669732270591, "grad_norm": 0.7972184469595659, "learning_rate": 2.6047039740470396e-06, "loss": 0.5914, "step": 18193 }, { "epoch": 0.5311961694549064, "grad_norm": 0.7064395306691084, "learning_rate": 2.6045417680454176e-06, "loss": 0.6211, "step": 18194 }, { "epoch": 0.5312253656827538, "grad_norm": 0.7075398015705562, "learning_rate": 2.6043795620437956e-06, "loss": 0.6188, "step": 18195 }, { "epoch": 0.5312545619106012, "grad_norm": 0.7326602542325357, "learning_rate": 2.6042173560421736e-06, "loss": 0.6254, "step": 18196 }, { "epoch": 0.5312837581384485, "grad_norm": 0.7125612124802134, "learning_rate": 2.604055150040552e-06, "loss": 0.6637, "step": 18197 }, { "epoch": 0.5313129543662959, "grad_norm": 0.7047265041603087, "learning_rate": 2.60389294403893e-06, "loss": 0.5731, "step": 18198 }, { "epoch": 0.5313421505941432, "grad_norm": 0.6553799512753627, "learning_rate": 2.6037307380373076e-06, "loss": 0.5395, "step": 18199 }, { "epoch": 0.5313713468219906, "grad_norm": 0.7362324366791464, "learning_rate": 2.6035685320356856e-06, "loss": 0.7154, "step": 18200 }, { "epoch": 0.531400543049838, "grad_norm": 0.6929212627593148, "learning_rate": 2.6034063260340636e-06, "loss": 0.63, "step": 18201 }, { "epoch": 0.5314297392776853, "grad_norm": 0.6765863900582552, "learning_rate": 2.6032441200324417e-06, "loss": 0.6079, "step": 18202 }, { "epoch": 0.5314589355055327, "grad_norm": 0.7383678655340263, "learning_rate": 2.6030819140308192e-06, "loss": 0.6651, "step": 18203 }, { "epoch": 0.53148813173338, "grad_norm": 0.7081677340193465, "learning_rate": 2.6029197080291972e-06, "loss": 0.5988, "step": 18204 }, { "epoch": 0.5315173279612274, "grad_norm": 0.7068803305891818, "learning_rate": 2.6027575020275752e-06, "loss": 0.6266, "step": 18205 }, { "epoch": 0.5315465241890748, "grad_norm": 0.702418488856975, "learning_rate": 2.6025952960259533e-06, "loss": 0.6582, "step": 18206 }, { "epoch": 0.5315757204169221, "grad_norm": 0.7008102448055304, "learning_rate": 2.6024330900243313e-06, "loss": 0.6169, "step": 18207 }, { "epoch": 0.5316049166447695, "grad_norm": 0.7114822199942922, "learning_rate": 2.602270884022709e-06, "loss": 0.6216, "step": 18208 }, { "epoch": 0.5316341128726169, "grad_norm": 0.7906724270245802, "learning_rate": 2.602108678021087e-06, "loss": 0.6856, "step": 18209 }, { "epoch": 0.5316633091004642, "grad_norm": 0.9473841036421875, "learning_rate": 2.601946472019465e-06, "loss": 0.7562, "step": 18210 }, { "epoch": 0.5316925053283116, "grad_norm": 0.7031093898470986, "learning_rate": 2.601784266017843e-06, "loss": 0.6437, "step": 18211 }, { "epoch": 0.5317217015561589, "grad_norm": 0.7361461155274854, "learning_rate": 2.6016220600162204e-06, "loss": 0.6367, "step": 18212 }, { "epoch": 0.5317508977840063, "grad_norm": 0.7186451850994063, "learning_rate": 2.6014598540145985e-06, "loss": 0.5775, "step": 18213 }, { "epoch": 0.5317800940118537, "grad_norm": 0.6993942370140099, "learning_rate": 2.6012976480129765e-06, "loss": 0.6119, "step": 18214 }, { "epoch": 0.531809290239701, "grad_norm": 0.7822906714461071, "learning_rate": 2.6011354420113545e-06, "loss": 0.7311, "step": 18215 }, { "epoch": 0.5318384864675484, "grad_norm": 0.7238156700993607, "learning_rate": 2.600973236009733e-06, "loss": 0.6078, "step": 18216 }, { "epoch": 0.5318676826953957, "grad_norm": 0.7808875822992286, "learning_rate": 2.600811030008111e-06, "loss": 0.6956, "step": 18217 }, { "epoch": 0.5318968789232431, "grad_norm": 0.8721812198686039, "learning_rate": 2.6006488240064885e-06, "loss": 0.714, "step": 18218 }, { "epoch": 0.5319260751510905, "grad_norm": 0.7101337845985448, "learning_rate": 2.6004866180048665e-06, "loss": 0.6488, "step": 18219 }, { "epoch": 0.5319552713789378, "grad_norm": 0.7152189128390509, "learning_rate": 2.6003244120032445e-06, "loss": 0.6311, "step": 18220 }, { "epoch": 0.5319844676067852, "grad_norm": 0.7512160863979095, "learning_rate": 2.6001622060016225e-06, "loss": 0.6645, "step": 18221 }, { "epoch": 0.5320136638346326, "grad_norm": 0.7099944276247734, "learning_rate": 2.6e-06, "loss": 0.6243, "step": 18222 }, { "epoch": 0.5320428600624799, "grad_norm": 0.6543741075566257, "learning_rate": 2.599837793998378e-06, "loss": 0.5436, "step": 18223 }, { "epoch": 0.5320720562903273, "grad_norm": 0.7456270917656999, "learning_rate": 2.599675587996756e-06, "loss": 0.6842, "step": 18224 }, { "epoch": 0.5321012525181746, "grad_norm": 0.7595854593476303, "learning_rate": 2.599513381995134e-06, "loss": 0.712, "step": 18225 }, { "epoch": 0.532130448746022, "grad_norm": 0.717979349040129, "learning_rate": 2.599351175993512e-06, "loss": 0.5819, "step": 18226 }, { "epoch": 0.5321596449738694, "grad_norm": 0.7193688954227611, "learning_rate": 2.5991889699918897e-06, "loss": 0.6593, "step": 18227 }, { "epoch": 0.5321888412017167, "grad_norm": 0.7113980925668456, "learning_rate": 2.5990267639902677e-06, "loss": 0.619, "step": 18228 }, { "epoch": 0.5322180374295641, "grad_norm": 0.7592529058561792, "learning_rate": 2.5988645579886457e-06, "loss": 0.6259, "step": 18229 }, { "epoch": 0.5322472336574114, "grad_norm": 0.7335184878837137, "learning_rate": 2.5987023519870237e-06, "loss": 0.664, "step": 18230 }, { "epoch": 0.5322764298852588, "grad_norm": 0.7655236456662832, "learning_rate": 2.5985401459854013e-06, "loss": 0.7012, "step": 18231 }, { "epoch": 0.5323056261131062, "grad_norm": 0.7727511370245127, "learning_rate": 2.5983779399837793e-06, "loss": 0.6574, "step": 18232 }, { "epoch": 0.5323348223409535, "grad_norm": 0.7237362102014314, "learning_rate": 2.5982157339821573e-06, "loss": 0.6562, "step": 18233 }, { "epoch": 0.5323640185688009, "grad_norm": 0.7839146058227436, "learning_rate": 2.5980535279805353e-06, "loss": 0.743, "step": 18234 }, { "epoch": 0.5323932147966483, "grad_norm": 0.7213912180537205, "learning_rate": 2.5978913219789138e-06, "loss": 0.6718, "step": 18235 }, { "epoch": 0.5324224110244956, "grad_norm": 0.7228182052938109, "learning_rate": 2.5977291159772918e-06, "loss": 0.6726, "step": 18236 }, { "epoch": 0.532451607252343, "grad_norm": 0.7643912982557282, "learning_rate": 2.5975669099756693e-06, "loss": 0.6243, "step": 18237 }, { "epoch": 0.5324808034801903, "grad_norm": 0.6931528029689674, "learning_rate": 2.5974047039740474e-06, "loss": 0.624, "step": 18238 }, { "epoch": 0.5325099997080377, "grad_norm": 0.7863875207360416, "learning_rate": 2.5972424979724254e-06, "loss": 0.6807, "step": 18239 }, { "epoch": 0.5325391959358851, "grad_norm": 0.7993331676939892, "learning_rate": 2.5970802919708034e-06, "loss": 0.7432, "step": 18240 }, { "epoch": 0.5325683921637324, "grad_norm": 0.720264562882961, "learning_rate": 2.596918085969181e-06, "loss": 0.5851, "step": 18241 }, { "epoch": 0.5325975883915798, "grad_norm": 0.7359356731405943, "learning_rate": 2.596755879967559e-06, "loss": 0.635, "step": 18242 }, { "epoch": 0.5326267846194271, "grad_norm": 0.7205462158119136, "learning_rate": 2.596593673965937e-06, "loss": 0.6615, "step": 18243 }, { "epoch": 0.5326559808472745, "grad_norm": 0.7316084036264835, "learning_rate": 2.596431467964315e-06, "loss": 0.7342, "step": 18244 }, { "epoch": 0.5326851770751219, "grad_norm": 0.7220437745864873, "learning_rate": 2.596269261962693e-06, "loss": 0.6693, "step": 18245 }, { "epoch": 0.5327143733029692, "grad_norm": 0.720811377366602, "learning_rate": 2.5961070559610706e-06, "loss": 0.6666, "step": 18246 }, { "epoch": 0.5327435695308166, "grad_norm": 0.7115122730033816, "learning_rate": 2.5959448499594486e-06, "loss": 0.6312, "step": 18247 }, { "epoch": 0.532772765758664, "grad_norm": 0.7679891420259269, "learning_rate": 2.5957826439578266e-06, "loss": 0.7607, "step": 18248 }, { "epoch": 0.5328019619865113, "grad_norm": 0.6434289337502357, "learning_rate": 2.5956204379562046e-06, "loss": 0.5331, "step": 18249 }, { "epoch": 0.5328311582143587, "grad_norm": 0.7364490458679468, "learning_rate": 2.595458231954582e-06, "loss": 0.6945, "step": 18250 }, { "epoch": 0.532860354442206, "grad_norm": 0.7635442847245202, "learning_rate": 2.59529602595296e-06, "loss": 0.6746, "step": 18251 }, { "epoch": 0.5328895506700534, "grad_norm": 0.668144021638849, "learning_rate": 2.595133819951338e-06, "loss": 0.5718, "step": 18252 }, { "epoch": 0.5329187468979008, "grad_norm": 0.7110441456687091, "learning_rate": 2.594971613949716e-06, "loss": 0.6321, "step": 18253 }, { "epoch": 0.5329479431257481, "grad_norm": 0.6757921256309897, "learning_rate": 2.5948094079480946e-06, "loss": 0.5702, "step": 18254 }, { "epoch": 0.5329771393535955, "grad_norm": 0.7746625573860617, "learning_rate": 2.5946472019464726e-06, "loss": 0.6868, "step": 18255 }, { "epoch": 0.5330063355814428, "grad_norm": 0.8580289172586344, "learning_rate": 2.59448499594485e-06, "loss": 0.6462, "step": 18256 }, { "epoch": 0.5330355318092902, "grad_norm": 0.7663776888489715, "learning_rate": 2.5943227899432282e-06, "loss": 0.6955, "step": 18257 }, { "epoch": 0.5330647280371376, "grad_norm": 0.6926425852319181, "learning_rate": 2.5941605839416062e-06, "loss": 0.5649, "step": 18258 }, { "epoch": 0.5330939242649849, "grad_norm": 0.7691337135322044, "learning_rate": 2.5939983779399842e-06, "loss": 0.6759, "step": 18259 }, { "epoch": 0.5331231204928323, "grad_norm": 0.7350124059381908, "learning_rate": 2.593836171938362e-06, "loss": 0.6953, "step": 18260 }, { "epoch": 0.5331523167206796, "grad_norm": 0.7838909963208741, "learning_rate": 2.59367396593674e-06, "loss": 0.7234, "step": 18261 }, { "epoch": 0.533181512948527, "grad_norm": 0.7720968260685137, "learning_rate": 2.593511759935118e-06, "loss": 0.7013, "step": 18262 }, { "epoch": 0.5332107091763744, "grad_norm": 0.7858996657789509, "learning_rate": 2.593349553933496e-06, "loss": 0.7954, "step": 18263 }, { "epoch": 0.5332399054042217, "grad_norm": 0.6685089635013673, "learning_rate": 2.593187347931874e-06, "loss": 0.594, "step": 18264 }, { "epoch": 0.5332691016320691, "grad_norm": 0.7071088385087889, "learning_rate": 2.5930251419302514e-06, "loss": 0.6543, "step": 18265 }, { "epoch": 0.5332982978599164, "grad_norm": 0.7248991529610652, "learning_rate": 2.5928629359286294e-06, "loss": 0.7161, "step": 18266 }, { "epoch": 0.5333274940877638, "grad_norm": 0.8158052663473858, "learning_rate": 2.5927007299270074e-06, "loss": 0.6794, "step": 18267 }, { "epoch": 0.5333566903156112, "grad_norm": 0.7619523859530635, "learning_rate": 2.5925385239253854e-06, "loss": 0.7153, "step": 18268 }, { "epoch": 0.5333858865434585, "grad_norm": 0.6952090955942137, "learning_rate": 2.592376317923763e-06, "loss": 0.6193, "step": 18269 }, { "epoch": 0.5334150827713059, "grad_norm": 0.6657548879906225, "learning_rate": 2.592214111922141e-06, "loss": 0.5598, "step": 18270 }, { "epoch": 0.5334442789991533, "grad_norm": 0.7394435548132016, "learning_rate": 2.592051905920519e-06, "loss": 0.637, "step": 18271 }, { "epoch": 0.5334734752270007, "grad_norm": 0.6959599747409271, "learning_rate": 2.591889699918897e-06, "loss": 0.5859, "step": 18272 }, { "epoch": 0.5335026714548481, "grad_norm": 0.722391882634083, "learning_rate": 2.5917274939172755e-06, "loss": 0.6905, "step": 18273 }, { "epoch": 0.5335318676826954, "grad_norm": 0.7293347226565837, "learning_rate": 2.5915652879156535e-06, "loss": 0.6828, "step": 18274 }, { "epoch": 0.5335610639105428, "grad_norm": 0.7374143532282534, "learning_rate": 2.591403081914031e-06, "loss": 0.6292, "step": 18275 }, { "epoch": 0.5335902601383902, "grad_norm": 0.7133749840657186, "learning_rate": 2.591240875912409e-06, "loss": 0.6833, "step": 18276 }, { "epoch": 0.5336194563662375, "grad_norm": 0.6919595096725818, "learning_rate": 2.591078669910787e-06, "loss": 0.6046, "step": 18277 }, { "epoch": 0.5336486525940849, "grad_norm": 0.7134520357832669, "learning_rate": 2.590916463909165e-06, "loss": 0.6702, "step": 18278 }, { "epoch": 0.5336778488219323, "grad_norm": 0.7170764846358277, "learning_rate": 2.5907542579075427e-06, "loss": 0.6013, "step": 18279 }, { "epoch": 0.5337070450497796, "grad_norm": 0.7059089221523193, "learning_rate": 2.5905920519059207e-06, "loss": 0.6371, "step": 18280 }, { "epoch": 0.533736241277627, "grad_norm": 0.7133029679009052, "learning_rate": 2.5904298459042987e-06, "loss": 0.6486, "step": 18281 }, { "epoch": 0.5337654375054743, "grad_norm": 0.7210559225768159, "learning_rate": 2.5902676399026767e-06, "loss": 0.6536, "step": 18282 }, { "epoch": 0.5337946337333217, "grad_norm": 0.6866763300680278, "learning_rate": 2.5901054339010547e-06, "loss": 0.6199, "step": 18283 }, { "epoch": 0.5338238299611691, "grad_norm": 0.7505015152290762, "learning_rate": 2.5899432278994323e-06, "loss": 0.7611, "step": 18284 }, { "epoch": 0.5338530261890164, "grad_norm": 0.7883666616616336, "learning_rate": 2.5897810218978103e-06, "loss": 0.7485, "step": 18285 }, { "epoch": 0.5338822224168638, "grad_norm": 0.7428108093344454, "learning_rate": 2.5896188158961883e-06, "loss": 0.6862, "step": 18286 }, { "epoch": 0.5339114186447111, "grad_norm": 0.737771007720932, "learning_rate": 2.5894566098945663e-06, "loss": 0.6677, "step": 18287 }, { "epoch": 0.5339406148725585, "grad_norm": 0.7504358735714264, "learning_rate": 2.589294403892944e-06, "loss": 0.6973, "step": 18288 }, { "epoch": 0.5339698111004059, "grad_norm": 0.9431132148741864, "learning_rate": 2.589132197891322e-06, "loss": 0.627, "step": 18289 }, { "epoch": 0.5339990073282532, "grad_norm": 0.7959553498458374, "learning_rate": 2.5889699918897e-06, "loss": 0.7111, "step": 18290 }, { "epoch": 0.5340282035561006, "grad_norm": 0.7305594631367933, "learning_rate": 2.588807785888078e-06, "loss": 0.688, "step": 18291 }, { "epoch": 0.534057399783948, "grad_norm": 0.6709379572360108, "learning_rate": 2.5886455798864563e-06, "loss": 0.5677, "step": 18292 }, { "epoch": 0.5340865960117953, "grad_norm": 0.6784483567657497, "learning_rate": 2.5884833738848343e-06, "loss": 0.5859, "step": 18293 }, { "epoch": 0.5341157922396427, "grad_norm": 0.8007785942757768, "learning_rate": 2.588321167883212e-06, "loss": 0.7321, "step": 18294 }, { "epoch": 0.53414498846749, "grad_norm": 0.7875114831135573, "learning_rate": 2.58815896188159e-06, "loss": 0.6963, "step": 18295 }, { "epoch": 0.5341741846953374, "grad_norm": 0.7618470806338479, "learning_rate": 2.587996755879968e-06, "loss": 0.5937, "step": 18296 }, { "epoch": 0.5342033809231848, "grad_norm": 0.7071287162283528, "learning_rate": 2.587834549878346e-06, "loss": 0.5759, "step": 18297 }, { "epoch": 0.5342325771510321, "grad_norm": 0.7112631961194658, "learning_rate": 2.5876723438767235e-06, "loss": 0.5982, "step": 18298 }, { "epoch": 0.5342617733788795, "grad_norm": 0.7679243954780205, "learning_rate": 2.5875101378751015e-06, "loss": 0.6635, "step": 18299 }, { "epoch": 0.5342909696067268, "grad_norm": 0.7207723107196778, "learning_rate": 2.5873479318734795e-06, "loss": 0.6266, "step": 18300 }, { "epoch": 0.5343201658345742, "grad_norm": 0.7739086731158202, "learning_rate": 2.5871857258718575e-06, "loss": 0.6528, "step": 18301 }, { "epoch": 0.5343493620624216, "grad_norm": 0.6982036216440608, "learning_rate": 2.5870235198702356e-06, "loss": 0.5906, "step": 18302 }, { "epoch": 0.5343785582902689, "grad_norm": 0.8093888076621805, "learning_rate": 2.586861313868613e-06, "loss": 0.7835, "step": 18303 }, { "epoch": 0.5344077545181163, "grad_norm": 0.7208278024487151, "learning_rate": 2.586699107866991e-06, "loss": 0.5897, "step": 18304 }, { "epoch": 0.5344369507459636, "grad_norm": 0.7674532235140203, "learning_rate": 2.586536901865369e-06, "loss": 0.6693, "step": 18305 }, { "epoch": 0.534466146973811, "grad_norm": 0.6917782098528232, "learning_rate": 2.586374695863747e-06, "loss": 0.5986, "step": 18306 }, { "epoch": 0.5344953432016584, "grad_norm": 0.6894679672069817, "learning_rate": 2.5862124898621247e-06, "loss": 0.6209, "step": 18307 }, { "epoch": 0.5345245394295057, "grad_norm": 0.7280220282532907, "learning_rate": 2.5860502838605027e-06, "loss": 0.6667, "step": 18308 }, { "epoch": 0.5345537356573531, "grad_norm": 0.72198261077423, "learning_rate": 2.5858880778588808e-06, "loss": 0.6139, "step": 18309 }, { "epoch": 0.5345829318852005, "grad_norm": 0.7106521270887077, "learning_rate": 2.585725871857259e-06, "loss": 0.6556, "step": 18310 }, { "epoch": 0.5346121281130478, "grad_norm": 0.7074052315512916, "learning_rate": 2.585563665855637e-06, "loss": 0.586, "step": 18311 }, { "epoch": 0.5346413243408952, "grad_norm": 0.7702841809532902, "learning_rate": 2.585401459854015e-06, "loss": 0.681, "step": 18312 }, { "epoch": 0.5346705205687425, "grad_norm": 0.8302029176312353, "learning_rate": 2.5852392538523928e-06, "loss": 0.6678, "step": 18313 }, { "epoch": 0.5346997167965899, "grad_norm": 0.7399815422890953, "learning_rate": 2.585077047850771e-06, "loss": 0.6962, "step": 18314 }, { "epoch": 0.5347289130244373, "grad_norm": 0.7131919590554665, "learning_rate": 2.584914841849149e-06, "loss": 0.6545, "step": 18315 }, { "epoch": 0.5347581092522846, "grad_norm": 0.7668955138476878, "learning_rate": 2.584752635847527e-06, "loss": 0.6718, "step": 18316 }, { "epoch": 0.534787305480132, "grad_norm": 0.7371771966381766, "learning_rate": 2.5845904298459044e-06, "loss": 0.6424, "step": 18317 }, { "epoch": 0.5348165017079793, "grad_norm": 0.680305087110528, "learning_rate": 2.5844282238442824e-06, "loss": 0.5925, "step": 18318 }, { "epoch": 0.5348456979358267, "grad_norm": 0.7464111630594621, "learning_rate": 2.5842660178426604e-06, "loss": 0.6815, "step": 18319 }, { "epoch": 0.5348748941636741, "grad_norm": 0.720040233977794, "learning_rate": 2.5841038118410384e-06, "loss": 0.6468, "step": 18320 }, { "epoch": 0.5349040903915214, "grad_norm": 0.787093796111313, "learning_rate": 2.5839416058394164e-06, "loss": 0.6036, "step": 18321 }, { "epoch": 0.5349332866193688, "grad_norm": 0.745134605201937, "learning_rate": 2.583779399837794e-06, "loss": 0.7268, "step": 18322 }, { "epoch": 0.5349624828472161, "grad_norm": 0.7667524005563684, "learning_rate": 2.583617193836172e-06, "loss": 0.621, "step": 18323 }, { "epoch": 0.5349916790750635, "grad_norm": 0.7122094680635326, "learning_rate": 2.58345498783455e-06, "loss": 0.5898, "step": 18324 }, { "epoch": 0.5350208753029109, "grad_norm": 0.7291254772590928, "learning_rate": 2.583292781832928e-06, "loss": 0.6594, "step": 18325 }, { "epoch": 0.5350500715307582, "grad_norm": 0.820247388098135, "learning_rate": 2.5831305758313056e-06, "loss": 0.6727, "step": 18326 }, { "epoch": 0.5350792677586056, "grad_norm": 0.7260508875065591, "learning_rate": 2.5829683698296836e-06, "loss": 0.6476, "step": 18327 }, { "epoch": 0.535108463986453, "grad_norm": 0.6869080205095378, "learning_rate": 2.5828061638280616e-06, "loss": 0.5788, "step": 18328 }, { "epoch": 0.5351376602143003, "grad_norm": 0.8061849039115121, "learning_rate": 2.58264395782644e-06, "loss": 0.694, "step": 18329 }, { "epoch": 0.5351668564421477, "grad_norm": 0.7587004129890617, "learning_rate": 2.582481751824818e-06, "loss": 0.7029, "step": 18330 }, { "epoch": 0.535196052669995, "grad_norm": 0.7333404703486487, "learning_rate": 2.582319545823196e-06, "loss": 0.6579, "step": 18331 }, { "epoch": 0.5352252488978424, "grad_norm": 0.7400112628517771, "learning_rate": 2.5821573398215736e-06, "loss": 0.6748, "step": 18332 }, { "epoch": 0.5352544451256898, "grad_norm": 0.6838546559241641, "learning_rate": 2.5819951338199516e-06, "loss": 0.6123, "step": 18333 }, { "epoch": 0.5352836413535371, "grad_norm": 0.7124936755955416, "learning_rate": 2.5818329278183297e-06, "loss": 0.6161, "step": 18334 }, { "epoch": 0.5353128375813845, "grad_norm": 0.7204271742803103, "learning_rate": 2.5816707218167077e-06, "loss": 0.6013, "step": 18335 }, { "epoch": 0.5353420338092318, "grad_norm": 0.7436858187746157, "learning_rate": 2.5815085158150852e-06, "loss": 0.6924, "step": 18336 }, { "epoch": 0.5353712300370792, "grad_norm": 0.6599532334500853, "learning_rate": 2.5813463098134633e-06, "loss": 0.5325, "step": 18337 }, { "epoch": 0.5354004262649266, "grad_norm": 0.7074291542879736, "learning_rate": 2.5811841038118413e-06, "loss": 0.626, "step": 18338 }, { "epoch": 0.5354296224927739, "grad_norm": 0.7399869217092072, "learning_rate": 2.5810218978102193e-06, "loss": 0.6418, "step": 18339 }, { "epoch": 0.5354588187206213, "grad_norm": 0.7461139943150986, "learning_rate": 2.5808596918085973e-06, "loss": 0.6651, "step": 18340 }, { "epoch": 0.5354880149484686, "grad_norm": 0.6948551965731776, "learning_rate": 2.580697485806975e-06, "loss": 0.5808, "step": 18341 }, { "epoch": 0.535517211176316, "grad_norm": 0.6526322376580794, "learning_rate": 2.580535279805353e-06, "loss": 0.5899, "step": 18342 }, { "epoch": 0.5355464074041634, "grad_norm": 0.7345117675943605, "learning_rate": 2.580373073803731e-06, "loss": 0.6168, "step": 18343 }, { "epoch": 0.5355756036320107, "grad_norm": 0.729531455446745, "learning_rate": 2.580210867802109e-06, "loss": 0.6542, "step": 18344 }, { "epoch": 0.5356047998598581, "grad_norm": 0.7365337170371524, "learning_rate": 2.5800486618004865e-06, "loss": 0.6554, "step": 18345 }, { "epoch": 0.5356339960877055, "grad_norm": 0.7185932358164396, "learning_rate": 2.5798864557988645e-06, "loss": 0.6628, "step": 18346 }, { "epoch": 0.5356631923155528, "grad_norm": 0.7658282030721224, "learning_rate": 2.5797242497972425e-06, "loss": 0.6356, "step": 18347 }, { "epoch": 0.5356923885434002, "grad_norm": 0.8323189275514411, "learning_rate": 2.579562043795621e-06, "loss": 0.6953, "step": 18348 }, { "epoch": 0.5357215847712475, "grad_norm": 0.7199311874296832, "learning_rate": 2.579399837793999e-06, "loss": 0.5846, "step": 18349 }, { "epoch": 0.5357507809990949, "grad_norm": 0.7534469225785152, "learning_rate": 2.579237631792377e-06, "loss": 0.7026, "step": 18350 }, { "epoch": 0.5357799772269423, "grad_norm": 0.7182670948137677, "learning_rate": 2.5790754257907545e-06, "loss": 0.6391, "step": 18351 }, { "epoch": 0.5358091734547896, "grad_norm": 0.679036480662687, "learning_rate": 2.5789132197891325e-06, "loss": 0.5508, "step": 18352 }, { "epoch": 0.535838369682637, "grad_norm": 0.7320683880236061, "learning_rate": 2.5787510137875105e-06, "loss": 0.6366, "step": 18353 }, { "epoch": 0.5358675659104843, "grad_norm": 0.7258723969526962, "learning_rate": 2.5785888077858885e-06, "loss": 0.6871, "step": 18354 }, { "epoch": 0.5358967621383317, "grad_norm": 0.7421713547155457, "learning_rate": 2.578426601784266e-06, "loss": 0.6716, "step": 18355 }, { "epoch": 0.5359259583661791, "grad_norm": 0.8157270863798528, "learning_rate": 2.578264395782644e-06, "loss": 0.7537, "step": 18356 }, { "epoch": 0.5359551545940264, "grad_norm": 0.7165898004997038, "learning_rate": 2.578102189781022e-06, "loss": 0.631, "step": 18357 }, { "epoch": 0.5359843508218738, "grad_norm": 0.7172001575213025, "learning_rate": 2.5779399837794e-06, "loss": 0.6001, "step": 18358 }, { "epoch": 0.5360135470497212, "grad_norm": 0.748834861110746, "learning_rate": 2.577777777777778e-06, "loss": 0.6175, "step": 18359 }, { "epoch": 0.5360427432775685, "grad_norm": 0.782965308925016, "learning_rate": 2.5776155717761557e-06, "loss": 0.7054, "step": 18360 }, { "epoch": 0.5360719395054159, "grad_norm": 0.7267097334014199, "learning_rate": 2.5774533657745337e-06, "loss": 0.7097, "step": 18361 }, { "epoch": 0.5361011357332632, "grad_norm": 0.75249320793609, "learning_rate": 2.5772911597729117e-06, "loss": 0.6376, "step": 18362 }, { "epoch": 0.5361303319611106, "grad_norm": 0.7272198772961511, "learning_rate": 2.5771289537712897e-06, "loss": 0.6399, "step": 18363 }, { "epoch": 0.536159528188958, "grad_norm": 0.7070586338474474, "learning_rate": 2.5769667477696673e-06, "loss": 0.6264, "step": 18364 }, { "epoch": 0.5361887244168053, "grad_norm": 0.8392893259933814, "learning_rate": 2.5768045417680453e-06, "loss": 0.7274, "step": 18365 }, { "epoch": 0.5362179206446527, "grad_norm": 0.7273963451525179, "learning_rate": 2.5766423357664233e-06, "loss": 0.6451, "step": 18366 }, { "epoch": 0.5362471168725, "grad_norm": 0.7477536588035495, "learning_rate": 2.5764801297648018e-06, "loss": 0.6735, "step": 18367 }, { "epoch": 0.5362763131003474, "grad_norm": 0.8041249000221843, "learning_rate": 2.5763179237631798e-06, "loss": 0.696, "step": 18368 }, { "epoch": 0.5363055093281948, "grad_norm": 0.7195729566992777, "learning_rate": 2.5761557177615578e-06, "loss": 0.6499, "step": 18369 }, { "epoch": 0.5363347055560421, "grad_norm": 0.7629015720804799, "learning_rate": 2.5759935117599354e-06, "loss": 0.6249, "step": 18370 }, { "epoch": 0.5363639017838895, "grad_norm": 0.8306167561148506, "learning_rate": 2.5758313057583134e-06, "loss": 0.5879, "step": 18371 }, { "epoch": 0.5363930980117368, "grad_norm": 0.7272990805965747, "learning_rate": 2.5756690997566914e-06, "loss": 0.6573, "step": 18372 }, { "epoch": 0.5364222942395842, "grad_norm": 0.7877124076610923, "learning_rate": 2.5755068937550694e-06, "loss": 0.7152, "step": 18373 }, { "epoch": 0.5364514904674316, "grad_norm": 0.8051880041557683, "learning_rate": 2.575344687753447e-06, "loss": 0.6697, "step": 18374 }, { "epoch": 0.5364806866952789, "grad_norm": 0.722978536454506, "learning_rate": 2.575182481751825e-06, "loss": 0.6397, "step": 18375 }, { "epoch": 0.5365098829231263, "grad_norm": 0.6923838344041447, "learning_rate": 2.575020275750203e-06, "loss": 0.5831, "step": 18376 }, { "epoch": 0.5365390791509737, "grad_norm": 0.7165661903587568, "learning_rate": 2.574858069748581e-06, "loss": 0.6549, "step": 18377 }, { "epoch": 0.536568275378821, "grad_norm": 0.7305800602556738, "learning_rate": 2.574695863746959e-06, "loss": 0.5751, "step": 18378 }, { "epoch": 0.5365974716066684, "grad_norm": 0.7572030683598043, "learning_rate": 2.5745336577453366e-06, "loss": 0.6956, "step": 18379 }, { "epoch": 0.5366266678345157, "grad_norm": 0.7554759123995625, "learning_rate": 2.5743714517437146e-06, "loss": 0.7107, "step": 18380 }, { "epoch": 0.5366558640623631, "grad_norm": 0.6812810177665494, "learning_rate": 2.5742092457420926e-06, "loss": 0.5145, "step": 18381 }, { "epoch": 0.5366850602902105, "grad_norm": 0.7457098993739175, "learning_rate": 2.5740470397404706e-06, "loss": 0.681, "step": 18382 }, { "epoch": 0.5367142565180578, "grad_norm": 0.725048252057321, "learning_rate": 2.573884833738848e-06, "loss": 0.6654, "step": 18383 }, { "epoch": 0.5367434527459052, "grad_norm": 0.7790599090905095, "learning_rate": 2.573722627737226e-06, "loss": 0.6574, "step": 18384 }, { "epoch": 0.5367726489737525, "grad_norm": 0.6948775736260346, "learning_rate": 2.573560421735604e-06, "loss": 0.6511, "step": 18385 }, { "epoch": 0.5368018452015999, "grad_norm": 0.6891497690431401, "learning_rate": 2.5733982157339826e-06, "loss": 0.6045, "step": 18386 }, { "epoch": 0.5368310414294473, "grad_norm": 0.767216203463739, "learning_rate": 2.5732360097323606e-06, "loss": 0.6828, "step": 18387 }, { "epoch": 0.5368602376572946, "grad_norm": 0.7459127266130311, "learning_rate": 2.5730738037307386e-06, "loss": 0.6823, "step": 18388 }, { "epoch": 0.536889433885142, "grad_norm": 0.7737284367078807, "learning_rate": 2.5729115977291162e-06, "loss": 0.6986, "step": 18389 }, { "epoch": 0.5369186301129893, "grad_norm": 0.6968035853327447, "learning_rate": 2.5727493917274942e-06, "loss": 0.6133, "step": 18390 }, { "epoch": 0.5369478263408367, "grad_norm": 0.689064639291462, "learning_rate": 2.5725871857258722e-06, "loss": 0.6122, "step": 18391 }, { "epoch": 0.5369770225686842, "grad_norm": 0.711007600198226, "learning_rate": 2.5724249797242502e-06, "loss": 0.6315, "step": 18392 }, { "epoch": 0.5370062187965315, "grad_norm": 0.7017097492711315, "learning_rate": 2.572262773722628e-06, "loss": 0.5915, "step": 18393 }, { "epoch": 0.5370354150243789, "grad_norm": 0.7437074300609727, "learning_rate": 2.572100567721006e-06, "loss": 0.7154, "step": 18394 }, { "epoch": 0.5370646112522263, "grad_norm": 0.7537139135144505, "learning_rate": 2.571938361719384e-06, "loss": 0.6382, "step": 18395 }, { "epoch": 0.5370938074800736, "grad_norm": 0.748759819989249, "learning_rate": 2.571776155717762e-06, "loss": 0.6772, "step": 18396 }, { "epoch": 0.537123003707921, "grad_norm": 0.7034658102143254, "learning_rate": 2.57161394971614e-06, "loss": 0.624, "step": 18397 }, { "epoch": 0.5371521999357683, "grad_norm": 0.7707236579785537, "learning_rate": 2.5714517437145174e-06, "loss": 0.7229, "step": 18398 }, { "epoch": 0.5371813961636157, "grad_norm": 0.6964836680721257, "learning_rate": 2.5712895377128954e-06, "loss": 0.583, "step": 18399 }, { "epoch": 0.5372105923914631, "grad_norm": 0.699998880642379, "learning_rate": 2.5711273317112734e-06, "loss": 0.5731, "step": 18400 }, { "epoch": 0.5372397886193104, "grad_norm": 0.7175677476204839, "learning_rate": 2.5709651257096515e-06, "loss": 0.6444, "step": 18401 }, { "epoch": 0.5372689848471578, "grad_norm": 0.7486351330848559, "learning_rate": 2.570802919708029e-06, "loss": 0.6821, "step": 18402 }, { "epoch": 0.5372981810750052, "grad_norm": 0.7569227571980387, "learning_rate": 2.570640713706407e-06, "loss": 0.693, "step": 18403 }, { "epoch": 0.5373273773028525, "grad_norm": 0.7538520701843198, "learning_rate": 2.570478507704785e-06, "loss": 0.7074, "step": 18404 }, { "epoch": 0.5373565735306999, "grad_norm": 0.7561224366073233, "learning_rate": 2.5703163017031635e-06, "loss": 0.6564, "step": 18405 }, { "epoch": 0.5373857697585472, "grad_norm": 0.7598385315773508, "learning_rate": 2.5701540957015415e-06, "loss": 0.6872, "step": 18406 }, { "epoch": 0.5374149659863946, "grad_norm": 0.7950419773707996, "learning_rate": 2.5699918896999195e-06, "loss": 0.7202, "step": 18407 }, { "epoch": 0.537444162214242, "grad_norm": 0.7258821416579808, "learning_rate": 2.569829683698297e-06, "loss": 0.6567, "step": 18408 }, { "epoch": 0.5374733584420893, "grad_norm": 0.7555074748591258, "learning_rate": 2.569667477696675e-06, "loss": 0.6876, "step": 18409 }, { "epoch": 0.5375025546699367, "grad_norm": 0.703696710838932, "learning_rate": 2.569505271695053e-06, "loss": 0.5988, "step": 18410 }, { "epoch": 0.537531750897784, "grad_norm": 0.7730347329641137, "learning_rate": 2.569343065693431e-06, "loss": 0.6281, "step": 18411 }, { "epoch": 0.5375609471256314, "grad_norm": 0.7021215474047187, "learning_rate": 2.5691808596918087e-06, "loss": 0.6428, "step": 18412 }, { "epoch": 0.5375901433534788, "grad_norm": 0.7972040575714586, "learning_rate": 2.5690186536901867e-06, "loss": 0.6728, "step": 18413 }, { "epoch": 0.5376193395813261, "grad_norm": 0.7114710209976488, "learning_rate": 2.5688564476885647e-06, "loss": 0.6541, "step": 18414 }, { "epoch": 0.5376485358091735, "grad_norm": 0.6991249546315325, "learning_rate": 2.5686942416869427e-06, "loss": 0.614, "step": 18415 }, { "epoch": 0.5376777320370209, "grad_norm": 0.7115341038913063, "learning_rate": 2.5685320356853207e-06, "loss": 0.6159, "step": 18416 }, { "epoch": 0.5377069282648682, "grad_norm": 0.7272474468553254, "learning_rate": 2.5683698296836983e-06, "loss": 0.6129, "step": 18417 }, { "epoch": 0.5377361244927156, "grad_norm": 0.680886887063953, "learning_rate": 2.5682076236820763e-06, "loss": 0.6265, "step": 18418 }, { "epoch": 0.5377653207205629, "grad_norm": 0.658015233763512, "learning_rate": 2.5680454176804543e-06, "loss": 0.5302, "step": 18419 }, { "epoch": 0.5377945169484103, "grad_norm": 0.7919361342343281, "learning_rate": 2.5678832116788323e-06, "loss": 0.6432, "step": 18420 }, { "epoch": 0.5378237131762577, "grad_norm": 0.6962179956899789, "learning_rate": 2.56772100567721e-06, "loss": 0.6458, "step": 18421 }, { "epoch": 0.537852909404105, "grad_norm": 0.7544768947084749, "learning_rate": 2.567558799675588e-06, "loss": 0.6955, "step": 18422 }, { "epoch": 0.5378821056319524, "grad_norm": 0.8102195638572899, "learning_rate": 2.567396593673966e-06, "loss": 0.7443, "step": 18423 }, { "epoch": 0.5379113018597997, "grad_norm": 0.7462875765333657, "learning_rate": 2.5672343876723443e-06, "loss": 0.6619, "step": 18424 }, { "epoch": 0.5379404980876471, "grad_norm": 0.7617312455404167, "learning_rate": 2.5670721816707223e-06, "loss": 0.6938, "step": 18425 }, { "epoch": 0.5379696943154945, "grad_norm": 1.271773790228973, "learning_rate": 2.5669099756691004e-06, "loss": 0.6998, "step": 18426 }, { "epoch": 0.5379988905433418, "grad_norm": 0.7620730983234186, "learning_rate": 2.566747769667478e-06, "loss": 0.7201, "step": 18427 }, { "epoch": 0.5380280867711892, "grad_norm": 0.7308163709806225, "learning_rate": 2.566585563665856e-06, "loss": 0.6118, "step": 18428 }, { "epoch": 0.5380572829990365, "grad_norm": 0.7458710385205245, "learning_rate": 2.566423357664234e-06, "loss": 0.6765, "step": 18429 }, { "epoch": 0.5380864792268839, "grad_norm": 0.7005514502219218, "learning_rate": 2.566261151662612e-06, "loss": 0.6266, "step": 18430 }, { "epoch": 0.5381156754547313, "grad_norm": 0.7117068908816212, "learning_rate": 2.5660989456609895e-06, "loss": 0.5917, "step": 18431 }, { "epoch": 0.5381448716825786, "grad_norm": 0.6856354243057011, "learning_rate": 2.5659367396593675e-06, "loss": 0.5959, "step": 18432 }, { "epoch": 0.538174067910426, "grad_norm": 0.7306610968724355, "learning_rate": 2.5657745336577456e-06, "loss": 0.6208, "step": 18433 }, { "epoch": 0.5382032641382734, "grad_norm": 0.7419599279832065, "learning_rate": 2.5656123276561236e-06, "loss": 0.6737, "step": 18434 }, { "epoch": 0.5382324603661207, "grad_norm": 0.8340307923086058, "learning_rate": 2.5654501216545016e-06, "loss": 0.6862, "step": 18435 }, { "epoch": 0.5382616565939681, "grad_norm": 0.7335046432719595, "learning_rate": 2.565287915652879e-06, "loss": 0.6291, "step": 18436 }, { "epoch": 0.5382908528218154, "grad_norm": 0.7456452555066072, "learning_rate": 2.565125709651257e-06, "loss": 0.7074, "step": 18437 }, { "epoch": 0.5383200490496628, "grad_norm": 0.7962715171872368, "learning_rate": 2.564963503649635e-06, "loss": 0.687, "step": 18438 }, { "epoch": 0.5383492452775102, "grad_norm": 0.7438081393951395, "learning_rate": 2.564801297648013e-06, "loss": 0.6427, "step": 18439 }, { "epoch": 0.5383784415053575, "grad_norm": 0.7416975617908813, "learning_rate": 2.5646390916463908e-06, "loss": 0.692, "step": 18440 }, { "epoch": 0.5384076377332049, "grad_norm": 0.6991089810490754, "learning_rate": 2.5644768856447688e-06, "loss": 0.6237, "step": 18441 }, { "epoch": 0.5384368339610522, "grad_norm": 0.6941605306671842, "learning_rate": 2.5643146796431468e-06, "loss": 0.6065, "step": 18442 }, { "epoch": 0.5384660301888996, "grad_norm": 0.7257987525569528, "learning_rate": 2.564152473641525e-06, "loss": 0.6458, "step": 18443 }, { "epoch": 0.538495226416747, "grad_norm": 0.691704946089105, "learning_rate": 2.563990267639903e-06, "loss": 0.582, "step": 18444 }, { "epoch": 0.5385244226445943, "grad_norm": 0.7304520240358557, "learning_rate": 2.563828061638281e-06, "loss": 0.6513, "step": 18445 }, { "epoch": 0.5385536188724417, "grad_norm": 0.6753687508426912, "learning_rate": 2.563665855636659e-06, "loss": 0.6276, "step": 18446 }, { "epoch": 0.538582815100289, "grad_norm": 0.7672533574426489, "learning_rate": 2.563503649635037e-06, "loss": 0.6846, "step": 18447 }, { "epoch": 0.5386120113281364, "grad_norm": 0.6547020058709391, "learning_rate": 2.563341443633415e-06, "loss": 0.5408, "step": 18448 }, { "epoch": 0.5386412075559838, "grad_norm": 0.7110142048280162, "learning_rate": 2.563179237631793e-06, "loss": 0.6011, "step": 18449 }, { "epoch": 0.5386704037838311, "grad_norm": 0.755919694160922, "learning_rate": 2.5630170316301704e-06, "loss": 0.6637, "step": 18450 }, { "epoch": 0.5386996000116785, "grad_norm": 0.6944004819725398, "learning_rate": 2.5628548256285484e-06, "loss": 0.6247, "step": 18451 }, { "epoch": 0.5387287962395259, "grad_norm": 0.7794322744674547, "learning_rate": 2.5626926196269264e-06, "loss": 0.757, "step": 18452 }, { "epoch": 0.5387579924673732, "grad_norm": 0.786257123067269, "learning_rate": 2.5625304136253044e-06, "loss": 0.5543, "step": 18453 }, { "epoch": 0.5387871886952206, "grad_norm": 0.7621675070696345, "learning_rate": 2.5623682076236824e-06, "loss": 0.6247, "step": 18454 }, { "epoch": 0.5388163849230679, "grad_norm": 0.7256753899115725, "learning_rate": 2.56220600162206e-06, "loss": 0.6119, "step": 18455 }, { "epoch": 0.5388455811509153, "grad_norm": 0.7528543793452314, "learning_rate": 2.562043795620438e-06, "loss": 0.7557, "step": 18456 }, { "epoch": 0.5388747773787627, "grad_norm": 0.6984574610948666, "learning_rate": 2.561881589618816e-06, "loss": 0.6, "step": 18457 }, { "epoch": 0.53890397360661, "grad_norm": 0.6687500540621202, "learning_rate": 2.561719383617194e-06, "loss": 0.5751, "step": 18458 }, { "epoch": 0.5389331698344574, "grad_norm": 0.7328885150177084, "learning_rate": 2.5615571776155716e-06, "loss": 0.5601, "step": 18459 }, { "epoch": 0.5389623660623047, "grad_norm": 0.6989882171865868, "learning_rate": 2.5613949716139496e-06, "loss": 0.6234, "step": 18460 }, { "epoch": 0.5389915622901521, "grad_norm": 0.858263375732467, "learning_rate": 2.561232765612328e-06, "loss": 0.5856, "step": 18461 }, { "epoch": 0.5390207585179995, "grad_norm": 0.6885335781018072, "learning_rate": 2.561070559610706e-06, "loss": 0.591, "step": 18462 }, { "epoch": 0.5390499547458468, "grad_norm": 0.7208711238499402, "learning_rate": 2.560908353609084e-06, "loss": 0.6836, "step": 18463 }, { "epoch": 0.5390791509736942, "grad_norm": 0.7316142569064942, "learning_rate": 2.560746147607462e-06, "loss": 0.6354, "step": 18464 }, { "epoch": 0.5391083472015415, "grad_norm": 0.7355652693500986, "learning_rate": 2.5605839416058397e-06, "loss": 0.6501, "step": 18465 }, { "epoch": 0.5391375434293889, "grad_norm": 0.7149074913827211, "learning_rate": 2.5604217356042177e-06, "loss": 0.6206, "step": 18466 }, { "epoch": 0.5391667396572363, "grad_norm": 0.7494142466325314, "learning_rate": 2.5602595296025957e-06, "loss": 0.7142, "step": 18467 }, { "epoch": 0.5391959358850836, "grad_norm": 0.8964320301108569, "learning_rate": 2.5600973236009737e-06, "loss": 0.6772, "step": 18468 }, { "epoch": 0.539225132112931, "grad_norm": 0.7270623625260281, "learning_rate": 2.5599351175993513e-06, "loss": 0.6381, "step": 18469 }, { "epoch": 0.5392543283407784, "grad_norm": 0.698620952999079, "learning_rate": 2.5597729115977293e-06, "loss": 0.6588, "step": 18470 }, { "epoch": 0.5392835245686257, "grad_norm": 0.7530119955238004, "learning_rate": 2.5596107055961073e-06, "loss": 0.6925, "step": 18471 }, { "epoch": 0.5393127207964731, "grad_norm": 0.7620982600375171, "learning_rate": 2.5594484995944853e-06, "loss": 0.7333, "step": 18472 }, { "epoch": 0.5393419170243204, "grad_norm": 0.7107149736479648, "learning_rate": 2.559286293592863e-06, "loss": 0.6538, "step": 18473 }, { "epoch": 0.5393711132521678, "grad_norm": 0.7236299314807803, "learning_rate": 2.559124087591241e-06, "loss": 0.6597, "step": 18474 }, { "epoch": 0.5394003094800152, "grad_norm": 0.807437287136007, "learning_rate": 2.558961881589619e-06, "loss": 0.6662, "step": 18475 }, { "epoch": 0.5394295057078625, "grad_norm": 0.7124485913396541, "learning_rate": 2.558799675587997e-06, "loss": 0.6692, "step": 18476 }, { "epoch": 0.5394587019357099, "grad_norm": 0.7199556718737425, "learning_rate": 2.558637469586375e-06, "loss": 0.6323, "step": 18477 }, { "epoch": 0.5394878981635572, "grad_norm": 0.8185081924220428, "learning_rate": 2.5584752635847525e-06, "loss": 0.7754, "step": 18478 }, { "epoch": 0.5395170943914046, "grad_norm": 0.7685755271224455, "learning_rate": 2.5583130575831305e-06, "loss": 0.695, "step": 18479 }, { "epoch": 0.539546290619252, "grad_norm": 0.7606895117940897, "learning_rate": 2.558150851581509e-06, "loss": 0.7035, "step": 18480 }, { "epoch": 0.5395754868470993, "grad_norm": 0.7331380074486146, "learning_rate": 2.557988645579887e-06, "loss": 0.6225, "step": 18481 }, { "epoch": 0.5396046830749467, "grad_norm": 0.7600458939052571, "learning_rate": 2.557826439578265e-06, "loss": 0.6943, "step": 18482 }, { "epoch": 0.539633879302794, "grad_norm": 0.6916905863004371, "learning_rate": 2.557664233576643e-06, "loss": 0.6, "step": 18483 }, { "epoch": 0.5396630755306414, "grad_norm": 0.7748480542762719, "learning_rate": 2.5575020275750205e-06, "loss": 0.6708, "step": 18484 }, { "epoch": 0.5396922717584888, "grad_norm": 0.702069887312018, "learning_rate": 2.5573398215733985e-06, "loss": 0.6517, "step": 18485 }, { "epoch": 0.5397214679863361, "grad_norm": 0.7316053330756157, "learning_rate": 2.5571776155717765e-06, "loss": 0.6637, "step": 18486 }, { "epoch": 0.5397506642141835, "grad_norm": 0.6757612492427886, "learning_rate": 2.5570154095701545e-06, "loss": 0.6045, "step": 18487 }, { "epoch": 0.5397798604420309, "grad_norm": 0.6812250594636351, "learning_rate": 2.556853203568532e-06, "loss": 0.5741, "step": 18488 }, { "epoch": 0.5398090566698782, "grad_norm": 0.678622029314897, "learning_rate": 2.55669099756691e-06, "loss": 0.5597, "step": 18489 }, { "epoch": 0.5398382528977256, "grad_norm": 0.7229344731283861, "learning_rate": 2.556528791565288e-06, "loss": 0.6339, "step": 18490 }, { "epoch": 0.5398674491255729, "grad_norm": 0.690737114228196, "learning_rate": 2.556366585563666e-06, "loss": 0.6167, "step": 18491 }, { "epoch": 0.5398966453534203, "grad_norm": 0.7048810601891036, "learning_rate": 2.5562043795620437e-06, "loss": 0.6349, "step": 18492 }, { "epoch": 0.5399258415812677, "grad_norm": 0.7451279206657394, "learning_rate": 2.5560421735604217e-06, "loss": 0.6769, "step": 18493 }, { "epoch": 0.539955037809115, "grad_norm": 0.6820043971210827, "learning_rate": 2.5558799675587997e-06, "loss": 0.5873, "step": 18494 }, { "epoch": 0.5399842340369624, "grad_norm": 0.7522194165726825, "learning_rate": 2.5557177615571777e-06, "loss": 0.6069, "step": 18495 }, { "epoch": 0.5400134302648097, "grad_norm": 0.7150389767941492, "learning_rate": 2.5555555555555557e-06, "loss": 0.6219, "step": 18496 }, { "epoch": 0.5400426264926571, "grad_norm": 0.7097469948507238, "learning_rate": 2.5553933495539333e-06, "loss": 0.6615, "step": 18497 }, { "epoch": 0.5400718227205045, "grad_norm": 0.7485025254949323, "learning_rate": 2.5552311435523113e-06, "loss": 0.6355, "step": 18498 }, { "epoch": 0.5401010189483518, "grad_norm": 0.6701345573069444, "learning_rate": 2.5550689375506898e-06, "loss": 0.567, "step": 18499 }, { "epoch": 0.5401302151761992, "grad_norm": 0.6604160319697363, "learning_rate": 2.5549067315490678e-06, "loss": 0.5169, "step": 18500 }, { "epoch": 0.5401594114040466, "grad_norm": 0.7322345736739494, "learning_rate": 2.5547445255474458e-06, "loss": 0.6353, "step": 18501 }, { "epoch": 0.5401886076318939, "grad_norm": 0.7689217794456314, "learning_rate": 2.5545823195458238e-06, "loss": 0.6974, "step": 18502 }, { "epoch": 0.5402178038597413, "grad_norm": 0.6958225995247653, "learning_rate": 2.5544201135442014e-06, "loss": 0.6059, "step": 18503 }, { "epoch": 0.5402470000875886, "grad_norm": 0.7433001234415674, "learning_rate": 2.5542579075425794e-06, "loss": 0.6049, "step": 18504 }, { "epoch": 0.540276196315436, "grad_norm": 0.7144544012504701, "learning_rate": 2.5540957015409574e-06, "loss": 0.646, "step": 18505 }, { "epoch": 0.5403053925432834, "grad_norm": 0.7437720569250269, "learning_rate": 2.5539334955393354e-06, "loss": 0.6868, "step": 18506 }, { "epoch": 0.5403345887711307, "grad_norm": 0.7148285924696497, "learning_rate": 2.553771289537713e-06, "loss": 0.6338, "step": 18507 }, { "epoch": 0.5403637849989781, "grad_norm": 0.8517265384648305, "learning_rate": 2.553609083536091e-06, "loss": 0.7213, "step": 18508 }, { "epoch": 0.5403929812268254, "grad_norm": 0.7145368630291703, "learning_rate": 2.553446877534469e-06, "loss": 0.5896, "step": 18509 }, { "epoch": 0.5404221774546728, "grad_norm": 0.7043351673805144, "learning_rate": 2.553284671532847e-06, "loss": 0.6381, "step": 18510 }, { "epoch": 0.5404513736825202, "grad_norm": 0.6534319837332527, "learning_rate": 2.5531224655312246e-06, "loss": 0.534, "step": 18511 }, { "epoch": 0.5404805699103675, "grad_norm": 0.8435605341418478, "learning_rate": 2.5529602595296026e-06, "loss": 0.6735, "step": 18512 }, { "epoch": 0.540509766138215, "grad_norm": 0.7056553687539393, "learning_rate": 2.5527980535279806e-06, "loss": 0.6026, "step": 18513 }, { "epoch": 0.5405389623660624, "grad_norm": 0.6650061126465633, "learning_rate": 2.5526358475263586e-06, "loss": 0.5581, "step": 18514 }, { "epoch": 0.5405681585939097, "grad_norm": 0.7140596268587884, "learning_rate": 2.5524736415247366e-06, "loss": 0.5611, "step": 18515 }, { "epoch": 0.5405973548217571, "grad_norm": 0.6700191737009692, "learning_rate": 2.552311435523114e-06, "loss": 0.584, "step": 18516 }, { "epoch": 0.5406265510496044, "grad_norm": 0.7633347203795453, "learning_rate": 2.552149229521492e-06, "loss": 0.6526, "step": 18517 }, { "epoch": 0.5406557472774518, "grad_norm": 1.0800850888643097, "learning_rate": 2.5519870235198706e-06, "loss": 0.7219, "step": 18518 }, { "epoch": 0.5406849435052992, "grad_norm": 0.7576099874776513, "learning_rate": 2.5518248175182486e-06, "loss": 0.6866, "step": 18519 }, { "epoch": 0.5407141397331465, "grad_norm": 0.7451023963689853, "learning_rate": 2.5516626115166266e-06, "loss": 0.6886, "step": 18520 }, { "epoch": 0.5407433359609939, "grad_norm": 0.6979751365389729, "learning_rate": 2.5515004055150046e-06, "loss": 0.6413, "step": 18521 }, { "epoch": 0.5407725321888412, "grad_norm": 0.7257997758628542, "learning_rate": 2.5513381995133822e-06, "loss": 0.6287, "step": 18522 }, { "epoch": 0.5408017284166886, "grad_norm": 0.6952250470650895, "learning_rate": 2.5511759935117602e-06, "loss": 0.6105, "step": 18523 }, { "epoch": 0.540830924644536, "grad_norm": 0.714720397018011, "learning_rate": 2.5510137875101382e-06, "loss": 0.6325, "step": 18524 }, { "epoch": 0.5408601208723833, "grad_norm": 0.7479434263001769, "learning_rate": 2.5508515815085162e-06, "loss": 0.7186, "step": 18525 }, { "epoch": 0.5408893171002307, "grad_norm": 0.7123121906419889, "learning_rate": 2.550689375506894e-06, "loss": 0.6353, "step": 18526 }, { "epoch": 0.540918513328078, "grad_norm": 0.7910658022794602, "learning_rate": 2.550527169505272e-06, "loss": 0.7403, "step": 18527 }, { "epoch": 0.5409477095559254, "grad_norm": 0.7478633998225066, "learning_rate": 2.55036496350365e-06, "loss": 0.6738, "step": 18528 }, { "epoch": 0.5409769057837728, "grad_norm": 0.8264535986879965, "learning_rate": 2.550202757502028e-06, "loss": 0.7145, "step": 18529 }, { "epoch": 0.5410061020116201, "grad_norm": 0.7512370634036035, "learning_rate": 2.5500405515004054e-06, "loss": 0.6609, "step": 18530 }, { "epoch": 0.5410352982394675, "grad_norm": 0.7008758643626086, "learning_rate": 2.5498783454987834e-06, "loss": 0.594, "step": 18531 }, { "epoch": 0.5410644944673149, "grad_norm": 0.7593778184968049, "learning_rate": 2.5497161394971614e-06, "loss": 0.6786, "step": 18532 }, { "epoch": 0.5410936906951622, "grad_norm": 0.6473405195699506, "learning_rate": 2.5495539334955395e-06, "loss": 0.5212, "step": 18533 }, { "epoch": 0.5411228869230096, "grad_norm": 0.7759383945066075, "learning_rate": 2.5493917274939175e-06, "loss": 0.6714, "step": 18534 }, { "epoch": 0.5411520831508569, "grad_norm": 0.6488994222037836, "learning_rate": 2.549229521492295e-06, "loss": 0.544, "step": 18535 }, { "epoch": 0.5411812793787043, "grad_norm": 0.7431855039280821, "learning_rate": 2.549067315490673e-06, "loss": 0.6427, "step": 18536 }, { "epoch": 0.5412104756065517, "grad_norm": 0.6895539146069817, "learning_rate": 2.5489051094890515e-06, "loss": 0.6379, "step": 18537 }, { "epoch": 0.541239671834399, "grad_norm": 0.7590820630345863, "learning_rate": 2.5487429034874295e-06, "loss": 0.7173, "step": 18538 }, { "epoch": 0.5412688680622464, "grad_norm": 0.6507805939997494, "learning_rate": 2.5485806974858075e-06, "loss": 0.5706, "step": 18539 }, { "epoch": 0.5412980642900938, "grad_norm": 0.7098218562754844, "learning_rate": 2.5484184914841855e-06, "loss": 0.6504, "step": 18540 }, { "epoch": 0.5413272605179411, "grad_norm": 0.7143831864829077, "learning_rate": 2.548256285482563e-06, "loss": 0.6019, "step": 18541 }, { "epoch": 0.5413564567457885, "grad_norm": 0.8719157395685888, "learning_rate": 2.548094079480941e-06, "loss": 0.6605, "step": 18542 }, { "epoch": 0.5413856529736358, "grad_norm": 0.7233578036959489, "learning_rate": 2.547931873479319e-06, "loss": 0.577, "step": 18543 }, { "epoch": 0.5414148492014832, "grad_norm": 0.6954977180176981, "learning_rate": 2.547769667477697e-06, "loss": 0.5728, "step": 18544 }, { "epoch": 0.5414440454293306, "grad_norm": 0.7216835177792639, "learning_rate": 2.5476074614760747e-06, "loss": 0.627, "step": 18545 }, { "epoch": 0.5414732416571779, "grad_norm": 0.7578743242311878, "learning_rate": 2.5474452554744527e-06, "loss": 0.6726, "step": 18546 }, { "epoch": 0.5415024378850253, "grad_norm": 0.7361960184376478, "learning_rate": 2.5472830494728307e-06, "loss": 0.6783, "step": 18547 }, { "epoch": 0.5415316341128726, "grad_norm": 0.6856385846476453, "learning_rate": 2.5471208434712087e-06, "loss": 0.557, "step": 18548 }, { "epoch": 0.54156083034072, "grad_norm": 0.6610588779908684, "learning_rate": 2.5469586374695863e-06, "loss": 0.5063, "step": 18549 }, { "epoch": 0.5415900265685674, "grad_norm": 0.7662030196634158, "learning_rate": 2.5467964314679643e-06, "loss": 0.5972, "step": 18550 }, { "epoch": 0.5416192227964147, "grad_norm": 0.7261669377048089, "learning_rate": 2.5466342254663423e-06, "loss": 0.6684, "step": 18551 }, { "epoch": 0.5416484190242621, "grad_norm": 0.7437387134165682, "learning_rate": 2.5464720194647203e-06, "loss": 0.7, "step": 18552 }, { "epoch": 0.5416776152521094, "grad_norm": 0.6801549973148141, "learning_rate": 2.5463098134630983e-06, "loss": 0.5834, "step": 18553 }, { "epoch": 0.5417068114799568, "grad_norm": 0.6810071344299636, "learning_rate": 2.546147607461476e-06, "loss": 0.5883, "step": 18554 }, { "epoch": 0.5417360077078042, "grad_norm": 0.6758926879954087, "learning_rate": 2.545985401459854e-06, "loss": 0.5725, "step": 18555 }, { "epoch": 0.5417652039356515, "grad_norm": 0.7246985142135478, "learning_rate": 2.5458231954582323e-06, "loss": 0.7143, "step": 18556 }, { "epoch": 0.5417944001634989, "grad_norm": 0.8001480561831484, "learning_rate": 2.5456609894566103e-06, "loss": 0.723, "step": 18557 }, { "epoch": 0.5418235963913463, "grad_norm": 0.7112494483320116, "learning_rate": 2.5454987834549884e-06, "loss": 0.6296, "step": 18558 }, { "epoch": 0.5418527926191936, "grad_norm": 0.777007850075732, "learning_rate": 2.5453365774533664e-06, "loss": 0.6122, "step": 18559 }, { "epoch": 0.541881988847041, "grad_norm": 0.7249736855815634, "learning_rate": 2.545174371451744e-06, "loss": 0.5945, "step": 18560 }, { "epoch": 0.5419111850748883, "grad_norm": 0.7007369639400021, "learning_rate": 2.545012165450122e-06, "loss": 0.605, "step": 18561 }, { "epoch": 0.5419403813027357, "grad_norm": 0.8152945245645176, "learning_rate": 2.5448499594485e-06, "loss": 0.7628, "step": 18562 }, { "epoch": 0.5419695775305831, "grad_norm": 0.7179450122409305, "learning_rate": 2.544687753446878e-06, "loss": 0.5982, "step": 18563 }, { "epoch": 0.5419987737584304, "grad_norm": 0.7987284172146567, "learning_rate": 2.5445255474452555e-06, "loss": 0.6975, "step": 18564 }, { "epoch": 0.5420279699862778, "grad_norm": 0.6792810041855453, "learning_rate": 2.5443633414436336e-06, "loss": 0.5905, "step": 18565 }, { "epoch": 0.5420571662141251, "grad_norm": 0.6773717215264247, "learning_rate": 2.5442011354420116e-06, "loss": 0.6054, "step": 18566 }, { "epoch": 0.5420863624419725, "grad_norm": 0.7775154060089637, "learning_rate": 2.5440389294403896e-06, "loss": 0.7374, "step": 18567 }, { "epoch": 0.5421155586698199, "grad_norm": 0.6936296267931922, "learning_rate": 2.543876723438767e-06, "loss": 0.6306, "step": 18568 }, { "epoch": 0.5421447548976672, "grad_norm": 0.7494101918702649, "learning_rate": 2.543714517437145e-06, "loss": 0.7373, "step": 18569 }, { "epoch": 0.5421739511255146, "grad_norm": 0.7259101890723624, "learning_rate": 2.543552311435523e-06, "loss": 0.6197, "step": 18570 }, { "epoch": 0.542203147353362, "grad_norm": 0.7451420021298556, "learning_rate": 2.543390105433901e-06, "loss": 0.66, "step": 18571 }, { "epoch": 0.5422323435812093, "grad_norm": 0.7215598576000642, "learning_rate": 2.543227899432279e-06, "loss": 0.5857, "step": 18572 }, { "epoch": 0.5422615398090567, "grad_norm": 0.7072301777434171, "learning_rate": 2.5430656934306568e-06, "loss": 0.6195, "step": 18573 }, { "epoch": 0.542290736036904, "grad_norm": 0.678122063696545, "learning_rate": 2.5429034874290348e-06, "loss": 0.606, "step": 18574 }, { "epoch": 0.5423199322647514, "grad_norm": 0.7389145190499572, "learning_rate": 2.542741281427413e-06, "loss": 0.6001, "step": 18575 }, { "epoch": 0.5423491284925988, "grad_norm": 0.738181552937458, "learning_rate": 2.542579075425791e-06, "loss": 0.6516, "step": 18576 }, { "epoch": 0.5423783247204461, "grad_norm": 0.7130568374312664, "learning_rate": 2.5424168694241692e-06, "loss": 0.6361, "step": 18577 }, { "epoch": 0.5424075209482935, "grad_norm": 0.6857447030949197, "learning_rate": 2.542254663422547e-06, "loss": 0.6051, "step": 18578 }, { "epoch": 0.5424367171761408, "grad_norm": 0.7347137307013846, "learning_rate": 2.542092457420925e-06, "loss": 0.6332, "step": 18579 }, { "epoch": 0.5424659134039882, "grad_norm": 0.7238558192970417, "learning_rate": 2.541930251419303e-06, "loss": 0.6551, "step": 18580 }, { "epoch": 0.5424951096318356, "grad_norm": 0.765402780122382, "learning_rate": 2.541768045417681e-06, "loss": 0.5587, "step": 18581 }, { "epoch": 0.5425243058596829, "grad_norm": 0.7265294975599842, "learning_rate": 2.541605839416059e-06, "loss": 0.6405, "step": 18582 }, { "epoch": 0.5425535020875303, "grad_norm": 0.8252593360502645, "learning_rate": 2.5414436334144364e-06, "loss": 0.7665, "step": 18583 }, { "epoch": 0.5425826983153776, "grad_norm": 0.6895058296015079, "learning_rate": 2.5412814274128144e-06, "loss": 0.5955, "step": 18584 }, { "epoch": 0.542611894543225, "grad_norm": 0.7566611279318785, "learning_rate": 2.5411192214111924e-06, "loss": 0.6959, "step": 18585 }, { "epoch": 0.5426410907710724, "grad_norm": 0.7208257584132011, "learning_rate": 2.5409570154095704e-06, "loss": 0.5948, "step": 18586 }, { "epoch": 0.5426702869989197, "grad_norm": 0.6889963769110296, "learning_rate": 2.540794809407948e-06, "loss": 0.5653, "step": 18587 }, { "epoch": 0.5426994832267671, "grad_norm": 0.6996633235829742, "learning_rate": 2.540632603406326e-06, "loss": 0.5858, "step": 18588 }, { "epoch": 0.5427286794546144, "grad_norm": 0.7167320223332466, "learning_rate": 2.540470397404704e-06, "loss": 0.6385, "step": 18589 }, { "epoch": 0.5427578756824618, "grad_norm": 0.6603386530596401, "learning_rate": 2.540308191403082e-06, "loss": 0.5486, "step": 18590 }, { "epoch": 0.5427870719103092, "grad_norm": 0.7745340452989792, "learning_rate": 2.54014598540146e-06, "loss": 0.6889, "step": 18591 }, { "epoch": 0.5428162681381565, "grad_norm": 0.7031966392949169, "learning_rate": 2.5399837793998376e-06, "loss": 0.6299, "step": 18592 }, { "epoch": 0.5428454643660039, "grad_norm": 0.7705382447403313, "learning_rate": 2.5398215733982156e-06, "loss": 0.6921, "step": 18593 }, { "epoch": 0.5428746605938513, "grad_norm": 0.6965925919626572, "learning_rate": 2.539659367396594e-06, "loss": 0.5887, "step": 18594 }, { "epoch": 0.5429038568216986, "grad_norm": 0.7318221423374148, "learning_rate": 2.539497161394972e-06, "loss": 0.6504, "step": 18595 }, { "epoch": 0.542933053049546, "grad_norm": 0.7746929353134777, "learning_rate": 2.53933495539335e-06, "loss": 0.6683, "step": 18596 }, { "epoch": 0.5429622492773933, "grad_norm": 0.7053202646907853, "learning_rate": 2.5391727493917277e-06, "loss": 0.5833, "step": 18597 }, { "epoch": 0.5429914455052407, "grad_norm": 0.7111258386812372, "learning_rate": 2.5390105433901057e-06, "loss": 0.6216, "step": 18598 }, { "epoch": 0.5430206417330881, "grad_norm": 0.7304625597743076, "learning_rate": 2.5388483373884837e-06, "loss": 0.6956, "step": 18599 }, { "epoch": 0.5430498379609354, "grad_norm": 0.7009271344248327, "learning_rate": 2.5386861313868617e-06, "loss": 0.5709, "step": 18600 }, { "epoch": 0.5430790341887828, "grad_norm": 0.9005478098111085, "learning_rate": 2.5385239253852397e-06, "loss": 0.6158, "step": 18601 }, { "epoch": 0.5431082304166301, "grad_norm": 0.7074180182258146, "learning_rate": 2.5383617193836173e-06, "loss": 0.6359, "step": 18602 }, { "epoch": 0.5431374266444775, "grad_norm": 0.7040127233716597, "learning_rate": 2.5381995133819953e-06, "loss": 0.6018, "step": 18603 }, { "epoch": 0.5431666228723249, "grad_norm": 0.6863886350199108, "learning_rate": 2.5380373073803733e-06, "loss": 0.5923, "step": 18604 }, { "epoch": 0.5431958191001722, "grad_norm": 0.7415290672262305, "learning_rate": 2.5378751013787513e-06, "loss": 0.6843, "step": 18605 }, { "epoch": 0.5432250153280196, "grad_norm": 0.7371122459606633, "learning_rate": 2.537712895377129e-06, "loss": 0.6318, "step": 18606 }, { "epoch": 0.543254211555867, "grad_norm": 0.7001347391852908, "learning_rate": 2.537550689375507e-06, "loss": 0.6236, "step": 18607 }, { "epoch": 0.5432834077837143, "grad_norm": 0.7336262070350611, "learning_rate": 2.537388483373885e-06, "loss": 0.6722, "step": 18608 }, { "epoch": 0.5433126040115617, "grad_norm": 0.7471755157099959, "learning_rate": 2.537226277372263e-06, "loss": 0.7142, "step": 18609 }, { "epoch": 0.543341800239409, "grad_norm": 0.769849201176722, "learning_rate": 2.537064071370641e-06, "loss": 0.6416, "step": 18610 }, { "epoch": 0.5433709964672564, "grad_norm": 0.7837832131980205, "learning_rate": 2.5369018653690185e-06, "loss": 0.747, "step": 18611 }, { "epoch": 0.5434001926951038, "grad_norm": 0.7222079946847711, "learning_rate": 2.536739659367397e-06, "loss": 0.593, "step": 18612 }, { "epoch": 0.5434293889229511, "grad_norm": 0.6765132648634317, "learning_rate": 2.536577453365775e-06, "loss": 0.5608, "step": 18613 }, { "epoch": 0.5434585851507985, "grad_norm": 0.7105585909412994, "learning_rate": 2.536415247364153e-06, "loss": 0.6471, "step": 18614 }, { "epoch": 0.5434877813786458, "grad_norm": 0.6880764776332062, "learning_rate": 2.536253041362531e-06, "loss": 0.5669, "step": 18615 }, { "epoch": 0.5435169776064932, "grad_norm": 0.6992269259359765, "learning_rate": 2.5360908353609085e-06, "loss": 0.5753, "step": 18616 }, { "epoch": 0.5435461738343406, "grad_norm": 0.6880983924072697, "learning_rate": 2.5359286293592865e-06, "loss": 0.5872, "step": 18617 }, { "epoch": 0.5435753700621879, "grad_norm": 0.6938990742463207, "learning_rate": 2.5357664233576645e-06, "loss": 0.6586, "step": 18618 }, { "epoch": 0.5436045662900353, "grad_norm": 0.8051601582078016, "learning_rate": 2.5356042173560425e-06, "loss": 0.691, "step": 18619 }, { "epoch": 0.5436337625178826, "grad_norm": 0.6897691480776649, "learning_rate": 2.5354420113544205e-06, "loss": 0.5756, "step": 18620 }, { "epoch": 0.54366295874573, "grad_norm": 0.7437394272849946, "learning_rate": 2.535279805352798e-06, "loss": 0.7224, "step": 18621 }, { "epoch": 0.5436921549735774, "grad_norm": 0.7149755139763945, "learning_rate": 2.535117599351176e-06, "loss": 0.611, "step": 18622 }, { "epoch": 0.5437213512014247, "grad_norm": 0.7763863874739424, "learning_rate": 2.534955393349554e-06, "loss": 0.674, "step": 18623 }, { "epoch": 0.5437505474292721, "grad_norm": 0.7330115140866148, "learning_rate": 2.534793187347932e-06, "loss": 0.6144, "step": 18624 }, { "epoch": 0.5437797436571195, "grad_norm": 0.7577193807818425, "learning_rate": 2.5346309813463097e-06, "loss": 0.7219, "step": 18625 }, { "epoch": 0.5438089398849668, "grad_norm": 0.7201659166329041, "learning_rate": 2.5344687753446877e-06, "loss": 0.634, "step": 18626 }, { "epoch": 0.5438381361128142, "grad_norm": 0.7026047996122367, "learning_rate": 2.5343065693430657e-06, "loss": 0.607, "step": 18627 }, { "epoch": 0.5438673323406615, "grad_norm": 0.6917197953523714, "learning_rate": 2.5341443633414437e-06, "loss": 0.5905, "step": 18628 }, { "epoch": 0.5438965285685089, "grad_norm": 0.7173404507203857, "learning_rate": 2.5339821573398218e-06, "loss": 0.6883, "step": 18629 }, { "epoch": 0.5439257247963563, "grad_norm": 0.7041017976740097, "learning_rate": 2.5338199513381993e-06, "loss": 0.6175, "step": 18630 }, { "epoch": 0.5439549210242036, "grad_norm": 0.7279096287377302, "learning_rate": 2.5336577453365778e-06, "loss": 0.6331, "step": 18631 }, { "epoch": 0.543984117252051, "grad_norm": 0.7634518001061447, "learning_rate": 2.5334955393349558e-06, "loss": 0.734, "step": 18632 }, { "epoch": 0.5440133134798983, "grad_norm": 0.810459982767442, "learning_rate": 2.5333333333333338e-06, "loss": 0.7078, "step": 18633 }, { "epoch": 0.5440425097077458, "grad_norm": 0.749065958937192, "learning_rate": 2.533171127331712e-06, "loss": 0.6105, "step": 18634 }, { "epoch": 0.5440717059355932, "grad_norm": 0.7400887026519509, "learning_rate": 2.5330089213300894e-06, "loss": 0.6593, "step": 18635 }, { "epoch": 0.5441009021634405, "grad_norm": 0.7711321783451229, "learning_rate": 2.5328467153284674e-06, "loss": 0.7562, "step": 18636 }, { "epoch": 0.5441300983912879, "grad_norm": 0.7022285640013973, "learning_rate": 2.5326845093268454e-06, "loss": 0.5842, "step": 18637 }, { "epoch": 0.5441592946191353, "grad_norm": 0.7591873501063178, "learning_rate": 2.5325223033252234e-06, "loss": 0.7144, "step": 18638 }, { "epoch": 0.5441884908469826, "grad_norm": 0.7906143046556028, "learning_rate": 2.5323600973236014e-06, "loss": 0.6002, "step": 18639 }, { "epoch": 0.54421768707483, "grad_norm": 0.7643135687690976, "learning_rate": 2.532197891321979e-06, "loss": 0.7258, "step": 18640 }, { "epoch": 0.5442468833026773, "grad_norm": 0.8063416600083472, "learning_rate": 2.532035685320357e-06, "loss": 0.7088, "step": 18641 }, { "epoch": 0.5442760795305247, "grad_norm": 0.7348984248518866, "learning_rate": 2.531873479318735e-06, "loss": 0.6636, "step": 18642 }, { "epoch": 0.5443052757583721, "grad_norm": 0.7364946010280276, "learning_rate": 2.531711273317113e-06, "loss": 0.6367, "step": 18643 }, { "epoch": 0.5443344719862194, "grad_norm": 0.6942368497835669, "learning_rate": 2.5315490673154906e-06, "loss": 0.5878, "step": 18644 }, { "epoch": 0.5443636682140668, "grad_norm": 0.7713411776968204, "learning_rate": 2.5313868613138686e-06, "loss": 0.6601, "step": 18645 }, { "epoch": 0.5443928644419141, "grad_norm": 0.6991442382345906, "learning_rate": 2.5312246553122466e-06, "loss": 0.6129, "step": 18646 }, { "epoch": 0.5444220606697615, "grad_norm": 0.7772252380115808, "learning_rate": 2.5310624493106246e-06, "loss": 0.7183, "step": 18647 }, { "epoch": 0.5444512568976089, "grad_norm": 0.723212570932787, "learning_rate": 2.5309002433090026e-06, "loss": 0.7269, "step": 18648 }, { "epoch": 0.5444804531254562, "grad_norm": 0.7669893171567653, "learning_rate": 2.53073803730738e-06, "loss": 0.642, "step": 18649 }, { "epoch": 0.5445096493533036, "grad_norm": 0.7178220199914864, "learning_rate": 2.5305758313057586e-06, "loss": 0.599, "step": 18650 }, { "epoch": 0.544538845581151, "grad_norm": 0.6905942643029678, "learning_rate": 2.5304136253041366e-06, "loss": 0.5947, "step": 18651 }, { "epoch": 0.5445680418089983, "grad_norm": 0.693185752551731, "learning_rate": 2.5302514193025146e-06, "loss": 0.621, "step": 18652 }, { "epoch": 0.5445972380368457, "grad_norm": 0.7143110915909028, "learning_rate": 2.5300892133008926e-06, "loss": 0.5686, "step": 18653 }, { "epoch": 0.544626434264693, "grad_norm": 0.7064153212141587, "learning_rate": 2.5299270072992702e-06, "loss": 0.6342, "step": 18654 }, { "epoch": 0.5446556304925404, "grad_norm": 0.7381595061746193, "learning_rate": 2.5297648012976482e-06, "loss": 0.64, "step": 18655 }, { "epoch": 0.5446848267203878, "grad_norm": 0.710561291552736, "learning_rate": 2.5296025952960262e-06, "loss": 0.6093, "step": 18656 }, { "epoch": 0.5447140229482351, "grad_norm": 0.6875721261238588, "learning_rate": 2.5294403892944043e-06, "loss": 0.6049, "step": 18657 }, { "epoch": 0.5447432191760825, "grad_norm": 0.7390051246351773, "learning_rate": 2.5292781832927823e-06, "loss": 0.6335, "step": 18658 }, { "epoch": 0.5447724154039298, "grad_norm": 0.7386351566521079, "learning_rate": 2.52911597729116e-06, "loss": 0.6841, "step": 18659 }, { "epoch": 0.5448016116317772, "grad_norm": 0.7193076093141471, "learning_rate": 2.528953771289538e-06, "loss": 0.6208, "step": 18660 }, { "epoch": 0.5448308078596246, "grad_norm": 0.7173881108590944, "learning_rate": 2.528791565287916e-06, "loss": 0.6452, "step": 18661 }, { "epoch": 0.5448600040874719, "grad_norm": 0.6462484872264405, "learning_rate": 2.528629359286294e-06, "loss": 0.522, "step": 18662 }, { "epoch": 0.5448892003153193, "grad_norm": 0.6910639108475127, "learning_rate": 2.5284671532846714e-06, "loss": 0.5818, "step": 18663 }, { "epoch": 0.5449183965431666, "grad_norm": 0.7141235559892251, "learning_rate": 2.5283049472830495e-06, "loss": 0.6655, "step": 18664 }, { "epoch": 0.544947592771014, "grad_norm": 0.7424189932708531, "learning_rate": 2.5281427412814275e-06, "loss": 0.7288, "step": 18665 }, { "epoch": 0.5449767889988614, "grad_norm": 0.6839473969866295, "learning_rate": 2.5279805352798055e-06, "loss": 0.6047, "step": 18666 }, { "epoch": 0.5450059852267087, "grad_norm": 0.7263319468980152, "learning_rate": 2.5278183292781835e-06, "loss": 0.6767, "step": 18667 }, { "epoch": 0.5450351814545561, "grad_norm": 0.7318107910866977, "learning_rate": 2.527656123276561e-06, "loss": 0.701, "step": 18668 }, { "epoch": 0.5450643776824035, "grad_norm": 0.6962345764352541, "learning_rate": 2.5274939172749395e-06, "loss": 0.6198, "step": 18669 }, { "epoch": 0.5450935739102508, "grad_norm": 0.7468623911210229, "learning_rate": 2.5273317112733175e-06, "loss": 0.7099, "step": 18670 }, { "epoch": 0.5451227701380982, "grad_norm": 0.6849024167308758, "learning_rate": 2.5271695052716955e-06, "loss": 0.6125, "step": 18671 }, { "epoch": 0.5451519663659455, "grad_norm": 0.6778997629732615, "learning_rate": 2.5270072992700735e-06, "loss": 0.5619, "step": 18672 }, { "epoch": 0.5451811625937929, "grad_norm": 0.8502524320850171, "learning_rate": 2.526845093268451e-06, "loss": 0.6401, "step": 18673 }, { "epoch": 0.5452103588216403, "grad_norm": 0.6876206440931731, "learning_rate": 2.526682887266829e-06, "loss": 0.5863, "step": 18674 }, { "epoch": 0.5452395550494876, "grad_norm": 0.7589171109709936, "learning_rate": 2.526520681265207e-06, "loss": 0.5545, "step": 18675 }, { "epoch": 0.545268751277335, "grad_norm": 0.7519932732308994, "learning_rate": 2.526358475263585e-06, "loss": 0.6579, "step": 18676 }, { "epoch": 0.5452979475051823, "grad_norm": 0.7093517272634143, "learning_rate": 2.526196269261963e-06, "loss": 0.6357, "step": 18677 }, { "epoch": 0.5453271437330297, "grad_norm": 0.6842391358596387, "learning_rate": 2.5260340632603407e-06, "loss": 0.5986, "step": 18678 }, { "epoch": 0.5453563399608771, "grad_norm": 0.8273843990283019, "learning_rate": 2.5258718572587187e-06, "loss": 0.6851, "step": 18679 }, { "epoch": 0.5453855361887244, "grad_norm": 0.7433378965207155, "learning_rate": 2.5257096512570967e-06, "loss": 0.6502, "step": 18680 }, { "epoch": 0.5454147324165718, "grad_norm": 0.754085893397633, "learning_rate": 2.5255474452554747e-06, "loss": 0.752, "step": 18681 }, { "epoch": 0.5454439286444192, "grad_norm": 0.8719944320595601, "learning_rate": 2.5253852392538523e-06, "loss": 0.7184, "step": 18682 }, { "epoch": 0.5454731248722665, "grad_norm": 0.7710603054689426, "learning_rate": 2.5252230332522303e-06, "loss": 0.6934, "step": 18683 }, { "epoch": 0.5455023211001139, "grad_norm": 0.7096305355443786, "learning_rate": 2.5250608272506083e-06, "loss": 0.6704, "step": 18684 }, { "epoch": 0.5455315173279612, "grad_norm": 0.6908044608053294, "learning_rate": 2.5248986212489863e-06, "loss": 0.6099, "step": 18685 }, { "epoch": 0.5455607135558086, "grad_norm": 0.734507845920452, "learning_rate": 2.5247364152473643e-06, "loss": 0.6501, "step": 18686 }, { "epoch": 0.545589909783656, "grad_norm": 0.7331519045196593, "learning_rate": 2.524574209245742e-06, "loss": 0.7089, "step": 18687 }, { "epoch": 0.5456191060115033, "grad_norm": 0.7650191852264189, "learning_rate": 2.5244120032441203e-06, "loss": 0.7024, "step": 18688 }, { "epoch": 0.5456483022393507, "grad_norm": 0.7358029313674563, "learning_rate": 2.5242497972424984e-06, "loss": 0.6461, "step": 18689 }, { "epoch": 0.545677498467198, "grad_norm": 0.7398985046066545, "learning_rate": 2.5240875912408764e-06, "loss": 0.6504, "step": 18690 }, { "epoch": 0.5457066946950454, "grad_norm": 0.7225658513289926, "learning_rate": 2.5239253852392544e-06, "loss": 0.6397, "step": 18691 }, { "epoch": 0.5457358909228928, "grad_norm": 0.7202285720074714, "learning_rate": 2.523763179237632e-06, "loss": 0.6348, "step": 18692 }, { "epoch": 0.5457650871507401, "grad_norm": 0.7238679533321092, "learning_rate": 2.52360097323601e-06, "loss": 0.693, "step": 18693 }, { "epoch": 0.5457942833785875, "grad_norm": 0.7948567096136653, "learning_rate": 2.523438767234388e-06, "loss": 0.6494, "step": 18694 }, { "epoch": 0.5458234796064348, "grad_norm": 0.6983314053876529, "learning_rate": 2.523276561232766e-06, "loss": 0.6215, "step": 18695 }, { "epoch": 0.5458526758342822, "grad_norm": 0.7388401964824073, "learning_rate": 2.523114355231144e-06, "loss": 0.6126, "step": 18696 }, { "epoch": 0.5458818720621296, "grad_norm": 0.7224749802482163, "learning_rate": 2.5229521492295216e-06, "loss": 0.6292, "step": 18697 }, { "epoch": 0.5459110682899769, "grad_norm": 0.7223582087977495, "learning_rate": 2.5227899432278996e-06, "loss": 0.677, "step": 18698 }, { "epoch": 0.5459402645178243, "grad_norm": 0.6809922831059321, "learning_rate": 2.5226277372262776e-06, "loss": 0.6174, "step": 18699 }, { "epoch": 0.5459694607456717, "grad_norm": 0.7070413116276981, "learning_rate": 2.5224655312246556e-06, "loss": 0.6397, "step": 18700 }, { "epoch": 0.545998656973519, "grad_norm": 0.7097425667753059, "learning_rate": 2.522303325223033e-06, "loss": 0.652, "step": 18701 }, { "epoch": 0.5460278532013664, "grad_norm": 0.7932324256758634, "learning_rate": 2.522141119221411e-06, "loss": 0.75, "step": 18702 }, { "epoch": 0.5460570494292137, "grad_norm": 0.729750486321297, "learning_rate": 2.521978913219789e-06, "loss": 0.6758, "step": 18703 }, { "epoch": 0.5460862456570611, "grad_norm": 0.713381843607947, "learning_rate": 2.521816707218167e-06, "loss": 0.5505, "step": 18704 }, { "epoch": 0.5461154418849085, "grad_norm": 0.7209562841393405, "learning_rate": 2.521654501216545e-06, "loss": 0.5973, "step": 18705 }, { "epoch": 0.5461446381127558, "grad_norm": 0.7248652991104344, "learning_rate": 2.5214922952149228e-06, "loss": 0.6424, "step": 18706 }, { "epoch": 0.5461738343406032, "grad_norm": 0.740479474658323, "learning_rate": 2.521330089213301e-06, "loss": 0.6802, "step": 18707 }, { "epoch": 0.5462030305684505, "grad_norm": 0.7160291501842327, "learning_rate": 2.521167883211679e-06, "loss": 0.5974, "step": 18708 }, { "epoch": 0.5462322267962979, "grad_norm": 0.7404174308225345, "learning_rate": 2.5210056772100572e-06, "loss": 0.6395, "step": 18709 }, { "epoch": 0.5462614230241453, "grad_norm": 0.7357027394702202, "learning_rate": 2.5208434712084352e-06, "loss": 0.6203, "step": 18710 }, { "epoch": 0.5462906192519926, "grad_norm": 0.7270325269633056, "learning_rate": 2.520681265206813e-06, "loss": 0.6695, "step": 18711 }, { "epoch": 0.54631981547984, "grad_norm": 0.7653268515977003, "learning_rate": 2.520519059205191e-06, "loss": 0.7183, "step": 18712 }, { "epoch": 0.5463490117076873, "grad_norm": 0.8135921934803743, "learning_rate": 2.520356853203569e-06, "loss": 0.8275, "step": 18713 }, { "epoch": 0.5463782079355347, "grad_norm": 0.7622740713887047, "learning_rate": 2.520194647201947e-06, "loss": 0.6438, "step": 18714 }, { "epoch": 0.5464074041633821, "grad_norm": 0.691340876380413, "learning_rate": 2.520032441200325e-06, "loss": 0.5692, "step": 18715 }, { "epoch": 0.5464366003912294, "grad_norm": 0.7180592379059635, "learning_rate": 2.5198702351987024e-06, "loss": 0.6399, "step": 18716 }, { "epoch": 0.5464657966190768, "grad_norm": 0.6821712832434971, "learning_rate": 2.5197080291970804e-06, "loss": 0.5894, "step": 18717 }, { "epoch": 0.5464949928469242, "grad_norm": 0.7060925617664644, "learning_rate": 2.5195458231954584e-06, "loss": 0.6365, "step": 18718 }, { "epoch": 0.5465241890747715, "grad_norm": 0.7200872072475428, "learning_rate": 2.5193836171938364e-06, "loss": 0.6297, "step": 18719 }, { "epoch": 0.5465533853026189, "grad_norm": 0.7826734294341006, "learning_rate": 2.519221411192214e-06, "loss": 0.7359, "step": 18720 }, { "epoch": 0.5465825815304662, "grad_norm": 0.7687362820040117, "learning_rate": 2.519059205190592e-06, "loss": 0.7428, "step": 18721 }, { "epoch": 0.5466117777583136, "grad_norm": 0.770963818351553, "learning_rate": 2.51889699918897e-06, "loss": 0.7266, "step": 18722 }, { "epoch": 0.546640973986161, "grad_norm": 0.7067872369659766, "learning_rate": 2.518734793187348e-06, "loss": 0.6177, "step": 18723 }, { "epoch": 0.5466701702140083, "grad_norm": 0.7080312615508102, "learning_rate": 2.5185725871857256e-06, "loss": 0.6102, "step": 18724 }, { "epoch": 0.5466993664418557, "grad_norm": 0.7345162344089678, "learning_rate": 2.5184103811841036e-06, "loss": 0.6831, "step": 18725 }, { "epoch": 0.546728562669703, "grad_norm": 0.7174063896235077, "learning_rate": 2.518248175182482e-06, "loss": 0.6343, "step": 18726 }, { "epoch": 0.5467577588975504, "grad_norm": 0.7379387911635523, "learning_rate": 2.51808596918086e-06, "loss": 0.7248, "step": 18727 }, { "epoch": 0.5467869551253978, "grad_norm": 0.7591622658025572, "learning_rate": 2.517923763179238e-06, "loss": 0.7234, "step": 18728 }, { "epoch": 0.5468161513532451, "grad_norm": 0.7085082287112071, "learning_rate": 2.517761557177616e-06, "loss": 0.6174, "step": 18729 }, { "epoch": 0.5468453475810925, "grad_norm": 0.7404692855486482, "learning_rate": 2.5175993511759937e-06, "loss": 0.6775, "step": 18730 }, { "epoch": 0.5468745438089399, "grad_norm": 0.7015992779264418, "learning_rate": 2.5174371451743717e-06, "loss": 0.5875, "step": 18731 }, { "epoch": 0.5469037400367872, "grad_norm": 0.718884551123253, "learning_rate": 2.5172749391727497e-06, "loss": 0.5992, "step": 18732 }, { "epoch": 0.5469329362646346, "grad_norm": 0.6988204261927133, "learning_rate": 2.5171127331711277e-06, "loss": 0.6511, "step": 18733 }, { "epoch": 0.5469621324924819, "grad_norm": 0.8025131875217854, "learning_rate": 2.5169505271695057e-06, "loss": 0.7107, "step": 18734 }, { "epoch": 0.5469913287203293, "grad_norm": 0.7474174828930011, "learning_rate": 2.5167883211678833e-06, "loss": 0.6677, "step": 18735 }, { "epoch": 0.5470205249481767, "grad_norm": 0.7338826242013625, "learning_rate": 2.5166261151662613e-06, "loss": 0.6757, "step": 18736 }, { "epoch": 0.547049721176024, "grad_norm": 0.7067029273849627, "learning_rate": 2.5164639091646393e-06, "loss": 0.6459, "step": 18737 }, { "epoch": 0.5470789174038714, "grad_norm": 0.8023610855151699, "learning_rate": 2.5163017031630173e-06, "loss": 0.6948, "step": 18738 }, { "epoch": 0.5471081136317187, "grad_norm": 0.7531907430630689, "learning_rate": 2.516139497161395e-06, "loss": 0.6524, "step": 18739 }, { "epoch": 0.5471373098595661, "grad_norm": 0.6989851767876136, "learning_rate": 2.515977291159773e-06, "loss": 0.6235, "step": 18740 }, { "epoch": 0.5471665060874135, "grad_norm": 0.7209957142513892, "learning_rate": 2.515815085158151e-06, "loss": 0.6705, "step": 18741 }, { "epoch": 0.5471957023152608, "grad_norm": 0.74486545536193, "learning_rate": 2.515652879156529e-06, "loss": 0.6747, "step": 18742 }, { "epoch": 0.5472248985431082, "grad_norm": 1.121733267224386, "learning_rate": 2.5154906731549065e-06, "loss": 0.627, "step": 18743 }, { "epoch": 0.5472540947709555, "grad_norm": 0.7575253067023736, "learning_rate": 2.5153284671532845e-06, "loss": 0.6963, "step": 18744 }, { "epoch": 0.5472832909988029, "grad_norm": 0.9295538279626941, "learning_rate": 2.515166261151663e-06, "loss": 0.7036, "step": 18745 }, { "epoch": 0.5473124872266503, "grad_norm": 0.7683614735829647, "learning_rate": 2.515004055150041e-06, "loss": 0.6774, "step": 18746 }, { "epoch": 0.5473416834544976, "grad_norm": 0.6855410284633813, "learning_rate": 2.514841849148419e-06, "loss": 0.5843, "step": 18747 }, { "epoch": 0.547370879682345, "grad_norm": 0.7272374880081905, "learning_rate": 2.514679643146797e-06, "loss": 0.6322, "step": 18748 }, { "epoch": 0.5474000759101924, "grad_norm": 0.7121077210142638, "learning_rate": 2.5145174371451745e-06, "loss": 0.6429, "step": 18749 }, { "epoch": 0.5474292721380397, "grad_norm": 0.7818440133166303, "learning_rate": 2.5143552311435525e-06, "loss": 0.6703, "step": 18750 }, { "epoch": 0.5474584683658871, "grad_norm": 0.7301324663705462, "learning_rate": 2.5141930251419305e-06, "loss": 0.632, "step": 18751 }, { "epoch": 0.5474876645937344, "grad_norm": 0.7291543024898681, "learning_rate": 2.5140308191403085e-06, "loss": 0.7017, "step": 18752 }, { "epoch": 0.5475168608215818, "grad_norm": 0.8054044702796798, "learning_rate": 2.5138686131386866e-06, "loss": 0.7603, "step": 18753 }, { "epoch": 0.5475460570494293, "grad_norm": 0.8018826569764056, "learning_rate": 2.513706407137064e-06, "loss": 0.7238, "step": 18754 }, { "epoch": 0.5475752532772766, "grad_norm": 0.6850231028759741, "learning_rate": 2.513544201135442e-06, "loss": 0.5852, "step": 18755 }, { "epoch": 0.547604449505124, "grad_norm": 0.6708305730456652, "learning_rate": 2.51338199513382e-06, "loss": 0.5754, "step": 18756 }, { "epoch": 0.5476336457329714, "grad_norm": 0.7418027825027247, "learning_rate": 2.513219789132198e-06, "loss": 0.6473, "step": 18757 }, { "epoch": 0.5476628419608187, "grad_norm": 0.7390819988666821, "learning_rate": 2.5130575831305757e-06, "loss": 0.6209, "step": 18758 }, { "epoch": 0.5476920381886661, "grad_norm": 0.7867757780822678, "learning_rate": 2.5128953771289537e-06, "loss": 0.7349, "step": 18759 }, { "epoch": 0.5477212344165134, "grad_norm": 0.7231842964584595, "learning_rate": 2.5127331711273318e-06, "loss": 0.6565, "step": 18760 }, { "epoch": 0.5477504306443608, "grad_norm": 0.7184320056364594, "learning_rate": 2.5125709651257098e-06, "loss": 0.6407, "step": 18761 }, { "epoch": 0.5477796268722082, "grad_norm": 0.7957650712720892, "learning_rate": 2.5124087591240873e-06, "loss": 0.7587, "step": 18762 }, { "epoch": 0.5478088231000555, "grad_norm": 0.6818075990294941, "learning_rate": 2.512246553122466e-06, "loss": 0.5628, "step": 18763 }, { "epoch": 0.5478380193279029, "grad_norm": 0.6907534434071725, "learning_rate": 2.5120843471208438e-06, "loss": 0.5923, "step": 18764 }, { "epoch": 0.5478672155557502, "grad_norm": 0.7541621882554573, "learning_rate": 2.5119221411192218e-06, "loss": 0.6674, "step": 18765 }, { "epoch": 0.5478964117835976, "grad_norm": 0.7524230400539058, "learning_rate": 2.5117599351176e-06, "loss": 0.6641, "step": 18766 }, { "epoch": 0.547925608011445, "grad_norm": 0.6757388655611418, "learning_rate": 2.511597729115978e-06, "loss": 0.5773, "step": 18767 }, { "epoch": 0.5479548042392923, "grad_norm": 0.7562311218949542, "learning_rate": 2.5114355231143554e-06, "loss": 0.6676, "step": 18768 }, { "epoch": 0.5479840004671397, "grad_norm": 0.7833642335680181, "learning_rate": 2.5112733171127334e-06, "loss": 0.7242, "step": 18769 }, { "epoch": 0.548013196694987, "grad_norm": 0.6616555002347485, "learning_rate": 2.5111111111111114e-06, "loss": 0.5829, "step": 18770 }, { "epoch": 0.5480423929228344, "grad_norm": 0.6854009067587908, "learning_rate": 2.5109489051094894e-06, "loss": 0.5556, "step": 18771 }, { "epoch": 0.5480715891506818, "grad_norm": 0.7753863146844024, "learning_rate": 2.5107866991078674e-06, "loss": 0.7344, "step": 18772 }, { "epoch": 0.5481007853785291, "grad_norm": 0.7434830081370954, "learning_rate": 2.510624493106245e-06, "loss": 0.6893, "step": 18773 }, { "epoch": 0.5481299816063765, "grad_norm": 0.739430185918039, "learning_rate": 2.510462287104623e-06, "loss": 0.6335, "step": 18774 }, { "epoch": 0.5481591778342239, "grad_norm": 0.9002911347201725, "learning_rate": 2.510300081103001e-06, "loss": 0.7021, "step": 18775 }, { "epoch": 0.5481883740620712, "grad_norm": 0.7721343094254163, "learning_rate": 2.510137875101379e-06, "loss": 0.7122, "step": 18776 }, { "epoch": 0.5482175702899186, "grad_norm": 0.7107224121985601, "learning_rate": 2.5099756690997566e-06, "loss": 0.6108, "step": 18777 }, { "epoch": 0.5482467665177659, "grad_norm": 0.7205902474692873, "learning_rate": 2.5098134630981346e-06, "loss": 0.6427, "step": 18778 }, { "epoch": 0.5482759627456133, "grad_norm": 0.766589605545357, "learning_rate": 2.5096512570965126e-06, "loss": 0.7338, "step": 18779 }, { "epoch": 0.5483051589734607, "grad_norm": 0.742447554539923, "learning_rate": 2.5094890510948906e-06, "loss": 0.661, "step": 18780 }, { "epoch": 0.548334355201308, "grad_norm": 0.6783621987887982, "learning_rate": 2.509326845093268e-06, "loss": 0.5755, "step": 18781 }, { "epoch": 0.5483635514291554, "grad_norm": 0.7429053887322508, "learning_rate": 2.509164639091647e-06, "loss": 0.7186, "step": 18782 }, { "epoch": 0.5483927476570027, "grad_norm": 0.7137960964598374, "learning_rate": 2.5090024330900246e-06, "loss": 0.646, "step": 18783 }, { "epoch": 0.5484219438848501, "grad_norm": 0.771347057050521, "learning_rate": 2.5088402270884026e-06, "loss": 0.7734, "step": 18784 }, { "epoch": 0.5484511401126975, "grad_norm": 0.7520762863379968, "learning_rate": 2.5086780210867807e-06, "loss": 0.6585, "step": 18785 }, { "epoch": 0.5484803363405448, "grad_norm": 0.7403897693899213, "learning_rate": 2.5085158150851587e-06, "loss": 0.6462, "step": 18786 }, { "epoch": 0.5485095325683922, "grad_norm": 0.7039378127278236, "learning_rate": 2.5083536090835362e-06, "loss": 0.6286, "step": 18787 }, { "epoch": 0.5485387287962395, "grad_norm": 0.7232799697015032, "learning_rate": 2.5081914030819142e-06, "loss": 0.6439, "step": 18788 }, { "epoch": 0.5485679250240869, "grad_norm": 0.7555551995977454, "learning_rate": 2.5080291970802923e-06, "loss": 0.6354, "step": 18789 }, { "epoch": 0.5485971212519343, "grad_norm": 0.7794140349719244, "learning_rate": 2.5078669910786703e-06, "loss": 0.6856, "step": 18790 }, { "epoch": 0.5486263174797816, "grad_norm": 0.7480180350716206, "learning_rate": 2.5077047850770483e-06, "loss": 0.7217, "step": 18791 }, { "epoch": 0.548655513707629, "grad_norm": 0.7551180037996819, "learning_rate": 2.507542579075426e-06, "loss": 0.6566, "step": 18792 }, { "epoch": 0.5486847099354764, "grad_norm": 0.6721803786004975, "learning_rate": 2.507380373073804e-06, "loss": 0.5419, "step": 18793 }, { "epoch": 0.5487139061633237, "grad_norm": 0.7234366749648107, "learning_rate": 2.507218167072182e-06, "loss": 0.6278, "step": 18794 }, { "epoch": 0.5487431023911711, "grad_norm": 0.7361870849173091, "learning_rate": 2.50705596107056e-06, "loss": 0.676, "step": 18795 }, { "epoch": 0.5487722986190184, "grad_norm": 0.7024050273891692, "learning_rate": 2.5068937550689375e-06, "loss": 0.5793, "step": 18796 }, { "epoch": 0.5488014948468658, "grad_norm": 0.7361895939675197, "learning_rate": 2.5067315490673155e-06, "loss": 0.6568, "step": 18797 }, { "epoch": 0.5488306910747132, "grad_norm": 0.9008393696970312, "learning_rate": 2.5065693430656935e-06, "loss": 0.6693, "step": 18798 }, { "epoch": 0.5488598873025605, "grad_norm": 0.6721845982799877, "learning_rate": 2.5064071370640715e-06, "loss": 0.5972, "step": 18799 }, { "epoch": 0.5488890835304079, "grad_norm": 0.7246754047760725, "learning_rate": 2.506244931062449e-06, "loss": 0.6641, "step": 18800 }, { "epoch": 0.5489182797582552, "grad_norm": 0.950837552329651, "learning_rate": 2.506082725060828e-06, "loss": 0.7197, "step": 18801 }, { "epoch": 0.5489474759861026, "grad_norm": 0.6988665762642913, "learning_rate": 2.5059205190592055e-06, "loss": 0.574, "step": 18802 }, { "epoch": 0.54897667221395, "grad_norm": 0.7779083298304166, "learning_rate": 2.5057583130575835e-06, "loss": 0.5507, "step": 18803 }, { "epoch": 0.5490058684417973, "grad_norm": 0.6908733754711583, "learning_rate": 2.5055961070559615e-06, "loss": 0.6163, "step": 18804 }, { "epoch": 0.5490350646696447, "grad_norm": 0.772606709906776, "learning_rate": 2.5054339010543395e-06, "loss": 0.7113, "step": 18805 }, { "epoch": 0.549064260897492, "grad_norm": 0.7271114391577949, "learning_rate": 2.505271695052717e-06, "loss": 0.6189, "step": 18806 }, { "epoch": 0.5490934571253394, "grad_norm": 0.7188558584221405, "learning_rate": 2.505109489051095e-06, "loss": 0.6246, "step": 18807 }, { "epoch": 0.5491226533531868, "grad_norm": 0.6963249556264738, "learning_rate": 2.504947283049473e-06, "loss": 0.6564, "step": 18808 }, { "epoch": 0.5491518495810341, "grad_norm": 0.7466460537876874, "learning_rate": 2.504785077047851e-06, "loss": 0.6667, "step": 18809 }, { "epoch": 0.5491810458088815, "grad_norm": 0.7408960853124094, "learning_rate": 2.504622871046229e-06, "loss": 0.6665, "step": 18810 }, { "epoch": 0.5492102420367289, "grad_norm": 0.7895042581576339, "learning_rate": 2.5044606650446067e-06, "loss": 0.7337, "step": 18811 }, { "epoch": 0.5492394382645762, "grad_norm": 0.724467617607926, "learning_rate": 2.5042984590429847e-06, "loss": 0.6103, "step": 18812 }, { "epoch": 0.5492686344924236, "grad_norm": 0.8770267813179188, "learning_rate": 2.5041362530413627e-06, "loss": 0.6852, "step": 18813 }, { "epoch": 0.5492978307202709, "grad_norm": 0.7622792846921442, "learning_rate": 2.5039740470397407e-06, "loss": 0.6862, "step": 18814 }, { "epoch": 0.5493270269481183, "grad_norm": 0.7757061035406934, "learning_rate": 2.5038118410381183e-06, "loss": 0.6018, "step": 18815 }, { "epoch": 0.5493562231759657, "grad_norm": 0.8128005983519095, "learning_rate": 2.5036496350364963e-06, "loss": 0.6456, "step": 18816 }, { "epoch": 0.549385419403813, "grad_norm": 0.6844331285328047, "learning_rate": 2.5034874290348743e-06, "loss": 0.5572, "step": 18817 }, { "epoch": 0.5494146156316604, "grad_norm": 0.7839858272212781, "learning_rate": 2.5033252230332523e-06, "loss": 0.7321, "step": 18818 }, { "epoch": 0.5494438118595077, "grad_norm": 0.7367645148652427, "learning_rate": 2.50316301703163e-06, "loss": 0.6708, "step": 18819 }, { "epoch": 0.5494730080873551, "grad_norm": 0.73139959240221, "learning_rate": 2.5030008110300088e-06, "loss": 0.7122, "step": 18820 }, { "epoch": 0.5495022043152025, "grad_norm": 0.7185200539787571, "learning_rate": 2.5028386050283864e-06, "loss": 0.6589, "step": 18821 }, { "epoch": 0.5495314005430498, "grad_norm": 0.7391458985125325, "learning_rate": 2.5026763990267644e-06, "loss": 0.631, "step": 18822 }, { "epoch": 0.5495605967708972, "grad_norm": 0.7437340263864013, "learning_rate": 2.5025141930251424e-06, "loss": 0.6255, "step": 18823 }, { "epoch": 0.5495897929987446, "grad_norm": 0.8008497343409358, "learning_rate": 2.5023519870235204e-06, "loss": 0.6731, "step": 18824 }, { "epoch": 0.5496189892265919, "grad_norm": 0.7708342449484652, "learning_rate": 2.502189781021898e-06, "loss": 0.6675, "step": 18825 }, { "epoch": 0.5496481854544393, "grad_norm": 0.7417467194540691, "learning_rate": 2.502027575020276e-06, "loss": 0.6571, "step": 18826 }, { "epoch": 0.5496773816822866, "grad_norm": 0.7367357833812977, "learning_rate": 2.501865369018654e-06, "loss": 0.7036, "step": 18827 }, { "epoch": 0.549706577910134, "grad_norm": 0.7027801466672614, "learning_rate": 2.501703163017032e-06, "loss": 0.6233, "step": 18828 }, { "epoch": 0.5497357741379814, "grad_norm": 0.7606685422435252, "learning_rate": 2.50154095701541e-06, "loss": 0.7074, "step": 18829 }, { "epoch": 0.5497649703658287, "grad_norm": 0.7523028299188927, "learning_rate": 2.5013787510137876e-06, "loss": 0.6296, "step": 18830 }, { "epoch": 0.5497941665936761, "grad_norm": 0.7127858500401791, "learning_rate": 2.5012165450121656e-06, "loss": 0.597, "step": 18831 }, { "epoch": 0.5498233628215234, "grad_norm": 0.6971182345204617, "learning_rate": 2.5010543390105436e-06, "loss": 0.6056, "step": 18832 }, { "epoch": 0.5498525590493708, "grad_norm": 0.7395792259923523, "learning_rate": 2.5008921330089216e-06, "loss": 0.6772, "step": 18833 }, { "epoch": 0.5498817552772182, "grad_norm": 0.7115611501283522, "learning_rate": 2.500729927007299e-06, "loss": 0.5721, "step": 18834 }, { "epoch": 0.5499109515050655, "grad_norm": 0.7220785987500586, "learning_rate": 2.500567721005677e-06, "loss": 0.6532, "step": 18835 }, { "epoch": 0.5499401477329129, "grad_norm": 0.7133122499553981, "learning_rate": 2.500405515004055e-06, "loss": 0.5983, "step": 18836 }, { "epoch": 0.5499693439607602, "grad_norm": 0.7777820938602603, "learning_rate": 2.500243309002433e-06, "loss": 0.6588, "step": 18837 }, { "epoch": 0.5499985401886076, "grad_norm": 0.7372942271037598, "learning_rate": 2.5000811030008108e-06, "loss": 0.6758, "step": 18838 }, { "epoch": 0.550027736416455, "grad_norm": 0.8782454760752226, "learning_rate": 2.499918896999189e-06, "loss": 0.6477, "step": 18839 }, { "epoch": 0.5500569326443023, "grad_norm": 0.7386127522394232, "learning_rate": 2.4997566909975672e-06, "loss": 0.6311, "step": 18840 }, { "epoch": 0.5500861288721497, "grad_norm": 0.748382487406478, "learning_rate": 2.499594484995945e-06, "loss": 0.6616, "step": 18841 }, { "epoch": 0.550115325099997, "grad_norm": 0.7222834788524115, "learning_rate": 2.499432278994323e-06, "loss": 0.6778, "step": 18842 }, { "epoch": 0.5501445213278444, "grad_norm": 0.7326944319883102, "learning_rate": 2.499270072992701e-06, "loss": 0.6859, "step": 18843 }, { "epoch": 0.5501737175556918, "grad_norm": 0.7301713887652863, "learning_rate": 2.499107866991079e-06, "loss": 0.6766, "step": 18844 }, { "epoch": 0.5502029137835391, "grad_norm": 0.7272474813657585, "learning_rate": 2.498945660989457e-06, "loss": 0.6172, "step": 18845 }, { "epoch": 0.5502321100113865, "grad_norm": 0.7142002377272266, "learning_rate": 2.498783454987835e-06, "loss": 0.6285, "step": 18846 }, { "epoch": 0.5502613062392339, "grad_norm": 0.7198678313110155, "learning_rate": 2.498621248986213e-06, "loss": 0.6257, "step": 18847 }, { "epoch": 0.5502905024670812, "grad_norm": 0.7601632408759138, "learning_rate": 2.4984590429845904e-06, "loss": 0.6421, "step": 18848 }, { "epoch": 0.5503196986949286, "grad_norm": 0.7506579538423317, "learning_rate": 2.4982968369829684e-06, "loss": 0.6474, "step": 18849 }, { "epoch": 0.5503488949227759, "grad_norm": 0.7160239027764204, "learning_rate": 2.4981346309813464e-06, "loss": 0.6094, "step": 18850 }, { "epoch": 0.5503780911506233, "grad_norm": 0.7788634439305541, "learning_rate": 2.4979724249797244e-06, "loss": 0.6961, "step": 18851 }, { "epoch": 0.5504072873784707, "grad_norm": 0.7593485870321188, "learning_rate": 2.4978102189781024e-06, "loss": 0.665, "step": 18852 }, { "epoch": 0.550436483606318, "grad_norm": 0.7654997416672732, "learning_rate": 2.4976480129764805e-06, "loss": 0.7205, "step": 18853 }, { "epoch": 0.5504656798341654, "grad_norm": 0.7086629447562216, "learning_rate": 2.4974858069748585e-06, "loss": 0.6514, "step": 18854 }, { "epoch": 0.5504948760620128, "grad_norm": 0.6912256163881384, "learning_rate": 2.4973236009732365e-06, "loss": 0.6295, "step": 18855 }, { "epoch": 0.5505240722898601, "grad_norm": 0.6992140181734177, "learning_rate": 2.497161394971614e-06, "loss": 0.6266, "step": 18856 }, { "epoch": 0.5505532685177075, "grad_norm": 0.7697919150903237, "learning_rate": 2.496999188969992e-06, "loss": 0.7093, "step": 18857 }, { "epoch": 0.5505824647455548, "grad_norm": 0.7261136760605132, "learning_rate": 2.49683698296837e-06, "loss": 0.6723, "step": 18858 }, { "epoch": 0.5506116609734022, "grad_norm": 0.7392499840551486, "learning_rate": 2.496674776966748e-06, "loss": 0.6336, "step": 18859 }, { "epoch": 0.5506408572012496, "grad_norm": 0.704472610953999, "learning_rate": 2.4965125709651257e-06, "loss": 0.6358, "step": 18860 }, { "epoch": 0.5506700534290969, "grad_norm": 0.6454371511104775, "learning_rate": 2.4963503649635037e-06, "loss": 0.5211, "step": 18861 }, { "epoch": 0.5506992496569443, "grad_norm": 0.8928255548054541, "learning_rate": 2.4961881589618817e-06, "loss": 0.7149, "step": 18862 }, { "epoch": 0.5507284458847916, "grad_norm": 0.7873624852163138, "learning_rate": 2.4960259529602597e-06, "loss": 0.7757, "step": 18863 }, { "epoch": 0.550757642112639, "grad_norm": 0.76605526069026, "learning_rate": 2.4958637469586377e-06, "loss": 0.7278, "step": 18864 }, { "epoch": 0.5507868383404864, "grad_norm": 0.7672349146305975, "learning_rate": 2.4957015409570157e-06, "loss": 0.6991, "step": 18865 }, { "epoch": 0.5508160345683337, "grad_norm": 0.7661594210258621, "learning_rate": 2.4955393349553937e-06, "loss": 0.6589, "step": 18866 }, { "epoch": 0.5508452307961811, "grad_norm": 0.7331682692903172, "learning_rate": 2.4953771289537713e-06, "loss": 0.6686, "step": 18867 }, { "epoch": 0.5508744270240284, "grad_norm": 0.7743236446343462, "learning_rate": 2.4952149229521493e-06, "loss": 0.6868, "step": 18868 }, { "epoch": 0.5509036232518758, "grad_norm": 0.7242673684307334, "learning_rate": 2.4950527169505273e-06, "loss": 0.669, "step": 18869 }, { "epoch": 0.5509328194797232, "grad_norm": 0.6759900270114877, "learning_rate": 2.4948905109489053e-06, "loss": 0.59, "step": 18870 }, { "epoch": 0.5509620157075705, "grad_norm": 0.6965877340476981, "learning_rate": 2.4947283049472833e-06, "loss": 0.56, "step": 18871 }, { "epoch": 0.5509912119354179, "grad_norm": 0.7079217333651284, "learning_rate": 2.4945660989456613e-06, "loss": 0.5602, "step": 18872 }, { "epoch": 0.5510204081632653, "grad_norm": 0.70593183283407, "learning_rate": 2.4944038929440393e-06, "loss": 0.6345, "step": 18873 }, { "epoch": 0.5510496043911126, "grad_norm": 0.7369641803599165, "learning_rate": 2.4942416869424173e-06, "loss": 0.6042, "step": 18874 }, { "epoch": 0.5510788006189601, "grad_norm": 0.6660769841408888, "learning_rate": 2.494079480940795e-06, "loss": 0.5708, "step": 18875 }, { "epoch": 0.5511079968468074, "grad_norm": 0.7366086928876934, "learning_rate": 2.493917274939173e-06, "loss": 0.6181, "step": 18876 }, { "epoch": 0.5511371930746548, "grad_norm": 0.700960274090751, "learning_rate": 2.493755068937551e-06, "loss": 0.6405, "step": 18877 }, { "epoch": 0.5511663893025022, "grad_norm": 0.8006548971096177, "learning_rate": 2.493592862935929e-06, "loss": 0.7554, "step": 18878 }, { "epoch": 0.5511955855303495, "grad_norm": 0.7775714945740602, "learning_rate": 2.4934306569343065e-06, "loss": 0.7309, "step": 18879 }, { "epoch": 0.5512247817581969, "grad_norm": 0.7028253644774461, "learning_rate": 2.4932684509326845e-06, "loss": 0.6235, "step": 18880 }, { "epoch": 0.5512539779860443, "grad_norm": 0.719668595488342, "learning_rate": 2.493106244931063e-06, "loss": 0.6747, "step": 18881 }, { "epoch": 0.5512831742138916, "grad_norm": 0.7413339278834531, "learning_rate": 2.4929440389294405e-06, "loss": 0.6681, "step": 18882 }, { "epoch": 0.551312370441739, "grad_norm": 0.7854972461029883, "learning_rate": 2.4927818329278185e-06, "loss": 0.7566, "step": 18883 }, { "epoch": 0.5513415666695863, "grad_norm": 0.8038670223805168, "learning_rate": 2.4926196269261965e-06, "loss": 0.7483, "step": 18884 }, { "epoch": 0.5513707628974337, "grad_norm": 0.7211355016987664, "learning_rate": 2.4924574209245746e-06, "loss": 0.6485, "step": 18885 }, { "epoch": 0.5513999591252811, "grad_norm": 0.7413474016386333, "learning_rate": 2.492295214922952e-06, "loss": 0.6958, "step": 18886 }, { "epoch": 0.5514291553531284, "grad_norm": 0.7090001995359393, "learning_rate": 2.49213300892133e-06, "loss": 0.6379, "step": 18887 }, { "epoch": 0.5514583515809758, "grad_norm": 0.702345533513001, "learning_rate": 2.491970802919708e-06, "loss": 0.6088, "step": 18888 }, { "epoch": 0.5514875478088231, "grad_norm": 0.6724534698100944, "learning_rate": 2.491808596918086e-06, "loss": 0.5676, "step": 18889 }, { "epoch": 0.5515167440366705, "grad_norm": 0.6575245167238198, "learning_rate": 2.491646390916464e-06, "loss": 0.5527, "step": 18890 }, { "epoch": 0.5515459402645179, "grad_norm": 0.7497447959904562, "learning_rate": 2.491484184914842e-06, "loss": 0.6825, "step": 18891 }, { "epoch": 0.5515751364923652, "grad_norm": 0.749455561674058, "learning_rate": 2.49132197891322e-06, "loss": 0.6049, "step": 18892 }, { "epoch": 0.5516043327202126, "grad_norm": 0.7253237515182699, "learning_rate": 2.491159772911598e-06, "loss": 0.6791, "step": 18893 }, { "epoch": 0.55163352894806, "grad_norm": 0.6884891902511465, "learning_rate": 2.4909975669099758e-06, "loss": 0.565, "step": 18894 }, { "epoch": 0.5516627251759073, "grad_norm": 0.7013705493892346, "learning_rate": 2.4908353609083538e-06, "loss": 0.6072, "step": 18895 }, { "epoch": 0.5516919214037547, "grad_norm": 0.7572569442921372, "learning_rate": 2.4906731549067318e-06, "loss": 0.7091, "step": 18896 }, { "epoch": 0.551721117631602, "grad_norm": 0.7157053625031494, "learning_rate": 2.49051094890511e-06, "loss": 0.6369, "step": 18897 }, { "epoch": 0.5517503138594494, "grad_norm": 0.7227632453812033, "learning_rate": 2.4903487429034874e-06, "loss": 0.6595, "step": 18898 }, { "epoch": 0.5517795100872968, "grad_norm": 0.8200747875785143, "learning_rate": 2.4901865369018654e-06, "loss": 0.6993, "step": 18899 }, { "epoch": 0.5518087063151441, "grad_norm": 0.6843321159892303, "learning_rate": 2.490024330900244e-06, "loss": 0.6051, "step": 18900 }, { "epoch": 0.5518379025429915, "grad_norm": 0.7406732494347543, "learning_rate": 2.4898621248986214e-06, "loss": 0.6901, "step": 18901 }, { "epoch": 0.5518670987708388, "grad_norm": 0.6897652953888468, "learning_rate": 2.4896999188969994e-06, "loss": 0.5664, "step": 18902 }, { "epoch": 0.5518962949986862, "grad_norm": 0.7247506415234981, "learning_rate": 2.4895377128953774e-06, "loss": 0.647, "step": 18903 }, { "epoch": 0.5519254912265336, "grad_norm": 0.7761850557437259, "learning_rate": 2.4893755068937554e-06, "loss": 0.7185, "step": 18904 }, { "epoch": 0.5519546874543809, "grad_norm": 0.7167052368238228, "learning_rate": 2.489213300892133e-06, "loss": 0.6352, "step": 18905 }, { "epoch": 0.5519838836822283, "grad_norm": 0.7580199169975613, "learning_rate": 2.489051094890511e-06, "loss": 0.6871, "step": 18906 }, { "epoch": 0.5520130799100756, "grad_norm": 0.7147994184621199, "learning_rate": 2.488888888888889e-06, "loss": 0.6535, "step": 18907 }, { "epoch": 0.552042276137923, "grad_norm": 0.725645368007636, "learning_rate": 2.488726682887267e-06, "loss": 0.6197, "step": 18908 }, { "epoch": 0.5520714723657704, "grad_norm": 0.6994270547528865, "learning_rate": 2.488564476885645e-06, "loss": 0.5872, "step": 18909 }, { "epoch": 0.5521006685936177, "grad_norm": 0.7628789101555437, "learning_rate": 2.488402270884023e-06, "loss": 0.606, "step": 18910 }, { "epoch": 0.5521298648214651, "grad_norm": 0.7184335650628827, "learning_rate": 2.488240064882401e-06, "loss": 0.6119, "step": 18911 }, { "epoch": 0.5521590610493124, "grad_norm": 0.7804914382164998, "learning_rate": 2.488077858880779e-06, "loss": 0.5722, "step": 18912 }, { "epoch": 0.5521882572771598, "grad_norm": 0.6576923000731582, "learning_rate": 2.4879156528791566e-06, "loss": 0.5412, "step": 18913 }, { "epoch": 0.5522174535050072, "grad_norm": 0.7708531911936867, "learning_rate": 2.4877534468775346e-06, "loss": 0.638, "step": 18914 }, { "epoch": 0.5522466497328545, "grad_norm": 0.738989086023005, "learning_rate": 2.4875912408759126e-06, "loss": 0.6048, "step": 18915 }, { "epoch": 0.5522758459607019, "grad_norm": 0.7728123176306523, "learning_rate": 2.4874290348742906e-06, "loss": 0.6814, "step": 18916 }, { "epoch": 0.5523050421885493, "grad_norm": 0.673524468076718, "learning_rate": 2.4872668288726682e-06, "loss": 0.6042, "step": 18917 }, { "epoch": 0.5523342384163966, "grad_norm": 0.7746058900936804, "learning_rate": 2.4871046228710462e-06, "loss": 0.7388, "step": 18918 }, { "epoch": 0.552363434644244, "grad_norm": 0.7192412541075608, "learning_rate": 2.4869424168694247e-06, "loss": 0.6165, "step": 18919 }, { "epoch": 0.5523926308720913, "grad_norm": 0.7516957294309613, "learning_rate": 2.4867802108678023e-06, "loss": 0.6381, "step": 18920 }, { "epoch": 0.5524218270999387, "grad_norm": 0.7189042458460645, "learning_rate": 2.4866180048661803e-06, "loss": 0.6288, "step": 18921 }, { "epoch": 0.5524510233277861, "grad_norm": 0.7080259987423769, "learning_rate": 2.4864557988645583e-06, "loss": 0.6595, "step": 18922 }, { "epoch": 0.5524802195556334, "grad_norm": 0.8005855117517819, "learning_rate": 2.4862935928629363e-06, "loss": 0.6909, "step": 18923 }, { "epoch": 0.5525094157834808, "grad_norm": 0.6983170142728489, "learning_rate": 2.486131386861314e-06, "loss": 0.5951, "step": 18924 }, { "epoch": 0.5525386120113281, "grad_norm": 0.6923404150027631, "learning_rate": 2.485969180859692e-06, "loss": 0.6292, "step": 18925 }, { "epoch": 0.5525678082391755, "grad_norm": 0.7084194792636922, "learning_rate": 2.48580697485807e-06, "loss": 0.6242, "step": 18926 }, { "epoch": 0.5525970044670229, "grad_norm": 0.7880935867535636, "learning_rate": 2.485644768856448e-06, "loss": 0.6823, "step": 18927 }, { "epoch": 0.5526262006948702, "grad_norm": 0.6971265368359613, "learning_rate": 2.485482562854826e-06, "loss": 0.5912, "step": 18928 }, { "epoch": 0.5526553969227176, "grad_norm": 0.7318177426693985, "learning_rate": 2.485320356853204e-06, "loss": 0.6386, "step": 18929 }, { "epoch": 0.552684593150565, "grad_norm": 0.7413953757748618, "learning_rate": 2.485158150851582e-06, "loss": 0.7151, "step": 18930 }, { "epoch": 0.5527137893784123, "grad_norm": 0.6652785490375708, "learning_rate": 2.48499594484996e-06, "loss": 0.5556, "step": 18931 }, { "epoch": 0.5527429856062597, "grad_norm": 0.767905431475762, "learning_rate": 2.4848337388483375e-06, "loss": 0.7276, "step": 18932 }, { "epoch": 0.552772181834107, "grad_norm": 0.7188730929175414, "learning_rate": 2.4846715328467155e-06, "loss": 0.6668, "step": 18933 }, { "epoch": 0.5528013780619544, "grad_norm": 0.7314617856655178, "learning_rate": 2.4845093268450935e-06, "loss": 0.6176, "step": 18934 }, { "epoch": 0.5528305742898018, "grad_norm": 0.7738986076076766, "learning_rate": 2.4843471208434715e-06, "loss": 0.6936, "step": 18935 }, { "epoch": 0.5528597705176491, "grad_norm": 0.7299756355653293, "learning_rate": 2.484184914841849e-06, "loss": 0.5807, "step": 18936 }, { "epoch": 0.5528889667454965, "grad_norm": 0.7539792672530455, "learning_rate": 2.484022708840227e-06, "loss": 0.6608, "step": 18937 }, { "epoch": 0.5529181629733438, "grad_norm": 0.6892017023534261, "learning_rate": 2.4838605028386055e-06, "loss": 0.6073, "step": 18938 }, { "epoch": 0.5529473592011912, "grad_norm": 0.7069148388052565, "learning_rate": 2.483698296836983e-06, "loss": 0.6678, "step": 18939 }, { "epoch": 0.5529765554290386, "grad_norm": 0.6871451562708332, "learning_rate": 2.483536090835361e-06, "loss": 0.5591, "step": 18940 }, { "epoch": 0.5530057516568859, "grad_norm": 0.6911737351873773, "learning_rate": 2.483373884833739e-06, "loss": 0.5564, "step": 18941 }, { "epoch": 0.5530349478847333, "grad_norm": 0.7755168494789638, "learning_rate": 2.483211678832117e-06, "loss": 0.731, "step": 18942 }, { "epoch": 0.5530641441125806, "grad_norm": 0.7031889856966975, "learning_rate": 2.4830494728304947e-06, "loss": 0.607, "step": 18943 }, { "epoch": 0.553093340340428, "grad_norm": 0.6670737267522928, "learning_rate": 2.4828872668288727e-06, "loss": 0.5811, "step": 18944 }, { "epoch": 0.5531225365682754, "grad_norm": 0.7378371598431966, "learning_rate": 2.4827250608272507e-06, "loss": 0.6642, "step": 18945 }, { "epoch": 0.5531517327961227, "grad_norm": 0.7402008776325324, "learning_rate": 2.4825628548256287e-06, "loss": 0.6519, "step": 18946 }, { "epoch": 0.5531809290239701, "grad_norm": 0.6983584580406235, "learning_rate": 2.4824006488240067e-06, "loss": 0.5813, "step": 18947 }, { "epoch": 0.5532101252518175, "grad_norm": 0.7342802253771149, "learning_rate": 2.4822384428223847e-06, "loss": 0.6431, "step": 18948 }, { "epoch": 0.5532393214796648, "grad_norm": 0.7566669311538637, "learning_rate": 2.4820762368207628e-06, "loss": 0.7318, "step": 18949 }, { "epoch": 0.5532685177075122, "grad_norm": 0.7136829195989443, "learning_rate": 2.4819140308191408e-06, "loss": 0.656, "step": 18950 }, { "epoch": 0.5532977139353595, "grad_norm": 0.6465922071253495, "learning_rate": 2.4817518248175183e-06, "loss": 0.5502, "step": 18951 }, { "epoch": 0.5533269101632069, "grad_norm": 0.7530532550419629, "learning_rate": 2.4815896188158964e-06, "loss": 0.6895, "step": 18952 }, { "epoch": 0.5533561063910543, "grad_norm": 0.7750086435652563, "learning_rate": 2.4814274128142744e-06, "loss": 0.7528, "step": 18953 }, { "epoch": 0.5533853026189016, "grad_norm": 0.7003262193444687, "learning_rate": 2.4812652068126524e-06, "loss": 0.6128, "step": 18954 }, { "epoch": 0.553414498846749, "grad_norm": 0.7760902158402526, "learning_rate": 2.48110300081103e-06, "loss": 0.6554, "step": 18955 }, { "epoch": 0.5534436950745963, "grad_norm": 0.690064629888574, "learning_rate": 2.480940794809408e-06, "loss": 0.5851, "step": 18956 }, { "epoch": 0.5534728913024437, "grad_norm": 0.7671459113123844, "learning_rate": 2.4807785888077864e-06, "loss": 0.7182, "step": 18957 }, { "epoch": 0.5535020875302911, "grad_norm": 0.7576216933673163, "learning_rate": 2.480616382806164e-06, "loss": 0.6301, "step": 18958 }, { "epoch": 0.5535312837581384, "grad_norm": 0.7249164314639227, "learning_rate": 2.480454176804542e-06, "loss": 0.6347, "step": 18959 }, { "epoch": 0.5535604799859858, "grad_norm": 0.7678443124522398, "learning_rate": 2.48029197080292e-06, "loss": 0.6734, "step": 18960 }, { "epoch": 0.5535896762138331, "grad_norm": 0.7408768546234005, "learning_rate": 2.480129764801298e-06, "loss": 0.7165, "step": 18961 }, { "epoch": 0.5536188724416805, "grad_norm": 0.8469669030743485, "learning_rate": 2.4799675587996756e-06, "loss": 0.6941, "step": 18962 }, { "epoch": 0.5536480686695279, "grad_norm": 0.6997670469444437, "learning_rate": 2.4798053527980536e-06, "loss": 0.62, "step": 18963 }, { "epoch": 0.5536772648973752, "grad_norm": 0.6727582384943042, "learning_rate": 2.4796431467964316e-06, "loss": 0.5845, "step": 18964 }, { "epoch": 0.5537064611252226, "grad_norm": 0.7183211358064474, "learning_rate": 2.4794809407948096e-06, "loss": 0.6446, "step": 18965 }, { "epoch": 0.55373565735307, "grad_norm": 0.7461156267121524, "learning_rate": 2.4793187347931876e-06, "loss": 0.5947, "step": 18966 }, { "epoch": 0.5537648535809173, "grad_norm": 0.7765586028723811, "learning_rate": 2.4791565287915656e-06, "loss": 0.6896, "step": 18967 }, { "epoch": 0.5537940498087647, "grad_norm": 0.7359066882180796, "learning_rate": 2.4789943227899436e-06, "loss": 0.6979, "step": 18968 }, { "epoch": 0.553823246036612, "grad_norm": 0.7368780771022444, "learning_rate": 2.4788321167883216e-06, "loss": 0.6198, "step": 18969 }, { "epoch": 0.5538524422644594, "grad_norm": 0.6791895274546501, "learning_rate": 2.478669910786699e-06, "loss": 0.5779, "step": 18970 }, { "epoch": 0.5538816384923068, "grad_norm": 0.6950285919244934, "learning_rate": 2.478507704785077e-06, "loss": 0.6187, "step": 18971 }, { "epoch": 0.5539108347201541, "grad_norm": 0.7513210635554637, "learning_rate": 2.4783454987834552e-06, "loss": 0.6643, "step": 18972 }, { "epoch": 0.5539400309480015, "grad_norm": 0.7139000560899685, "learning_rate": 2.4781832927818332e-06, "loss": 0.5744, "step": 18973 }, { "epoch": 0.5539692271758488, "grad_norm": 0.6578397299186031, "learning_rate": 2.478021086780211e-06, "loss": 0.5415, "step": 18974 }, { "epoch": 0.5539984234036962, "grad_norm": 0.7700140826184382, "learning_rate": 2.477858880778589e-06, "loss": 0.7596, "step": 18975 }, { "epoch": 0.5540276196315436, "grad_norm": 0.6980568603979334, "learning_rate": 2.4776966747769672e-06, "loss": 0.6252, "step": 18976 }, { "epoch": 0.5540568158593909, "grad_norm": 0.7537527222084899, "learning_rate": 2.477534468775345e-06, "loss": 0.6807, "step": 18977 }, { "epoch": 0.5540860120872383, "grad_norm": 0.7887359481471268, "learning_rate": 2.477372262773723e-06, "loss": 0.6248, "step": 18978 }, { "epoch": 0.5541152083150856, "grad_norm": 0.8907084342386707, "learning_rate": 2.477210056772101e-06, "loss": 0.7633, "step": 18979 }, { "epoch": 0.554144404542933, "grad_norm": 0.8725686076364878, "learning_rate": 2.477047850770479e-06, "loss": 0.6687, "step": 18980 }, { "epoch": 0.5541736007707804, "grad_norm": 0.767651067199934, "learning_rate": 2.4768856447688564e-06, "loss": 0.6666, "step": 18981 }, { "epoch": 0.5542027969986277, "grad_norm": 0.7244438653076793, "learning_rate": 2.4767234387672344e-06, "loss": 0.6628, "step": 18982 }, { "epoch": 0.5542319932264751, "grad_norm": 0.7452431731061905, "learning_rate": 2.4765612327656124e-06, "loss": 0.6519, "step": 18983 }, { "epoch": 0.5542611894543225, "grad_norm": 0.7537246343736057, "learning_rate": 2.4763990267639905e-06, "loss": 0.6789, "step": 18984 }, { "epoch": 0.5542903856821698, "grad_norm": 0.7635919782054895, "learning_rate": 2.4762368207623685e-06, "loss": 0.6305, "step": 18985 }, { "epoch": 0.5543195819100172, "grad_norm": 0.7645040598255266, "learning_rate": 2.4760746147607465e-06, "loss": 0.7664, "step": 18986 }, { "epoch": 0.5543487781378645, "grad_norm": 0.7030632699778723, "learning_rate": 2.4759124087591245e-06, "loss": 0.5884, "step": 18987 }, { "epoch": 0.5543779743657119, "grad_norm": 0.7406927840361233, "learning_rate": 2.4757502027575025e-06, "loss": 0.6685, "step": 18988 }, { "epoch": 0.5544071705935593, "grad_norm": 0.7629519552387117, "learning_rate": 2.47558799675588e-06, "loss": 0.6906, "step": 18989 }, { "epoch": 0.5544363668214066, "grad_norm": 0.7577280977432829, "learning_rate": 2.475425790754258e-06, "loss": 0.6546, "step": 18990 }, { "epoch": 0.554465563049254, "grad_norm": 0.7308564435915317, "learning_rate": 2.475263584752636e-06, "loss": 0.5645, "step": 18991 }, { "epoch": 0.5544947592771013, "grad_norm": 0.7009610643090446, "learning_rate": 2.475101378751014e-06, "loss": 0.6128, "step": 18992 }, { "epoch": 0.5545239555049487, "grad_norm": 0.7363562497910588, "learning_rate": 2.4749391727493917e-06, "loss": 0.645, "step": 18993 }, { "epoch": 0.5545531517327961, "grad_norm": 0.7190192165837219, "learning_rate": 2.4747769667477697e-06, "loss": 0.6266, "step": 18994 }, { "epoch": 0.5545823479606435, "grad_norm": 0.7323984212800845, "learning_rate": 2.474614760746148e-06, "loss": 0.6884, "step": 18995 }, { "epoch": 0.5546115441884909, "grad_norm": 0.7105041251404304, "learning_rate": 2.4744525547445257e-06, "loss": 0.6276, "step": 18996 }, { "epoch": 0.5546407404163383, "grad_norm": 0.7291160313497438, "learning_rate": 2.4742903487429037e-06, "loss": 0.629, "step": 18997 }, { "epoch": 0.5546699366441856, "grad_norm": 0.6915385729319922, "learning_rate": 2.4741281427412817e-06, "loss": 0.5742, "step": 18998 }, { "epoch": 0.554699132872033, "grad_norm": 0.7571852093106483, "learning_rate": 2.4739659367396597e-06, "loss": 0.6608, "step": 18999 }, { "epoch": 0.5547283290998803, "grad_norm": 0.7006578549338058, "learning_rate": 2.4738037307380373e-06, "loss": 0.6277, "step": 19000 }, { "epoch": 0.5547575253277277, "grad_norm": 0.7471194983829965, "learning_rate": 2.4736415247364153e-06, "loss": 0.6809, "step": 19001 }, { "epoch": 0.5547867215555751, "grad_norm": 0.7040596134476153, "learning_rate": 2.4734793187347933e-06, "loss": 0.6567, "step": 19002 }, { "epoch": 0.5548159177834224, "grad_norm": 0.7301054015103611, "learning_rate": 2.4733171127331713e-06, "loss": 0.6435, "step": 19003 }, { "epoch": 0.5548451140112698, "grad_norm": 0.7157000345988409, "learning_rate": 2.4731549067315493e-06, "loss": 0.6039, "step": 19004 }, { "epoch": 0.5548743102391172, "grad_norm": 0.7293871994234047, "learning_rate": 2.4729927007299273e-06, "loss": 0.6746, "step": 19005 }, { "epoch": 0.5549035064669645, "grad_norm": 0.8970176617460263, "learning_rate": 2.4728304947283053e-06, "loss": 0.7038, "step": 19006 }, { "epoch": 0.5549327026948119, "grad_norm": 0.6702589266218384, "learning_rate": 2.4726682887266833e-06, "loss": 0.5804, "step": 19007 }, { "epoch": 0.5549618989226592, "grad_norm": 0.7255912963672408, "learning_rate": 2.472506082725061e-06, "loss": 0.6616, "step": 19008 }, { "epoch": 0.5549910951505066, "grad_norm": 0.7063488411432273, "learning_rate": 2.472343876723439e-06, "loss": 0.6628, "step": 19009 }, { "epoch": 0.555020291378354, "grad_norm": 0.7688413934454936, "learning_rate": 2.472181670721817e-06, "loss": 0.6638, "step": 19010 }, { "epoch": 0.5550494876062013, "grad_norm": 0.7259902499527713, "learning_rate": 2.472019464720195e-06, "loss": 0.6521, "step": 19011 }, { "epoch": 0.5550786838340487, "grad_norm": 0.6949494563460148, "learning_rate": 2.4718572587185725e-06, "loss": 0.5851, "step": 19012 }, { "epoch": 0.555107880061896, "grad_norm": 0.7339937477994207, "learning_rate": 2.4716950527169505e-06, "loss": 0.7282, "step": 19013 }, { "epoch": 0.5551370762897434, "grad_norm": 0.7083039816347125, "learning_rate": 2.471532846715329e-06, "loss": 0.6771, "step": 19014 }, { "epoch": 0.5551662725175908, "grad_norm": 0.7753761972157475, "learning_rate": 2.4713706407137065e-06, "loss": 0.7036, "step": 19015 }, { "epoch": 0.5551954687454381, "grad_norm": 0.8657803275613959, "learning_rate": 2.4712084347120846e-06, "loss": 0.776, "step": 19016 }, { "epoch": 0.5552246649732855, "grad_norm": 0.7377484425220192, "learning_rate": 2.4710462287104626e-06, "loss": 0.6411, "step": 19017 }, { "epoch": 0.5552538612011328, "grad_norm": 0.7980848910797428, "learning_rate": 2.4708840227088406e-06, "loss": 0.7237, "step": 19018 }, { "epoch": 0.5552830574289802, "grad_norm": 0.7455443403744749, "learning_rate": 2.470721816707218e-06, "loss": 0.6784, "step": 19019 }, { "epoch": 0.5553122536568276, "grad_norm": 0.7100545460300578, "learning_rate": 2.470559610705596e-06, "loss": 0.6693, "step": 19020 }, { "epoch": 0.5553414498846749, "grad_norm": 0.7323739319539628, "learning_rate": 2.470397404703974e-06, "loss": 0.6737, "step": 19021 }, { "epoch": 0.5553706461125223, "grad_norm": 0.7726059035053591, "learning_rate": 2.470235198702352e-06, "loss": 0.7452, "step": 19022 }, { "epoch": 0.5553998423403697, "grad_norm": 0.7329190114964864, "learning_rate": 2.47007299270073e-06, "loss": 0.691, "step": 19023 }, { "epoch": 0.555429038568217, "grad_norm": 0.7054416946889097, "learning_rate": 2.469910786699108e-06, "loss": 0.627, "step": 19024 }, { "epoch": 0.5554582347960644, "grad_norm": 0.6991207707523664, "learning_rate": 2.469748580697486e-06, "loss": 0.5759, "step": 19025 }, { "epoch": 0.5554874310239117, "grad_norm": 0.7030944157469833, "learning_rate": 2.469586374695864e-06, "loss": 0.6242, "step": 19026 }, { "epoch": 0.5555166272517591, "grad_norm": 0.8448619384589171, "learning_rate": 2.4694241686942418e-06, "loss": 0.7672, "step": 19027 }, { "epoch": 0.5555458234796065, "grad_norm": 0.7261431237832223, "learning_rate": 2.4692619626926198e-06, "loss": 0.6287, "step": 19028 }, { "epoch": 0.5555750197074538, "grad_norm": 0.7418448169768416, "learning_rate": 2.469099756690998e-06, "loss": 0.6631, "step": 19029 }, { "epoch": 0.5556042159353012, "grad_norm": 0.712742457688342, "learning_rate": 2.468937550689376e-06, "loss": 0.6714, "step": 19030 }, { "epoch": 0.5556334121631485, "grad_norm": 0.8656309514256554, "learning_rate": 2.4687753446877534e-06, "loss": 0.7636, "step": 19031 }, { "epoch": 0.5556626083909959, "grad_norm": 0.7304839730454118, "learning_rate": 2.468613138686132e-06, "loss": 0.6221, "step": 19032 }, { "epoch": 0.5556918046188433, "grad_norm": 0.7699648712085508, "learning_rate": 2.46845093268451e-06, "loss": 0.727, "step": 19033 }, { "epoch": 0.5557210008466906, "grad_norm": 0.7232913261723477, "learning_rate": 2.4682887266828874e-06, "loss": 0.6661, "step": 19034 }, { "epoch": 0.555750197074538, "grad_norm": 0.6470955279031805, "learning_rate": 2.4681265206812654e-06, "loss": 0.5518, "step": 19035 }, { "epoch": 0.5557793933023853, "grad_norm": 1.0118133800380915, "learning_rate": 2.4679643146796434e-06, "loss": 0.6523, "step": 19036 }, { "epoch": 0.5558085895302327, "grad_norm": 0.7031874506405937, "learning_rate": 2.4678021086780214e-06, "loss": 0.6117, "step": 19037 }, { "epoch": 0.5558377857580801, "grad_norm": 0.7520908181904986, "learning_rate": 2.467639902676399e-06, "loss": 0.6989, "step": 19038 }, { "epoch": 0.5558669819859274, "grad_norm": 0.7570942288729727, "learning_rate": 2.467477696674777e-06, "loss": 0.7122, "step": 19039 }, { "epoch": 0.5558961782137748, "grad_norm": 0.7322085107671362, "learning_rate": 2.467315490673155e-06, "loss": 0.6638, "step": 19040 }, { "epoch": 0.5559253744416222, "grad_norm": 0.6413095356667049, "learning_rate": 2.467153284671533e-06, "loss": 0.5168, "step": 19041 }, { "epoch": 0.5559545706694695, "grad_norm": 0.7707335860535357, "learning_rate": 2.466991078669911e-06, "loss": 0.7159, "step": 19042 }, { "epoch": 0.5559837668973169, "grad_norm": 0.7133484923260439, "learning_rate": 2.466828872668289e-06, "loss": 0.6214, "step": 19043 }, { "epoch": 0.5560129631251642, "grad_norm": 0.7921070426526737, "learning_rate": 2.466666666666667e-06, "loss": 0.6671, "step": 19044 }, { "epoch": 0.5560421593530116, "grad_norm": 0.7240825839075604, "learning_rate": 2.4665044606650446e-06, "loss": 0.6356, "step": 19045 }, { "epoch": 0.556071355580859, "grad_norm": 0.7374960516833827, "learning_rate": 2.4663422546634226e-06, "loss": 0.6313, "step": 19046 }, { "epoch": 0.5561005518087063, "grad_norm": 0.7878555393900253, "learning_rate": 2.4661800486618006e-06, "loss": 0.6713, "step": 19047 }, { "epoch": 0.5561297480365537, "grad_norm": 0.7236978370011865, "learning_rate": 2.4660178426601787e-06, "loss": 0.6349, "step": 19048 }, { "epoch": 0.556158944264401, "grad_norm": 0.7532372389048078, "learning_rate": 2.4658556366585567e-06, "loss": 0.6375, "step": 19049 }, { "epoch": 0.5561881404922484, "grad_norm": 0.7560115129336941, "learning_rate": 2.4656934306569342e-06, "loss": 0.649, "step": 19050 }, { "epoch": 0.5562173367200958, "grad_norm": 0.6950402866311121, "learning_rate": 2.4655312246553127e-06, "loss": 0.6188, "step": 19051 }, { "epoch": 0.5562465329479431, "grad_norm": 0.7345509741608377, "learning_rate": 2.4653690186536907e-06, "loss": 0.666, "step": 19052 }, { "epoch": 0.5562757291757905, "grad_norm": 0.7094922409732051, "learning_rate": 2.4652068126520683e-06, "loss": 0.654, "step": 19053 }, { "epoch": 0.5563049254036379, "grad_norm": 0.8471680316629552, "learning_rate": 2.4650446066504463e-06, "loss": 0.7498, "step": 19054 }, { "epoch": 0.5563341216314852, "grad_norm": 0.7313028274859208, "learning_rate": 2.4648824006488243e-06, "loss": 0.6912, "step": 19055 }, { "epoch": 0.5563633178593326, "grad_norm": 0.7831681280037511, "learning_rate": 2.4647201946472023e-06, "loss": 0.7082, "step": 19056 }, { "epoch": 0.5563925140871799, "grad_norm": 0.8220486521303136, "learning_rate": 2.46455798864558e-06, "loss": 0.7901, "step": 19057 }, { "epoch": 0.5564217103150273, "grad_norm": 0.7234671055195103, "learning_rate": 2.464395782643958e-06, "loss": 0.6291, "step": 19058 }, { "epoch": 0.5564509065428747, "grad_norm": 0.7144040794790467, "learning_rate": 2.464233576642336e-06, "loss": 0.5817, "step": 19059 }, { "epoch": 0.556480102770722, "grad_norm": 0.7213114711373629, "learning_rate": 2.464071370640714e-06, "loss": 0.6342, "step": 19060 }, { "epoch": 0.5565092989985694, "grad_norm": 0.7512574128586809, "learning_rate": 2.463909164639092e-06, "loss": 0.6031, "step": 19061 }, { "epoch": 0.5565384952264167, "grad_norm": 0.7445872874046747, "learning_rate": 2.46374695863747e-06, "loss": 0.6675, "step": 19062 }, { "epoch": 0.5565676914542641, "grad_norm": 0.7394892872213635, "learning_rate": 2.463584752635848e-06, "loss": 0.6234, "step": 19063 }, { "epoch": 0.5565968876821115, "grad_norm": 0.7314176525426536, "learning_rate": 2.4634225466342255e-06, "loss": 0.6756, "step": 19064 }, { "epoch": 0.5566260839099588, "grad_norm": 0.7637283234259814, "learning_rate": 2.4632603406326035e-06, "loss": 0.6754, "step": 19065 }, { "epoch": 0.5566552801378062, "grad_norm": 0.7078374225843391, "learning_rate": 2.4630981346309815e-06, "loss": 0.5611, "step": 19066 }, { "epoch": 0.5566844763656535, "grad_norm": 0.7462838005340431, "learning_rate": 2.4629359286293595e-06, "loss": 0.6911, "step": 19067 }, { "epoch": 0.5567136725935009, "grad_norm": 0.7055455577780679, "learning_rate": 2.4627737226277375e-06, "loss": 0.6236, "step": 19068 }, { "epoch": 0.5567428688213483, "grad_norm": 0.7154640458648034, "learning_rate": 2.462611516626115e-06, "loss": 0.6077, "step": 19069 }, { "epoch": 0.5567720650491956, "grad_norm": 0.6983375714374043, "learning_rate": 2.4624493106244935e-06, "loss": 0.5598, "step": 19070 }, { "epoch": 0.556801261277043, "grad_norm": 0.7588599917179569, "learning_rate": 2.4622871046228715e-06, "loss": 0.6749, "step": 19071 }, { "epoch": 0.5568304575048904, "grad_norm": 0.7364745817647482, "learning_rate": 2.462124898621249e-06, "loss": 0.7047, "step": 19072 }, { "epoch": 0.5568596537327377, "grad_norm": 0.6975167832676799, "learning_rate": 2.461962692619627e-06, "loss": 0.6747, "step": 19073 }, { "epoch": 0.5568888499605851, "grad_norm": 0.7146628571763335, "learning_rate": 2.461800486618005e-06, "loss": 0.6052, "step": 19074 }, { "epoch": 0.5569180461884324, "grad_norm": 0.7280358483799692, "learning_rate": 2.461638280616383e-06, "loss": 0.6744, "step": 19075 }, { "epoch": 0.5569472424162798, "grad_norm": 0.7710170418175388, "learning_rate": 2.4614760746147607e-06, "loss": 0.7267, "step": 19076 }, { "epoch": 0.5569764386441272, "grad_norm": 0.7046514619793818, "learning_rate": 2.4613138686131387e-06, "loss": 0.622, "step": 19077 }, { "epoch": 0.5570056348719745, "grad_norm": 0.7536188326600872, "learning_rate": 2.4611516626115167e-06, "loss": 0.7279, "step": 19078 }, { "epoch": 0.5570348310998219, "grad_norm": 0.7494637669070943, "learning_rate": 2.4609894566098947e-06, "loss": 0.6834, "step": 19079 }, { "epoch": 0.5570640273276692, "grad_norm": 0.7337587215242649, "learning_rate": 2.4608272506082728e-06, "loss": 0.6811, "step": 19080 }, { "epoch": 0.5570932235555166, "grad_norm": 0.7411867411643336, "learning_rate": 2.4606650446066508e-06, "loss": 0.6388, "step": 19081 }, { "epoch": 0.557122419783364, "grad_norm": 0.7766957278830485, "learning_rate": 2.4605028386050288e-06, "loss": 0.5946, "step": 19082 }, { "epoch": 0.5571516160112113, "grad_norm": 0.7200244557431398, "learning_rate": 2.4603406326034063e-06, "loss": 0.6264, "step": 19083 }, { "epoch": 0.5571808122390587, "grad_norm": 0.6841878154225285, "learning_rate": 2.4601784266017844e-06, "loss": 0.6234, "step": 19084 }, { "epoch": 0.557210008466906, "grad_norm": 0.6852753161716658, "learning_rate": 2.4600162206001624e-06, "loss": 0.6009, "step": 19085 }, { "epoch": 0.5572392046947534, "grad_norm": 0.7774236406679982, "learning_rate": 2.4598540145985404e-06, "loss": 0.6789, "step": 19086 }, { "epoch": 0.5572684009226008, "grad_norm": 0.7065720523834407, "learning_rate": 2.4596918085969184e-06, "loss": 0.6589, "step": 19087 }, { "epoch": 0.5572975971504481, "grad_norm": 0.7442328545848169, "learning_rate": 2.459529602595296e-06, "loss": 0.6845, "step": 19088 }, { "epoch": 0.5573267933782955, "grad_norm": 0.7129875732858137, "learning_rate": 2.4593673965936744e-06, "loss": 0.6569, "step": 19089 }, { "epoch": 0.5573559896061429, "grad_norm": 0.752530674666788, "learning_rate": 2.4592051905920524e-06, "loss": 0.6233, "step": 19090 }, { "epoch": 0.5573851858339902, "grad_norm": 0.7820025308663016, "learning_rate": 2.45904298459043e-06, "loss": 0.7058, "step": 19091 }, { "epoch": 0.5574143820618376, "grad_norm": 0.6825688111967955, "learning_rate": 2.458880778588808e-06, "loss": 0.5572, "step": 19092 }, { "epoch": 0.5574435782896849, "grad_norm": 0.7061388532351174, "learning_rate": 2.458718572587186e-06, "loss": 0.6173, "step": 19093 }, { "epoch": 0.5574727745175323, "grad_norm": 0.7734647555514502, "learning_rate": 2.458556366585564e-06, "loss": 0.6472, "step": 19094 }, { "epoch": 0.5575019707453797, "grad_norm": 0.7315288302729076, "learning_rate": 2.4583941605839416e-06, "loss": 0.5757, "step": 19095 }, { "epoch": 0.557531166973227, "grad_norm": 0.7452964896069497, "learning_rate": 2.4582319545823196e-06, "loss": 0.6771, "step": 19096 }, { "epoch": 0.5575603632010744, "grad_norm": 0.7414846629510617, "learning_rate": 2.4580697485806976e-06, "loss": 0.6615, "step": 19097 }, { "epoch": 0.5575895594289217, "grad_norm": 0.7517755205246579, "learning_rate": 2.4579075425790756e-06, "loss": 0.6558, "step": 19098 }, { "epoch": 0.5576187556567691, "grad_norm": 0.7683248835054809, "learning_rate": 2.4577453365774536e-06, "loss": 0.6835, "step": 19099 }, { "epoch": 0.5576479518846165, "grad_norm": 0.7192645876705032, "learning_rate": 2.4575831305758316e-06, "loss": 0.6624, "step": 19100 }, { "epoch": 0.5576771481124638, "grad_norm": 0.7470795232060992, "learning_rate": 2.4574209245742096e-06, "loss": 0.6948, "step": 19101 }, { "epoch": 0.5577063443403112, "grad_norm": 0.6276483360300168, "learning_rate": 2.457258718572587e-06, "loss": 0.5065, "step": 19102 }, { "epoch": 0.5577355405681585, "grad_norm": 0.7254818221008156, "learning_rate": 2.4570965125709652e-06, "loss": 0.6325, "step": 19103 }, { "epoch": 0.5577647367960059, "grad_norm": 0.6891780610618334, "learning_rate": 2.4569343065693432e-06, "loss": 0.6409, "step": 19104 }, { "epoch": 0.5577939330238533, "grad_norm": 0.773043235932206, "learning_rate": 2.4567721005677212e-06, "loss": 0.6505, "step": 19105 }, { "epoch": 0.5578231292517006, "grad_norm": 0.7459834994355596, "learning_rate": 2.4566098945660992e-06, "loss": 0.7053, "step": 19106 }, { "epoch": 0.557852325479548, "grad_norm": 0.6597422711759626, "learning_rate": 2.456447688564477e-06, "loss": 0.5626, "step": 19107 }, { "epoch": 0.5578815217073954, "grad_norm": 0.7195518553371605, "learning_rate": 2.4562854825628552e-06, "loss": 0.6146, "step": 19108 }, { "epoch": 0.5579107179352427, "grad_norm": 0.7317724741264219, "learning_rate": 2.4561232765612333e-06, "loss": 0.6652, "step": 19109 }, { "epoch": 0.5579399141630901, "grad_norm": 0.7045613027672777, "learning_rate": 2.455961070559611e-06, "loss": 0.5963, "step": 19110 }, { "epoch": 0.5579691103909374, "grad_norm": 0.7041967140491514, "learning_rate": 2.455798864557989e-06, "loss": 0.6329, "step": 19111 }, { "epoch": 0.5579983066187848, "grad_norm": 0.7282777006616461, "learning_rate": 2.455636658556367e-06, "loss": 0.6214, "step": 19112 }, { "epoch": 0.5580275028466322, "grad_norm": 0.6769130104619542, "learning_rate": 2.455474452554745e-06, "loss": 0.5472, "step": 19113 }, { "epoch": 0.5580566990744795, "grad_norm": 0.7519765200923693, "learning_rate": 2.4553122465531224e-06, "loss": 0.6531, "step": 19114 }, { "epoch": 0.5580858953023269, "grad_norm": 0.6963289049669468, "learning_rate": 2.4551500405515004e-06, "loss": 0.6457, "step": 19115 }, { "epoch": 0.5581150915301744, "grad_norm": 0.6829179261254938, "learning_rate": 2.4549878345498785e-06, "loss": 0.5959, "step": 19116 }, { "epoch": 0.5581442877580217, "grad_norm": 0.7339838731647859, "learning_rate": 2.4548256285482565e-06, "loss": 0.7072, "step": 19117 }, { "epoch": 0.5581734839858691, "grad_norm": 0.7332215624456815, "learning_rate": 2.4546634225466345e-06, "loss": 0.6179, "step": 19118 }, { "epoch": 0.5582026802137164, "grad_norm": 0.7581872750081338, "learning_rate": 2.4545012165450125e-06, "loss": 0.6929, "step": 19119 }, { "epoch": 0.5582318764415638, "grad_norm": 0.7664512811033889, "learning_rate": 2.4543390105433905e-06, "loss": 0.6997, "step": 19120 }, { "epoch": 0.5582610726694112, "grad_norm": 0.7655791421938918, "learning_rate": 2.454176804541768e-06, "loss": 0.6893, "step": 19121 }, { "epoch": 0.5582902688972585, "grad_norm": 0.7398483242806352, "learning_rate": 2.454014598540146e-06, "loss": 0.6262, "step": 19122 }, { "epoch": 0.5583194651251059, "grad_norm": 0.7291986318308558, "learning_rate": 2.453852392538524e-06, "loss": 0.6426, "step": 19123 }, { "epoch": 0.5583486613529532, "grad_norm": 0.7019299994977746, "learning_rate": 2.453690186536902e-06, "loss": 0.646, "step": 19124 }, { "epoch": 0.5583778575808006, "grad_norm": 0.8342028255963738, "learning_rate": 2.45352798053528e-06, "loss": 0.7874, "step": 19125 }, { "epoch": 0.558407053808648, "grad_norm": 0.7179607962820174, "learning_rate": 2.4533657745336577e-06, "loss": 0.6233, "step": 19126 }, { "epoch": 0.5584362500364953, "grad_norm": 0.7255544131987339, "learning_rate": 2.453203568532036e-06, "loss": 0.6712, "step": 19127 }, { "epoch": 0.5584654462643427, "grad_norm": 0.7076339400933754, "learning_rate": 2.453041362530414e-06, "loss": 0.596, "step": 19128 }, { "epoch": 0.55849464249219, "grad_norm": 0.7568857887952172, "learning_rate": 2.4528791565287917e-06, "loss": 0.679, "step": 19129 }, { "epoch": 0.5585238387200374, "grad_norm": 0.6948326965882491, "learning_rate": 2.4527169505271697e-06, "loss": 0.5615, "step": 19130 }, { "epoch": 0.5585530349478848, "grad_norm": 0.6962689718328343, "learning_rate": 2.4525547445255477e-06, "loss": 0.6485, "step": 19131 }, { "epoch": 0.5585822311757321, "grad_norm": 0.7743306749265124, "learning_rate": 2.4523925385239257e-06, "loss": 0.7418, "step": 19132 }, { "epoch": 0.5586114274035795, "grad_norm": 0.7120153108915765, "learning_rate": 2.4522303325223033e-06, "loss": 0.6019, "step": 19133 }, { "epoch": 0.5586406236314269, "grad_norm": 0.6908453106728235, "learning_rate": 2.4520681265206813e-06, "loss": 0.5661, "step": 19134 }, { "epoch": 0.5586698198592742, "grad_norm": 0.6989959495671774, "learning_rate": 2.4519059205190593e-06, "loss": 0.6452, "step": 19135 }, { "epoch": 0.5586990160871216, "grad_norm": 0.684013471909203, "learning_rate": 2.4517437145174373e-06, "loss": 0.605, "step": 19136 }, { "epoch": 0.5587282123149689, "grad_norm": 0.7177399103985014, "learning_rate": 2.4515815085158153e-06, "loss": 0.6486, "step": 19137 }, { "epoch": 0.5587574085428163, "grad_norm": 0.6961812447439676, "learning_rate": 2.4514193025141933e-06, "loss": 0.6461, "step": 19138 }, { "epoch": 0.5587866047706637, "grad_norm": 0.6989074489466535, "learning_rate": 2.4512570965125713e-06, "loss": 0.6183, "step": 19139 }, { "epoch": 0.558815800998511, "grad_norm": 0.7302481896912297, "learning_rate": 2.451094890510949e-06, "loss": 0.6641, "step": 19140 }, { "epoch": 0.5588449972263584, "grad_norm": 0.7690072649507307, "learning_rate": 2.450932684509327e-06, "loss": 0.6365, "step": 19141 }, { "epoch": 0.5588741934542057, "grad_norm": 0.7693189479936905, "learning_rate": 2.450770478507705e-06, "loss": 0.7047, "step": 19142 }, { "epoch": 0.5589033896820531, "grad_norm": 0.695137621290424, "learning_rate": 2.450608272506083e-06, "loss": 0.5888, "step": 19143 }, { "epoch": 0.5589325859099005, "grad_norm": 0.9739339674489808, "learning_rate": 2.450446066504461e-06, "loss": 0.567, "step": 19144 }, { "epoch": 0.5589617821377478, "grad_norm": 0.7511763286460725, "learning_rate": 2.4502838605028385e-06, "loss": 0.6779, "step": 19145 }, { "epoch": 0.5589909783655952, "grad_norm": 0.7390960387403668, "learning_rate": 2.450121654501217e-06, "loss": 0.6741, "step": 19146 }, { "epoch": 0.5590201745934426, "grad_norm": 0.7038947306213974, "learning_rate": 2.449959448499595e-06, "loss": 0.6005, "step": 19147 }, { "epoch": 0.5590493708212899, "grad_norm": 0.7630239146419532, "learning_rate": 2.4497972424979726e-06, "loss": 0.6596, "step": 19148 }, { "epoch": 0.5590785670491373, "grad_norm": 0.7886469520761388, "learning_rate": 2.4496350364963506e-06, "loss": 0.7208, "step": 19149 }, { "epoch": 0.5591077632769846, "grad_norm": 0.7562768538079059, "learning_rate": 2.4494728304947286e-06, "loss": 0.6837, "step": 19150 }, { "epoch": 0.559136959504832, "grad_norm": 0.7791684148696184, "learning_rate": 2.4493106244931066e-06, "loss": 0.6336, "step": 19151 }, { "epoch": 0.5591661557326794, "grad_norm": 0.730132764225254, "learning_rate": 2.449148418491484e-06, "loss": 0.6806, "step": 19152 }, { "epoch": 0.5591953519605267, "grad_norm": 0.739626156056236, "learning_rate": 2.448986212489862e-06, "loss": 0.6387, "step": 19153 }, { "epoch": 0.5592245481883741, "grad_norm": 0.8411404737316668, "learning_rate": 2.44882400648824e-06, "loss": 0.7115, "step": 19154 }, { "epoch": 0.5592537444162214, "grad_norm": 0.7605025911617628, "learning_rate": 2.448661800486618e-06, "loss": 0.7209, "step": 19155 }, { "epoch": 0.5592829406440688, "grad_norm": 0.7637459469801456, "learning_rate": 2.448499594484996e-06, "loss": 0.6437, "step": 19156 }, { "epoch": 0.5593121368719162, "grad_norm": 0.7727513127892313, "learning_rate": 2.448337388483374e-06, "loss": 0.7028, "step": 19157 }, { "epoch": 0.5593413330997635, "grad_norm": 0.7258468549932973, "learning_rate": 2.448175182481752e-06, "loss": 0.6322, "step": 19158 }, { "epoch": 0.5593705293276109, "grad_norm": 0.6748875930974166, "learning_rate": 2.4480129764801298e-06, "loss": 0.5801, "step": 19159 }, { "epoch": 0.5593997255554582, "grad_norm": 0.737794623012116, "learning_rate": 2.447850770478508e-06, "loss": 0.6566, "step": 19160 }, { "epoch": 0.5594289217833056, "grad_norm": 0.7294292374458801, "learning_rate": 2.447688564476886e-06, "loss": 0.6323, "step": 19161 }, { "epoch": 0.559458118011153, "grad_norm": 0.7273231180112218, "learning_rate": 2.447526358475264e-06, "loss": 0.6475, "step": 19162 }, { "epoch": 0.5594873142390003, "grad_norm": 0.742673174615881, "learning_rate": 2.447364152473642e-06, "loss": 0.6351, "step": 19163 }, { "epoch": 0.5595165104668477, "grad_norm": 0.8149516752701896, "learning_rate": 2.4472019464720194e-06, "loss": 0.703, "step": 19164 }, { "epoch": 0.559545706694695, "grad_norm": 0.6995027602973093, "learning_rate": 2.447039740470398e-06, "loss": 0.6293, "step": 19165 }, { "epoch": 0.5595749029225424, "grad_norm": 0.7754131876744882, "learning_rate": 2.446877534468776e-06, "loss": 0.7418, "step": 19166 }, { "epoch": 0.5596040991503898, "grad_norm": 0.6994926612947748, "learning_rate": 2.4467153284671534e-06, "loss": 0.6325, "step": 19167 }, { "epoch": 0.5596332953782371, "grad_norm": 0.751703755202715, "learning_rate": 2.4465531224655314e-06, "loss": 0.6996, "step": 19168 }, { "epoch": 0.5596624916060845, "grad_norm": 0.698863378223037, "learning_rate": 2.4463909164639094e-06, "loss": 0.6225, "step": 19169 }, { "epoch": 0.5596916878339319, "grad_norm": 0.7473684695144535, "learning_rate": 2.4462287104622874e-06, "loss": 0.6957, "step": 19170 }, { "epoch": 0.5597208840617792, "grad_norm": 0.7173616751231799, "learning_rate": 2.446066504460665e-06, "loss": 0.6113, "step": 19171 }, { "epoch": 0.5597500802896266, "grad_norm": 0.7488507498825124, "learning_rate": 2.445904298459043e-06, "loss": 0.5845, "step": 19172 }, { "epoch": 0.559779276517474, "grad_norm": 0.7010307053464515, "learning_rate": 2.445742092457421e-06, "loss": 0.6481, "step": 19173 }, { "epoch": 0.5598084727453213, "grad_norm": 0.6967803549762808, "learning_rate": 2.445579886455799e-06, "loss": 0.5879, "step": 19174 }, { "epoch": 0.5598376689731687, "grad_norm": 0.6915359680329918, "learning_rate": 2.445417680454177e-06, "loss": 0.6303, "step": 19175 }, { "epoch": 0.559866865201016, "grad_norm": 0.8316464095642163, "learning_rate": 2.445255474452555e-06, "loss": 0.6788, "step": 19176 }, { "epoch": 0.5598960614288634, "grad_norm": 0.773245881630408, "learning_rate": 2.445093268450933e-06, "loss": 0.7277, "step": 19177 }, { "epoch": 0.5599252576567108, "grad_norm": 0.7599006167440391, "learning_rate": 2.4449310624493106e-06, "loss": 0.7353, "step": 19178 }, { "epoch": 0.5599544538845581, "grad_norm": 0.6940503221905221, "learning_rate": 2.4447688564476886e-06, "loss": 0.5666, "step": 19179 }, { "epoch": 0.5599836501124055, "grad_norm": 1.2907608205155234, "learning_rate": 2.4446066504460667e-06, "loss": 0.7052, "step": 19180 }, { "epoch": 0.5600128463402528, "grad_norm": 0.7546798959327025, "learning_rate": 2.4444444444444447e-06, "loss": 0.6888, "step": 19181 }, { "epoch": 0.5600420425681002, "grad_norm": 0.7297346231075074, "learning_rate": 2.4442822384428227e-06, "loss": 0.6183, "step": 19182 }, { "epoch": 0.5600712387959476, "grad_norm": 0.6833547612641578, "learning_rate": 2.4441200324412007e-06, "loss": 0.5851, "step": 19183 }, { "epoch": 0.5601004350237949, "grad_norm": 0.7253039071056405, "learning_rate": 2.4439578264395787e-06, "loss": 0.6392, "step": 19184 }, { "epoch": 0.5601296312516423, "grad_norm": 0.7218952012142725, "learning_rate": 2.4437956204379567e-06, "loss": 0.6484, "step": 19185 }, { "epoch": 0.5601588274794896, "grad_norm": 0.7256931936590618, "learning_rate": 2.4436334144363343e-06, "loss": 0.6263, "step": 19186 }, { "epoch": 0.560188023707337, "grad_norm": 0.7270398452427852, "learning_rate": 2.4434712084347123e-06, "loss": 0.6355, "step": 19187 }, { "epoch": 0.5602172199351844, "grad_norm": 0.675686182524597, "learning_rate": 2.4433090024330903e-06, "loss": 0.6171, "step": 19188 }, { "epoch": 0.5602464161630317, "grad_norm": 0.7919625428702074, "learning_rate": 2.4431467964314683e-06, "loss": 0.7341, "step": 19189 }, { "epoch": 0.5602756123908791, "grad_norm": 0.8622927754540857, "learning_rate": 2.442984590429846e-06, "loss": 0.6366, "step": 19190 }, { "epoch": 0.5603048086187264, "grad_norm": 0.7141536401026523, "learning_rate": 2.442822384428224e-06, "loss": 0.6841, "step": 19191 }, { "epoch": 0.5603340048465738, "grad_norm": 0.6957703342533051, "learning_rate": 2.442660178426602e-06, "loss": 0.6068, "step": 19192 }, { "epoch": 0.5603632010744212, "grad_norm": 0.7490143559637069, "learning_rate": 2.44249797242498e-06, "loss": 0.7053, "step": 19193 }, { "epoch": 0.5603923973022685, "grad_norm": 0.7139277814006122, "learning_rate": 2.442335766423358e-06, "loss": 0.6002, "step": 19194 }, { "epoch": 0.5604215935301159, "grad_norm": 0.7353492659847151, "learning_rate": 2.442173560421736e-06, "loss": 0.6944, "step": 19195 }, { "epoch": 0.5604507897579633, "grad_norm": 0.7046484898035119, "learning_rate": 2.442011354420114e-06, "loss": 0.595, "step": 19196 }, { "epoch": 0.5604799859858106, "grad_norm": 0.7399341144518145, "learning_rate": 2.4418491484184915e-06, "loss": 0.6867, "step": 19197 }, { "epoch": 0.560509182213658, "grad_norm": 0.7155984599049541, "learning_rate": 2.4416869424168695e-06, "loss": 0.6266, "step": 19198 }, { "epoch": 0.5605383784415053, "grad_norm": 0.7809029585209136, "learning_rate": 2.4415247364152475e-06, "loss": 0.6714, "step": 19199 }, { "epoch": 0.5605675746693527, "grad_norm": 0.704567711641288, "learning_rate": 2.4413625304136255e-06, "loss": 0.6616, "step": 19200 }, { "epoch": 0.5605967708972001, "grad_norm": 0.765459045485785, "learning_rate": 2.4412003244120035e-06, "loss": 0.7175, "step": 19201 }, { "epoch": 0.5606259671250474, "grad_norm": 0.7007388650665864, "learning_rate": 2.4410381184103815e-06, "loss": 0.5959, "step": 19202 }, { "epoch": 0.5606551633528948, "grad_norm": 0.6994334204434927, "learning_rate": 2.4408759124087595e-06, "loss": 0.6079, "step": 19203 }, { "epoch": 0.5606843595807421, "grad_norm": 0.7291996640067723, "learning_rate": 2.4407137064071375e-06, "loss": 0.6498, "step": 19204 }, { "epoch": 0.5607135558085895, "grad_norm": 0.7188312804956685, "learning_rate": 2.440551500405515e-06, "loss": 0.6577, "step": 19205 }, { "epoch": 0.5607427520364369, "grad_norm": 0.8050688500468658, "learning_rate": 2.440389294403893e-06, "loss": 0.7779, "step": 19206 }, { "epoch": 0.5607719482642842, "grad_norm": 0.7658165365119899, "learning_rate": 2.440227088402271e-06, "loss": 0.7102, "step": 19207 }, { "epoch": 0.5608011444921316, "grad_norm": 0.7861669251323263, "learning_rate": 2.440064882400649e-06, "loss": 0.6863, "step": 19208 }, { "epoch": 0.560830340719979, "grad_norm": 0.6404792606129704, "learning_rate": 2.4399026763990267e-06, "loss": 0.5131, "step": 19209 }, { "epoch": 0.5608595369478263, "grad_norm": 0.7538955761906658, "learning_rate": 2.4397404703974047e-06, "loss": 0.6723, "step": 19210 }, { "epoch": 0.5608887331756737, "grad_norm": 0.7543747602197852, "learning_rate": 2.4395782643957827e-06, "loss": 0.6634, "step": 19211 }, { "epoch": 0.560917929403521, "grad_norm": 0.7262252608225602, "learning_rate": 2.4394160583941608e-06, "loss": 0.6522, "step": 19212 }, { "epoch": 0.5609471256313684, "grad_norm": 0.8031249586710183, "learning_rate": 2.4392538523925388e-06, "loss": 0.7531, "step": 19213 }, { "epoch": 0.5609763218592158, "grad_norm": 0.7399348383018722, "learning_rate": 2.4390916463909168e-06, "loss": 0.6362, "step": 19214 }, { "epoch": 0.5610055180870631, "grad_norm": 0.7948953953766901, "learning_rate": 2.4389294403892948e-06, "loss": 0.6721, "step": 19215 }, { "epoch": 0.5610347143149105, "grad_norm": 0.7308678169871132, "learning_rate": 2.4387672343876724e-06, "loss": 0.6058, "step": 19216 }, { "epoch": 0.5610639105427578, "grad_norm": 0.7290340759576365, "learning_rate": 2.4386050283860504e-06, "loss": 0.6716, "step": 19217 }, { "epoch": 0.5610931067706052, "grad_norm": 0.692425814229986, "learning_rate": 2.4384428223844284e-06, "loss": 0.5589, "step": 19218 }, { "epoch": 0.5611223029984526, "grad_norm": 0.738181649241466, "learning_rate": 2.4382806163828064e-06, "loss": 0.6342, "step": 19219 }, { "epoch": 0.5611514992262999, "grad_norm": 0.7668100211510417, "learning_rate": 2.4381184103811844e-06, "loss": 0.6953, "step": 19220 }, { "epoch": 0.5611806954541473, "grad_norm": 0.7512842719565533, "learning_rate": 2.4379562043795624e-06, "loss": 0.6575, "step": 19221 }, { "epoch": 0.5612098916819946, "grad_norm": 0.7290664380476886, "learning_rate": 2.4377939983779404e-06, "loss": 0.6716, "step": 19222 }, { "epoch": 0.561239087909842, "grad_norm": 0.6919389455405803, "learning_rate": 2.437631792376318e-06, "loss": 0.5414, "step": 19223 }, { "epoch": 0.5612682841376894, "grad_norm": 0.7620923953787112, "learning_rate": 2.437469586374696e-06, "loss": 0.6247, "step": 19224 }, { "epoch": 0.5612974803655367, "grad_norm": 0.7480770542086895, "learning_rate": 2.437307380373074e-06, "loss": 0.7104, "step": 19225 }, { "epoch": 0.5613266765933841, "grad_norm": 0.7109201332149454, "learning_rate": 2.437145174371452e-06, "loss": 0.5979, "step": 19226 }, { "epoch": 0.5613558728212314, "grad_norm": 0.728126172072889, "learning_rate": 2.43698296836983e-06, "loss": 0.64, "step": 19227 }, { "epoch": 0.5613850690490788, "grad_norm": 0.699997918359136, "learning_rate": 2.4368207623682076e-06, "loss": 0.5785, "step": 19228 }, { "epoch": 0.5614142652769262, "grad_norm": 0.7566196581416605, "learning_rate": 2.4366585563665856e-06, "loss": 0.6672, "step": 19229 }, { "epoch": 0.5614434615047735, "grad_norm": 0.7368011255651817, "learning_rate": 2.4364963503649636e-06, "loss": 0.7013, "step": 19230 }, { "epoch": 0.5614726577326209, "grad_norm": 0.6990126534252635, "learning_rate": 2.4363341443633416e-06, "loss": 0.6029, "step": 19231 }, { "epoch": 0.5615018539604683, "grad_norm": 0.7938962891997187, "learning_rate": 2.4361719383617196e-06, "loss": 0.7048, "step": 19232 }, { "epoch": 0.5615310501883156, "grad_norm": 0.6806831912353695, "learning_rate": 2.4360097323600976e-06, "loss": 0.6116, "step": 19233 }, { "epoch": 0.561560246416163, "grad_norm": 0.7828180773643745, "learning_rate": 2.4358475263584756e-06, "loss": 0.7963, "step": 19234 }, { "epoch": 0.5615894426440103, "grad_norm": 0.7991081963522719, "learning_rate": 2.4356853203568532e-06, "loss": 0.7471, "step": 19235 }, { "epoch": 0.5616186388718577, "grad_norm": 0.8492386805412391, "learning_rate": 2.4355231143552312e-06, "loss": 0.7558, "step": 19236 }, { "epoch": 0.5616478350997052, "grad_norm": 0.7299375696766877, "learning_rate": 2.4353609083536092e-06, "loss": 0.7063, "step": 19237 }, { "epoch": 0.5616770313275525, "grad_norm": 0.7496406283852116, "learning_rate": 2.4351987023519872e-06, "loss": 0.665, "step": 19238 }, { "epoch": 0.5617062275553999, "grad_norm": 0.7190638380113116, "learning_rate": 2.4350364963503652e-06, "loss": 0.594, "step": 19239 }, { "epoch": 0.5617354237832473, "grad_norm": 0.6965467006270153, "learning_rate": 2.4348742903487433e-06, "loss": 0.6111, "step": 19240 }, { "epoch": 0.5617646200110946, "grad_norm": 0.7043064574074941, "learning_rate": 2.4347120843471213e-06, "loss": 0.6122, "step": 19241 }, { "epoch": 0.561793816238942, "grad_norm": 0.7141795011011423, "learning_rate": 2.434549878345499e-06, "loss": 0.6246, "step": 19242 }, { "epoch": 0.5618230124667893, "grad_norm": 0.7263689927762279, "learning_rate": 2.434387672343877e-06, "loss": 0.687, "step": 19243 }, { "epoch": 0.5618522086946367, "grad_norm": 0.7075928664713147, "learning_rate": 2.434225466342255e-06, "loss": 0.6118, "step": 19244 }, { "epoch": 0.5618814049224841, "grad_norm": 0.8200372279650205, "learning_rate": 2.434063260340633e-06, "loss": 0.7289, "step": 19245 }, { "epoch": 0.5619106011503314, "grad_norm": 0.6529065625072306, "learning_rate": 2.433901054339011e-06, "loss": 0.5818, "step": 19246 }, { "epoch": 0.5619397973781788, "grad_norm": 0.7269375737729379, "learning_rate": 2.4337388483373885e-06, "loss": 0.602, "step": 19247 }, { "epoch": 0.5619689936060261, "grad_norm": 0.724812812262284, "learning_rate": 2.4335766423357665e-06, "loss": 0.6025, "step": 19248 }, { "epoch": 0.5619981898338735, "grad_norm": 0.8719475214295465, "learning_rate": 2.4334144363341445e-06, "loss": 0.627, "step": 19249 }, { "epoch": 0.5620273860617209, "grad_norm": 0.687218814648648, "learning_rate": 2.4332522303325225e-06, "loss": 0.598, "step": 19250 }, { "epoch": 0.5620565822895682, "grad_norm": 0.8473327130499516, "learning_rate": 2.4330900243309005e-06, "loss": 0.7032, "step": 19251 }, { "epoch": 0.5620857785174156, "grad_norm": 0.7852994000711637, "learning_rate": 2.4329278183292785e-06, "loss": 0.7297, "step": 19252 }, { "epoch": 0.562114974745263, "grad_norm": 0.6980689482930752, "learning_rate": 2.4327656123276565e-06, "loss": 0.5809, "step": 19253 }, { "epoch": 0.5621441709731103, "grad_norm": 0.6649648655069816, "learning_rate": 2.432603406326034e-06, "loss": 0.5404, "step": 19254 }, { "epoch": 0.5621733672009577, "grad_norm": 0.7200198995253554, "learning_rate": 2.432441200324412e-06, "loss": 0.604, "step": 19255 }, { "epoch": 0.562202563428805, "grad_norm": 0.8193310013489359, "learning_rate": 2.43227899432279e-06, "loss": 0.6236, "step": 19256 }, { "epoch": 0.5622317596566524, "grad_norm": 0.7118013052518477, "learning_rate": 2.432116788321168e-06, "loss": 0.6363, "step": 19257 }, { "epoch": 0.5622609558844998, "grad_norm": 0.6846798176914682, "learning_rate": 2.431954582319546e-06, "loss": 0.5778, "step": 19258 }, { "epoch": 0.5622901521123471, "grad_norm": 0.7345035031397739, "learning_rate": 2.431792376317924e-06, "loss": 0.6389, "step": 19259 }, { "epoch": 0.5623193483401945, "grad_norm": 0.7889696884223688, "learning_rate": 2.431630170316302e-06, "loss": 0.6782, "step": 19260 }, { "epoch": 0.5623485445680418, "grad_norm": 0.7468023464462358, "learning_rate": 2.4314679643146797e-06, "loss": 0.6713, "step": 19261 }, { "epoch": 0.5623777407958892, "grad_norm": 0.7083760244150982, "learning_rate": 2.4313057583130577e-06, "loss": 0.6383, "step": 19262 }, { "epoch": 0.5624069370237366, "grad_norm": 0.7122502478760213, "learning_rate": 2.4311435523114357e-06, "loss": 0.6243, "step": 19263 }, { "epoch": 0.5624361332515839, "grad_norm": 0.712883344589223, "learning_rate": 2.4309813463098137e-06, "loss": 0.6221, "step": 19264 }, { "epoch": 0.5624653294794313, "grad_norm": 0.7716912627604426, "learning_rate": 2.4308191403081917e-06, "loss": 0.6521, "step": 19265 }, { "epoch": 0.5624945257072786, "grad_norm": 0.702281162882016, "learning_rate": 2.4306569343065693e-06, "loss": 0.5881, "step": 19266 }, { "epoch": 0.562523721935126, "grad_norm": 0.7649111788784866, "learning_rate": 2.4304947283049473e-06, "loss": 0.6156, "step": 19267 }, { "epoch": 0.5625529181629734, "grad_norm": 0.7309615461991448, "learning_rate": 2.4303325223033257e-06, "loss": 0.6543, "step": 19268 }, { "epoch": 0.5625821143908207, "grad_norm": 0.7291316232995385, "learning_rate": 2.4301703163017033e-06, "loss": 0.6314, "step": 19269 }, { "epoch": 0.5626113106186681, "grad_norm": 0.712935846381977, "learning_rate": 2.4300081103000813e-06, "loss": 0.6473, "step": 19270 }, { "epoch": 0.5626405068465155, "grad_norm": 0.8282522519461504, "learning_rate": 2.4298459042984593e-06, "loss": 0.7765, "step": 19271 }, { "epoch": 0.5626697030743628, "grad_norm": 0.7410985623922554, "learning_rate": 2.4296836982968374e-06, "loss": 0.6641, "step": 19272 }, { "epoch": 0.5626988993022102, "grad_norm": 0.7076473241544052, "learning_rate": 2.429521492295215e-06, "loss": 0.6309, "step": 19273 }, { "epoch": 0.5627280955300575, "grad_norm": 0.7308800135176579, "learning_rate": 2.429359286293593e-06, "loss": 0.6657, "step": 19274 }, { "epoch": 0.5627572917579049, "grad_norm": 0.6494843403396248, "learning_rate": 2.429197080291971e-06, "loss": 0.5491, "step": 19275 }, { "epoch": 0.5627864879857523, "grad_norm": 0.7030249287897302, "learning_rate": 2.429034874290349e-06, "loss": 0.598, "step": 19276 }, { "epoch": 0.5628156842135996, "grad_norm": 0.789978739872306, "learning_rate": 2.428872668288727e-06, "loss": 0.732, "step": 19277 }, { "epoch": 0.562844880441447, "grad_norm": 0.7362865248525321, "learning_rate": 2.428710462287105e-06, "loss": 0.6519, "step": 19278 }, { "epoch": 0.5628740766692943, "grad_norm": 0.725621531854293, "learning_rate": 2.428548256285483e-06, "loss": 0.6478, "step": 19279 }, { "epoch": 0.5629032728971417, "grad_norm": 0.7219148171650019, "learning_rate": 2.4283860502838606e-06, "loss": 0.6115, "step": 19280 }, { "epoch": 0.5629324691249891, "grad_norm": 0.6894960644504348, "learning_rate": 2.4282238442822386e-06, "loss": 0.6236, "step": 19281 }, { "epoch": 0.5629616653528364, "grad_norm": 0.7377445153483189, "learning_rate": 2.4280616382806166e-06, "loss": 0.6925, "step": 19282 }, { "epoch": 0.5629908615806838, "grad_norm": 0.6592728313269778, "learning_rate": 2.4278994322789946e-06, "loss": 0.5654, "step": 19283 }, { "epoch": 0.5630200578085311, "grad_norm": 0.7186257509741236, "learning_rate": 2.4277372262773726e-06, "loss": 0.648, "step": 19284 }, { "epoch": 0.5630492540363785, "grad_norm": 0.7976684932794059, "learning_rate": 2.42757502027575e-06, "loss": 0.7, "step": 19285 }, { "epoch": 0.5630784502642259, "grad_norm": 0.702051926194826, "learning_rate": 2.427412814274128e-06, "loss": 0.6043, "step": 19286 }, { "epoch": 0.5631076464920732, "grad_norm": 0.77010382560494, "learning_rate": 2.4272506082725066e-06, "loss": 0.698, "step": 19287 }, { "epoch": 0.5631368427199206, "grad_norm": 0.7233052260162239, "learning_rate": 2.427088402270884e-06, "loss": 0.6424, "step": 19288 }, { "epoch": 0.563166038947768, "grad_norm": 0.7220937179761716, "learning_rate": 2.426926196269262e-06, "loss": 0.6687, "step": 19289 }, { "epoch": 0.5631952351756153, "grad_norm": 0.6887491346072665, "learning_rate": 2.42676399026764e-06, "loss": 0.6137, "step": 19290 }, { "epoch": 0.5632244314034627, "grad_norm": 0.7226610825182888, "learning_rate": 2.426601784266018e-06, "loss": 0.6725, "step": 19291 }, { "epoch": 0.56325362763131, "grad_norm": 0.7566827893070407, "learning_rate": 2.426439578264396e-06, "loss": 0.6469, "step": 19292 }, { "epoch": 0.5632828238591574, "grad_norm": 0.6831748761291516, "learning_rate": 2.426277372262774e-06, "loss": 0.591, "step": 19293 }, { "epoch": 0.5633120200870048, "grad_norm": 0.6797952284595477, "learning_rate": 2.426115166261152e-06, "loss": 0.6316, "step": 19294 }, { "epoch": 0.5633412163148521, "grad_norm": 0.7097194316553895, "learning_rate": 2.42595296025953e-06, "loss": 0.6007, "step": 19295 }, { "epoch": 0.5633704125426995, "grad_norm": 0.7314680165114894, "learning_rate": 2.4257907542579074e-06, "loss": 0.6084, "step": 19296 }, { "epoch": 0.5633996087705468, "grad_norm": 0.7449692957923617, "learning_rate": 2.425628548256286e-06, "loss": 0.5857, "step": 19297 }, { "epoch": 0.5634288049983942, "grad_norm": 0.7572909640581436, "learning_rate": 2.425466342254664e-06, "loss": 0.707, "step": 19298 }, { "epoch": 0.5634580012262416, "grad_norm": 0.7364956273334444, "learning_rate": 2.4253041362530414e-06, "loss": 0.6665, "step": 19299 }, { "epoch": 0.5634871974540889, "grad_norm": 0.7637812596792266, "learning_rate": 2.4251419302514194e-06, "loss": 0.6647, "step": 19300 }, { "epoch": 0.5635163936819363, "grad_norm": 0.6766931421928436, "learning_rate": 2.4249797242497974e-06, "loss": 0.5694, "step": 19301 }, { "epoch": 0.5635455899097837, "grad_norm": 0.7234160432526568, "learning_rate": 2.4248175182481754e-06, "loss": 0.6617, "step": 19302 }, { "epoch": 0.563574786137631, "grad_norm": 0.6736327609278183, "learning_rate": 2.4246553122465534e-06, "loss": 0.5619, "step": 19303 }, { "epoch": 0.5636039823654784, "grad_norm": 0.7425624529516192, "learning_rate": 2.424493106244931e-06, "loss": 0.6387, "step": 19304 }, { "epoch": 0.5636331785933257, "grad_norm": 0.7236612853831115, "learning_rate": 2.424330900243309e-06, "loss": 0.6261, "step": 19305 }, { "epoch": 0.5636623748211731, "grad_norm": 0.6640659325051809, "learning_rate": 2.4241686942416875e-06, "loss": 0.5524, "step": 19306 }, { "epoch": 0.5636915710490205, "grad_norm": 0.7223476718757432, "learning_rate": 2.424006488240065e-06, "loss": 0.6627, "step": 19307 }, { "epoch": 0.5637207672768678, "grad_norm": 0.7324510081132858, "learning_rate": 2.423844282238443e-06, "loss": 0.634, "step": 19308 }, { "epoch": 0.5637499635047152, "grad_norm": 0.7626117915036924, "learning_rate": 2.423682076236821e-06, "loss": 0.7355, "step": 19309 }, { "epoch": 0.5637791597325625, "grad_norm": 0.671895974883449, "learning_rate": 2.423519870235199e-06, "loss": 0.5735, "step": 19310 }, { "epoch": 0.5638083559604099, "grad_norm": 0.6883771125313347, "learning_rate": 2.4233576642335767e-06, "loss": 0.577, "step": 19311 }, { "epoch": 0.5638375521882573, "grad_norm": 0.6891718214459565, "learning_rate": 2.4231954582319547e-06, "loss": 0.6301, "step": 19312 }, { "epoch": 0.5638667484161046, "grad_norm": 0.783659443172455, "learning_rate": 2.4230332522303327e-06, "loss": 0.7518, "step": 19313 }, { "epoch": 0.563895944643952, "grad_norm": 0.7554151142408433, "learning_rate": 2.4228710462287107e-06, "loss": 0.6965, "step": 19314 }, { "epoch": 0.5639251408717993, "grad_norm": 0.7007864425302577, "learning_rate": 2.4227088402270883e-06, "loss": 0.6231, "step": 19315 }, { "epoch": 0.5639543370996467, "grad_norm": 0.7231493333435212, "learning_rate": 2.4225466342254667e-06, "loss": 0.6409, "step": 19316 }, { "epoch": 0.5639835333274941, "grad_norm": 0.7997724078218386, "learning_rate": 2.4223844282238447e-06, "loss": 0.7433, "step": 19317 }, { "epoch": 0.5640127295553414, "grad_norm": 0.6810824899797994, "learning_rate": 2.4222222222222223e-06, "loss": 0.6143, "step": 19318 }, { "epoch": 0.5640419257831888, "grad_norm": 0.7279029843334889, "learning_rate": 2.4220600162206003e-06, "loss": 0.653, "step": 19319 }, { "epoch": 0.5640711220110362, "grad_norm": 0.6632775927059619, "learning_rate": 2.4218978102189783e-06, "loss": 0.5555, "step": 19320 }, { "epoch": 0.5641003182388835, "grad_norm": 0.7046254675728227, "learning_rate": 2.4217356042173563e-06, "loss": 0.6707, "step": 19321 }, { "epoch": 0.5641295144667309, "grad_norm": 0.7101863145558889, "learning_rate": 2.4215733982157343e-06, "loss": 0.6085, "step": 19322 }, { "epoch": 0.5641587106945782, "grad_norm": 0.7347035473604917, "learning_rate": 2.421411192214112e-06, "loss": 0.662, "step": 19323 }, { "epoch": 0.5641879069224256, "grad_norm": 0.752777233601716, "learning_rate": 2.42124898621249e-06, "loss": 0.6896, "step": 19324 }, { "epoch": 0.564217103150273, "grad_norm": 0.7160650063136494, "learning_rate": 2.4210867802108683e-06, "loss": 0.6102, "step": 19325 }, { "epoch": 0.5642462993781203, "grad_norm": 0.6954478415701874, "learning_rate": 2.420924574209246e-06, "loss": 0.5671, "step": 19326 }, { "epoch": 0.5642754956059677, "grad_norm": 0.7662370317016098, "learning_rate": 2.420762368207624e-06, "loss": 0.7, "step": 19327 }, { "epoch": 0.564304691833815, "grad_norm": 0.7307364525440172, "learning_rate": 2.420600162206002e-06, "loss": 0.6642, "step": 19328 }, { "epoch": 0.5643338880616624, "grad_norm": 0.8064054978785836, "learning_rate": 2.42043795620438e-06, "loss": 0.7595, "step": 19329 }, { "epoch": 0.5643630842895098, "grad_norm": 0.7085184547300039, "learning_rate": 2.4202757502027575e-06, "loss": 0.648, "step": 19330 }, { "epoch": 0.5643922805173571, "grad_norm": 0.8235701723472421, "learning_rate": 2.4201135442011355e-06, "loss": 0.724, "step": 19331 }, { "epoch": 0.5644214767452045, "grad_norm": 0.7581999054563661, "learning_rate": 2.4199513381995135e-06, "loss": 0.5987, "step": 19332 }, { "epoch": 0.5644506729730518, "grad_norm": 0.7193977138790512, "learning_rate": 2.4197891321978915e-06, "loss": 0.6102, "step": 19333 }, { "epoch": 0.5644798692008992, "grad_norm": 0.7496898775223304, "learning_rate": 2.419626926196269e-06, "loss": 0.6126, "step": 19334 }, { "epoch": 0.5645090654287466, "grad_norm": 0.7202421810042878, "learning_rate": 2.4194647201946475e-06, "loss": 0.6358, "step": 19335 }, { "epoch": 0.5645382616565939, "grad_norm": 0.7053039168183778, "learning_rate": 2.4193025141930256e-06, "loss": 0.6372, "step": 19336 }, { "epoch": 0.5645674578844413, "grad_norm": 0.7382028808468966, "learning_rate": 2.419140308191403e-06, "loss": 0.7281, "step": 19337 }, { "epoch": 0.5645966541122887, "grad_norm": 0.819682852571196, "learning_rate": 2.418978102189781e-06, "loss": 0.6238, "step": 19338 }, { "epoch": 0.564625850340136, "grad_norm": 0.653731356614184, "learning_rate": 2.418815896188159e-06, "loss": 0.5631, "step": 19339 }, { "epoch": 0.5646550465679834, "grad_norm": 0.6549370999334491, "learning_rate": 2.418653690186537e-06, "loss": 0.5374, "step": 19340 }, { "epoch": 0.5646842427958307, "grad_norm": 0.6976220645826474, "learning_rate": 2.418491484184915e-06, "loss": 0.6564, "step": 19341 }, { "epoch": 0.5647134390236781, "grad_norm": 0.7497696725869618, "learning_rate": 2.4183292781832927e-06, "loss": 0.7001, "step": 19342 }, { "epoch": 0.5647426352515255, "grad_norm": 0.8127491874098896, "learning_rate": 2.4181670721816708e-06, "loss": 0.679, "step": 19343 }, { "epoch": 0.5647718314793728, "grad_norm": 0.7713998980609728, "learning_rate": 2.418004866180049e-06, "loss": 0.7153, "step": 19344 }, { "epoch": 0.5648010277072202, "grad_norm": 0.8109604028172419, "learning_rate": 2.4178426601784268e-06, "loss": 0.6922, "step": 19345 }, { "epoch": 0.5648302239350675, "grad_norm": 0.7507571456055098, "learning_rate": 2.4176804541768048e-06, "loss": 0.628, "step": 19346 }, { "epoch": 0.5648594201629149, "grad_norm": 0.7657804718420973, "learning_rate": 2.4175182481751828e-06, "loss": 0.6986, "step": 19347 }, { "epoch": 0.5648886163907623, "grad_norm": 0.7268896707782276, "learning_rate": 2.4173560421735608e-06, "loss": 0.6472, "step": 19348 }, { "epoch": 0.5649178126186096, "grad_norm": 0.8127374926723042, "learning_rate": 2.4171938361719384e-06, "loss": 0.6967, "step": 19349 }, { "epoch": 0.564947008846457, "grad_norm": 0.6819283618898159, "learning_rate": 2.4170316301703164e-06, "loss": 0.5995, "step": 19350 }, { "epoch": 0.5649762050743043, "grad_norm": 0.7277633061560032, "learning_rate": 2.4168694241686944e-06, "loss": 0.6469, "step": 19351 }, { "epoch": 0.5650054013021517, "grad_norm": 0.7216612643394215, "learning_rate": 2.4167072181670724e-06, "loss": 0.5578, "step": 19352 }, { "epoch": 0.5650345975299991, "grad_norm": 0.7639640257128739, "learning_rate": 2.4165450121654504e-06, "loss": 0.7093, "step": 19353 }, { "epoch": 0.5650637937578464, "grad_norm": 0.7028249791670701, "learning_rate": 2.4163828061638284e-06, "loss": 0.6258, "step": 19354 }, { "epoch": 0.5650929899856938, "grad_norm": 0.8194076749625944, "learning_rate": 2.4162206001622064e-06, "loss": 0.6606, "step": 19355 }, { "epoch": 0.5651221862135412, "grad_norm": 0.7349652357683726, "learning_rate": 2.416058394160584e-06, "loss": 0.6614, "step": 19356 }, { "epoch": 0.5651513824413886, "grad_norm": 0.7311682615468715, "learning_rate": 2.415896188158962e-06, "loss": 0.647, "step": 19357 }, { "epoch": 0.565180578669236, "grad_norm": 0.7790876349114336, "learning_rate": 2.41573398215734e-06, "loss": 0.6964, "step": 19358 }, { "epoch": 0.5652097748970834, "grad_norm": 0.6881911172830052, "learning_rate": 2.415571776155718e-06, "loss": 0.5795, "step": 19359 }, { "epoch": 0.5652389711249307, "grad_norm": 0.7526819129854383, "learning_rate": 2.415409570154096e-06, "loss": 0.6461, "step": 19360 }, { "epoch": 0.5652681673527781, "grad_norm": 0.7095196085380324, "learning_rate": 2.4152473641524736e-06, "loss": 0.6224, "step": 19361 }, { "epoch": 0.5652973635806254, "grad_norm": 0.7360798276753124, "learning_rate": 2.4150851581508516e-06, "loss": 0.7316, "step": 19362 }, { "epoch": 0.5653265598084728, "grad_norm": 0.7647222432758803, "learning_rate": 2.41492295214923e-06, "loss": 0.6869, "step": 19363 }, { "epoch": 0.5653557560363202, "grad_norm": 0.6987669889491764, "learning_rate": 2.4147607461476076e-06, "loss": 0.6143, "step": 19364 }, { "epoch": 0.5653849522641675, "grad_norm": 0.7693061142653477, "learning_rate": 2.4145985401459856e-06, "loss": 0.7696, "step": 19365 }, { "epoch": 0.5654141484920149, "grad_norm": 0.7762782936486493, "learning_rate": 2.4144363341443636e-06, "loss": 0.668, "step": 19366 }, { "epoch": 0.5654433447198622, "grad_norm": 0.6600306193726361, "learning_rate": 2.4142741281427416e-06, "loss": 0.5437, "step": 19367 }, { "epoch": 0.5654725409477096, "grad_norm": 0.7414599560584286, "learning_rate": 2.4141119221411192e-06, "loss": 0.63, "step": 19368 }, { "epoch": 0.565501737175557, "grad_norm": 0.7994988262225315, "learning_rate": 2.4139497161394972e-06, "loss": 0.708, "step": 19369 }, { "epoch": 0.5655309334034043, "grad_norm": 0.7319071406260299, "learning_rate": 2.4137875101378752e-06, "loss": 0.6314, "step": 19370 }, { "epoch": 0.5655601296312517, "grad_norm": 0.7166791430896288, "learning_rate": 2.4136253041362532e-06, "loss": 0.5802, "step": 19371 }, { "epoch": 0.565589325859099, "grad_norm": 0.7358361059534669, "learning_rate": 2.4134630981346313e-06, "loss": 0.6763, "step": 19372 }, { "epoch": 0.5656185220869464, "grad_norm": 0.7344921168072169, "learning_rate": 2.4133008921330093e-06, "loss": 0.6749, "step": 19373 }, { "epoch": 0.5656477183147938, "grad_norm": 0.7977590570218638, "learning_rate": 2.4131386861313873e-06, "loss": 0.6672, "step": 19374 }, { "epoch": 0.5656769145426411, "grad_norm": 0.7963987738626509, "learning_rate": 2.412976480129765e-06, "loss": 0.6176, "step": 19375 }, { "epoch": 0.5657061107704885, "grad_norm": 0.7182524120869558, "learning_rate": 2.412814274128143e-06, "loss": 0.6126, "step": 19376 }, { "epoch": 0.5657353069983359, "grad_norm": 0.7170779138986427, "learning_rate": 2.412652068126521e-06, "loss": 0.6198, "step": 19377 }, { "epoch": 0.5657645032261832, "grad_norm": 0.6595404253537944, "learning_rate": 2.412489862124899e-06, "loss": 0.547, "step": 19378 }, { "epoch": 0.5657936994540306, "grad_norm": 0.7664382544595113, "learning_rate": 2.412327656123277e-06, "loss": 0.6349, "step": 19379 }, { "epoch": 0.5658228956818779, "grad_norm": 0.9089307907474068, "learning_rate": 2.4121654501216545e-06, "loss": 0.7088, "step": 19380 }, { "epoch": 0.5658520919097253, "grad_norm": 0.7301869058559781, "learning_rate": 2.4120032441200325e-06, "loss": 0.6621, "step": 19381 }, { "epoch": 0.5658812881375727, "grad_norm": 0.7119238552856751, "learning_rate": 2.411841038118411e-06, "loss": 0.654, "step": 19382 }, { "epoch": 0.56591048436542, "grad_norm": 0.7124730471709024, "learning_rate": 2.4116788321167885e-06, "loss": 0.6163, "step": 19383 }, { "epoch": 0.5659396805932674, "grad_norm": 0.7493704732927031, "learning_rate": 2.4115166261151665e-06, "loss": 0.6312, "step": 19384 }, { "epoch": 0.5659688768211147, "grad_norm": 0.6953599980517736, "learning_rate": 2.4113544201135445e-06, "loss": 0.6106, "step": 19385 }, { "epoch": 0.5659980730489621, "grad_norm": 0.7281791585211281, "learning_rate": 2.4111922141119225e-06, "loss": 0.5931, "step": 19386 }, { "epoch": 0.5660272692768095, "grad_norm": 0.7102695776892869, "learning_rate": 2.4110300081103e-06, "loss": 0.5975, "step": 19387 }, { "epoch": 0.5660564655046568, "grad_norm": 0.7083619127638023, "learning_rate": 2.410867802108678e-06, "loss": 0.6348, "step": 19388 }, { "epoch": 0.5660856617325042, "grad_norm": 0.7668808862077094, "learning_rate": 2.410705596107056e-06, "loss": 0.648, "step": 19389 }, { "epoch": 0.5661148579603515, "grad_norm": 0.7431561325146276, "learning_rate": 2.410543390105434e-06, "loss": 0.6342, "step": 19390 }, { "epoch": 0.5661440541881989, "grad_norm": 0.7178031773084561, "learning_rate": 2.410381184103812e-06, "loss": 0.6695, "step": 19391 }, { "epoch": 0.5661732504160463, "grad_norm": 0.6990948916094377, "learning_rate": 2.41021897810219e-06, "loss": 0.5879, "step": 19392 }, { "epoch": 0.5662024466438936, "grad_norm": 0.7473543353706485, "learning_rate": 2.410056772100568e-06, "loss": 0.6166, "step": 19393 }, { "epoch": 0.566231642871741, "grad_norm": 0.6556391233959743, "learning_rate": 2.4098945660989457e-06, "loss": 0.5509, "step": 19394 }, { "epoch": 0.5662608390995884, "grad_norm": 0.8064648064455967, "learning_rate": 2.4097323600973237e-06, "loss": 0.668, "step": 19395 }, { "epoch": 0.5662900353274357, "grad_norm": 0.6735818774256485, "learning_rate": 2.4095701540957017e-06, "loss": 0.5567, "step": 19396 }, { "epoch": 0.5663192315552831, "grad_norm": 0.7180821265123438, "learning_rate": 2.4094079480940797e-06, "loss": 0.5877, "step": 19397 }, { "epoch": 0.5663484277831304, "grad_norm": 0.7978434879286261, "learning_rate": 2.4092457420924577e-06, "loss": 0.745, "step": 19398 }, { "epoch": 0.5663776240109778, "grad_norm": 0.7737292472665316, "learning_rate": 2.4090835360908353e-06, "loss": 0.7227, "step": 19399 }, { "epoch": 0.5664068202388252, "grad_norm": 0.7524088096153151, "learning_rate": 2.4089213300892133e-06, "loss": 0.7132, "step": 19400 }, { "epoch": 0.5664360164666725, "grad_norm": 0.7588515596166152, "learning_rate": 2.4087591240875918e-06, "loss": 0.6986, "step": 19401 }, { "epoch": 0.5664652126945199, "grad_norm": 0.7510168942336487, "learning_rate": 2.4085969180859693e-06, "loss": 0.6783, "step": 19402 }, { "epoch": 0.5664944089223672, "grad_norm": 0.7350596161602647, "learning_rate": 2.4084347120843473e-06, "loss": 0.6251, "step": 19403 }, { "epoch": 0.5665236051502146, "grad_norm": 0.7161836594007877, "learning_rate": 2.4082725060827254e-06, "loss": 0.6314, "step": 19404 }, { "epoch": 0.566552801378062, "grad_norm": 0.7353003678968686, "learning_rate": 2.4081103000811034e-06, "loss": 0.6315, "step": 19405 }, { "epoch": 0.5665819976059093, "grad_norm": 0.7255076486464103, "learning_rate": 2.407948094079481e-06, "loss": 0.6235, "step": 19406 }, { "epoch": 0.5666111938337567, "grad_norm": 0.7324918616766913, "learning_rate": 2.407785888077859e-06, "loss": 0.6351, "step": 19407 }, { "epoch": 0.566640390061604, "grad_norm": 0.7323341459744693, "learning_rate": 2.407623682076237e-06, "loss": 0.6029, "step": 19408 }, { "epoch": 0.5666695862894514, "grad_norm": 0.6973892474296275, "learning_rate": 2.407461476074615e-06, "loss": 0.6486, "step": 19409 }, { "epoch": 0.5666987825172988, "grad_norm": 0.7401676821993669, "learning_rate": 2.407299270072993e-06, "loss": 0.6981, "step": 19410 }, { "epoch": 0.5667279787451461, "grad_norm": 0.8853675021668647, "learning_rate": 2.407137064071371e-06, "loss": 0.6996, "step": 19411 }, { "epoch": 0.5667571749729935, "grad_norm": 0.7048236311110496, "learning_rate": 2.406974858069749e-06, "loss": 0.6156, "step": 19412 }, { "epoch": 0.5667863712008409, "grad_norm": 0.7835348039046941, "learning_rate": 2.4068126520681266e-06, "loss": 0.6625, "step": 19413 }, { "epoch": 0.5668155674286882, "grad_norm": 0.7097481968474867, "learning_rate": 2.4066504460665046e-06, "loss": 0.6145, "step": 19414 }, { "epoch": 0.5668447636565356, "grad_norm": 0.6898788455262443, "learning_rate": 2.4064882400648826e-06, "loss": 0.5582, "step": 19415 }, { "epoch": 0.5668739598843829, "grad_norm": 0.7188927273319877, "learning_rate": 2.4063260340632606e-06, "loss": 0.647, "step": 19416 }, { "epoch": 0.5669031561122303, "grad_norm": 0.7425022338910745, "learning_rate": 2.4061638280616386e-06, "loss": 0.6678, "step": 19417 }, { "epoch": 0.5669323523400777, "grad_norm": 0.7095106696453907, "learning_rate": 2.406001622060016e-06, "loss": 0.5534, "step": 19418 }, { "epoch": 0.566961548567925, "grad_norm": 0.8225541576547604, "learning_rate": 2.4058394160583946e-06, "loss": 0.691, "step": 19419 }, { "epoch": 0.5669907447957724, "grad_norm": 0.6958921694100064, "learning_rate": 2.405677210056772e-06, "loss": 0.5902, "step": 19420 }, { "epoch": 0.5670199410236197, "grad_norm": 0.7369965322102742, "learning_rate": 2.40551500405515e-06, "loss": 0.6756, "step": 19421 }, { "epoch": 0.5670491372514671, "grad_norm": 0.8017333022301565, "learning_rate": 2.405352798053528e-06, "loss": 0.7458, "step": 19422 }, { "epoch": 0.5670783334793145, "grad_norm": 0.7020301104439103, "learning_rate": 2.4051905920519062e-06, "loss": 0.6089, "step": 19423 }, { "epoch": 0.5671075297071618, "grad_norm": 0.7166504170748964, "learning_rate": 2.4050283860502842e-06, "loss": 0.5933, "step": 19424 }, { "epoch": 0.5671367259350092, "grad_norm": 0.7160662320343565, "learning_rate": 2.404866180048662e-06, "loss": 0.6522, "step": 19425 }, { "epoch": 0.5671659221628566, "grad_norm": 0.7816367647421415, "learning_rate": 2.40470397404704e-06, "loss": 0.6909, "step": 19426 }, { "epoch": 0.5671951183907039, "grad_norm": 0.6731058642884122, "learning_rate": 2.404541768045418e-06, "loss": 0.5593, "step": 19427 }, { "epoch": 0.5672243146185513, "grad_norm": 0.7713195298048732, "learning_rate": 2.404379562043796e-06, "loss": 0.6295, "step": 19428 }, { "epoch": 0.5672535108463986, "grad_norm": 0.7047155530004442, "learning_rate": 2.404217356042174e-06, "loss": 0.6177, "step": 19429 }, { "epoch": 0.567282707074246, "grad_norm": 0.7389653556604086, "learning_rate": 2.404055150040552e-06, "loss": 0.6693, "step": 19430 }, { "epoch": 0.5673119033020934, "grad_norm": 0.8647299066001004, "learning_rate": 2.40389294403893e-06, "loss": 0.6204, "step": 19431 }, { "epoch": 0.5673410995299407, "grad_norm": 0.7504654486843579, "learning_rate": 2.4037307380373074e-06, "loss": 0.6798, "step": 19432 }, { "epoch": 0.5673702957577881, "grad_norm": 0.7959962153983771, "learning_rate": 2.4035685320356854e-06, "loss": 0.6719, "step": 19433 }, { "epoch": 0.5673994919856354, "grad_norm": 0.7167853379835801, "learning_rate": 2.4034063260340634e-06, "loss": 0.6939, "step": 19434 }, { "epoch": 0.5674286882134828, "grad_norm": 0.699080333034481, "learning_rate": 2.4032441200324414e-06, "loss": 0.5571, "step": 19435 }, { "epoch": 0.5674578844413302, "grad_norm": 0.7257250099476432, "learning_rate": 2.4030819140308195e-06, "loss": 0.5727, "step": 19436 }, { "epoch": 0.5674870806691775, "grad_norm": 0.7381057277991979, "learning_rate": 2.402919708029197e-06, "loss": 0.6272, "step": 19437 }, { "epoch": 0.5675162768970249, "grad_norm": 0.7748099365655261, "learning_rate": 2.4027575020275755e-06, "loss": 0.6432, "step": 19438 }, { "epoch": 0.5675454731248722, "grad_norm": 0.7230123799879064, "learning_rate": 2.402595296025953e-06, "loss": 0.5971, "step": 19439 }, { "epoch": 0.5675746693527196, "grad_norm": 0.7388739903153593, "learning_rate": 2.402433090024331e-06, "loss": 0.6939, "step": 19440 }, { "epoch": 0.567603865580567, "grad_norm": 0.7457450356999085, "learning_rate": 2.402270884022709e-06, "loss": 0.6765, "step": 19441 }, { "epoch": 0.5676330618084143, "grad_norm": 0.681295954934486, "learning_rate": 2.402108678021087e-06, "loss": 0.5711, "step": 19442 }, { "epoch": 0.5676622580362617, "grad_norm": 0.7359312449461718, "learning_rate": 2.401946472019465e-06, "loss": 0.6615, "step": 19443 }, { "epoch": 0.567691454264109, "grad_norm": 0.7202197302315791, "learning_rate": 2.4017842660178427e-06, "loss": 0.6811, "step": 19444 }, { "epoch": 0.5677206504919564, "grad_norm": 0.7129289274022155, "learning_rate": 2.4016220600162207e-06, "loss": 0.6518, "step": 19445 }, { "epoch": 0.5677498467198038, "grad_norm": 0.7169876611985069, "learning_rate": 2.4014598540145987e-06, "loss": 0.6325, "step": 19446 }, { "epoch": 0.5677790429476511, "grad_norm": 0.7051655291154643, "learning_rate": 2.4012976480129767e-06, "loss": 0.6653, "step": 19447 }, { "epoch": 0.5678082391754985, "grad_norm": 0.6766489881528648, "learning_rate": 2.4011354420113547e-06, "loss": 0.5996, "step": 19448 }, { "epoch": 0.5678374354033459, "grad_norm": 0.7782438516525315, "learning_rate": 2.4009732360097327e-06, "loss": 0.6537, "step": 19449 }, { "epoch": 0.5678666316311932, "grad_norm": 0.7655145801195863, "learning_rate": 2.4008110300081107e-06, "loss": 0.608, "step": 19450 }, { "epoch": 0.5678958278590406, "grad_norm": 0.787442831269311, "learning_rate": 2.4006488240064883e-06, "loss": 0.6705, "step": 19451 }, { "epoch": 0.5679250240868879, "grad_norm": 0.67137860361044, "learning_rate": 2.4004866180048663e-06, "loss": 0.5959, "step": 19452 }, { "epoch": 0.5679542203147353, "grad_norm": 0.6790904895393515, "learning_rate": 2.4003244120032443e-06, "loss": 0.5899, "step": 19453 }, { "epoch": 0.5679834165425827, "grad_norm": 0.6697401800529682, "learning_rate": 2.4001622060016223e-06, "loss": 0.5673, "step": 19454 }, { "epoch": 0.56801261277043, "grad_norm": 0.7114780006150587, "learning_rate": 2.4000000000000003e-06, "loss": 0.6488, "step": 19455 }, { "epoch": 0.5680418089982774, "grad_norm": 0.678881777935284, "learning_rate": 2.399837793998378e-06, "loss": 0.6087, "step": 19456 }, { "epoch": 0.5680710052261247, "grad_norm": 0.725147833351831, "learning_rate": 2.3996755879967563e-06, "loss": 0.5842, "step": 19457 }, { "epoch": 0.5681002014539721, "grad_norm": 0.7666586868004542, "learning_rate": 2.399513381995134e-06, "loss": 0.683, "step": 19458 }, { "epoch": 0.5681293976818195, "grad_norm": 0.7237283743491708, "learning_rate": 2.399351175993512e-06, "loss": 0.6491, "step": 19459 }, { "epoch": 0.5681585939096668, "grad_norm": 0.7537926098765909, "learning_rate": 2.39918896999189e-06, "loss": 0.7233, "step": 19460 }, { "epoch": 0.5681877901375142, "grad_norm": 0.6785006112388919, "learning_rate": 2.399026763990268e-06, "loss": 0.5287, "step": 19461 }, { "epoch": 0.5682169863653616, "grad_norm": 0.7467800329745159, "learning_rate": 2.398864557988646e-06, "loss": 0.6862, "step": 19462 }, { "epoch": 0.5682461825932089, "grad_norm": 0.7219115121675946, "learning_rate": 2.3987023519870235e-06, "loss": 0.6493, "step": 19463 }, { "epoch": 0.5682753788210563, "grad_norm": 0.7954718393576784, "learning_rate": 2.3985401459854015e-06, "loss": 0.6574, "step": 19464 }, { "epoch": 0.5683045750489036, "grad_norm": 0.7197807462873294, "learning_rate": 2.3983779399837795e-06, "loss": 0.6509, "step": 19465 }, { "epoch": 0.568333771276751, "grad_norm": 0.7488649355261993, "learning_rate": 2.3982157339821575e-06, "loss": 0.7021, "step": 19466 }, { "epoch": 0.5683629675045984, "grad_norm": 0.7148814763466255, "learning_rate": 2.3980535279805355e-06, "loss": 0.6308, "step": 19467 }, { "epoch": 0.5683921637324457, "grad_norm": 0.7147689889878779, "learning_rate": 2.3978913219789136e-06, "loss": 0.6645, "step": 19468 }, { "epoch": 0.5684213599602931, "grad_norm": 0.7739519150521151, "learning_rate": 2.3977291159772916e-06, "loss": 0.6063, "step": 19469 }, { "epoch": 0.5684505561881404, "grad_norm": 0.7502890594830562, "learning_rate": 2.397566909975669e-06, "loss": 0.6716, "step": 19470 }, { "epoch": 0.5684797524159878, "grad_norm": 0.6609333035691799, "learning_rate": 2.397404703974047e-06, "loss": 0.5868, "step": 19471 }, { "epoch": 0.5685089486438352, "grad_norm": 0.7177725351786087, "learning_rate": 2.397242497972425e-06, "loss": 0.6453, "step": 19472 }, { "epoch": 0.5685381448716825, "grad_norm": 0.7613085384747704, "learning_rate": 2.397080291970803e-06, "loss": 0.7043, "step": 19473 }, { "epoch": 0.5685673410995299, "grad_norm": 0.7436427666655527, "learning_rate": 2.396918085969181e-06, "loss": 0.6716, "step": 19474 }, { "epoch": 0.5685965373273772, "grad_norm": 0.6661305676139092, "learning_rate": 2.3967558799675588e-06, "loss": 0.6052, "step": 19475 }, { "epoch": 0.5686257335552246, "grad_norm": 0.6843411250386942, "learning_rate": 2.396593673965937e-06, "loss": 0.5711, "step": 19476 }, { "epoch": 0.568654929783072, "grad_norm": 0.7078753649495501, "learning_rate": 2.3964314679643148e-06, "loss": 0.6212, "step": 19477 }, { "epoch": 0.5686841260109194, "grad_norm": 0.7751390182723531, "learning_rate": 2.3962692619626928e-06, "loss": 0.7211, "step": 19478 }, { "epoch": 0.5687133222387668, "grad_norm": 0.735254505808736, "learning_rate": 2.3961070559610708e-06, "loss": 0.6478, "step": 19479 }, { "epoch": 0.5687425184666142, "grad_norm": 0.8740360365683821, "learning_rate": 2.395944849959449e-06, "loss": 0.6909, "step": 19480 }, { "epoch": 0.5687717146944615, "grad_norm": 0.705073922762578, "learning_rate": 2.395782643957827e-06, "loss": 0.6003, "step": 19481 }, { "epoch": 0.5688009109223089, "grad_norm": 0.7145500445265938, "learning_rate": 2.3956204379562044e-06, "loss": 0.6355, "step": 19482 }, { "epoch": 0.5688301071501563, "grad_norm": 0.7475090002265375, "learning_rate": 2.3954582319545824e-06, "loss": 0.6753, "step": 19483 }, { "epoch": 0.5688593033780036, "grad_norm": 0.7236501282291125, "learning_rate": 2.3952960259529604e-06, "loss": 0.6627, "step": 19484 }, { "epoch": 0.568888499605851, "grad_norm": 0.7653118398107828, "learning_rate": 2.3951338199513384e-06, "loss": 0.6722, "step": 19485 }, { "epoch": 0.5689176958336983, "grad_norm": 0.7593103971153305, "learning_rate": 2.3949716139497164e-06, "loss": 0.6924, "step": 19486 }, { "epoch": 0.5689468920615457, "grad_norm": 0.7612381270969281, "learning_rate": 2.3948094079480944e-06, "loss": 0.6895, "step": 19487 }, { "epoch": 0.5689760882893931, "grad_norm": 0.7393367615852298, "learning_rate": 2.3946472019464724e-06, "loss": 0.6259, "step": 19488 }, { "epoch": 0.5690052845172404, "grad_norm": 0.7234462755134906, "learning_rate": 2.39448499594485e-06, "loss": 0.6678, "step": 19489 }, { "epoch": 0.5690344807450878, "grad_norm": 0.7404088145463665, "learning_rate": 2.394322789943228e-06, "loss": 0.6644, "step": 19490 }, { "epoch": 0.5690636769729351, "grad_norm": 0.8065836627267117, "learning_rate": 2.394160583941606e-06, "loss": 0.7493, "step": 19491 }, { "epoch": 0.5690928732007825, "grad_norm": 0.7546224691885456, "learning_rate": 2.393998377939984e-06, "loss": 0.6499, "step": 19492 }, { "epoch": 0.5691220694286299, "grad_norm": 0.7346111196342007, "learning_rate": 2.3938361719383616e-06, "loss": 0.6719, "step": 19493 }, { "epoch": 0.5691512656564772, "grad_norm": 0.7589287668245467, "learning_rate": 2.3936739659367396e-06, "loss": 0.6625, "step": 19494 }, { "epoch": 0.5691804618843246, "grad_norm": 0.7221818516237193, "learning_rate": 2.393511759935118e-06, "loss": 0.6648, "step": 19495 }, { "epoch": 0.569209658112172, "grad_norm": 0.7822458796254994, "learning_rate": 2.3933495539334956e-06, "loss": 0.6851, "step": 19496 }, { "epoch": 0.5692388543400193, "grad_norm": 0.6904521336161816, "learning_rate": 2.3931873479318736e-06, "loss": 0.63, "step": 19497 }, { "epoch": 0.5692680505678667, "grad_norm": 0.750683338052155, "learning_rate": 2.3930251419302516e-06, "loss": 0.6481, "step": 19498 }, { "epoch": 0.569297246795714, "grad_norm": 0.6798225431415942, "learning_rate": 2.3928629359286296e-06, "loss": 0.5929, "step": 19499 }, { "epoch": 0.5693264430235614, "grad_norm": 0.6968203863404211, "learning_rate": 2.3927007299270077e-06, "loss": 0.5868, "step": 19500 }, { "epoch": 0.5693556392514088, "grad_norm": 0.8144112689122085, "learning_rate": 2.3925385239253852e-06, "loss": 0.7067, "step": 19501 }, { "epoch": 0.5693848354792561, "grad_norm": 0.7908002680626726, "learning_rate": 2.3923763179237632e-06, "loss": 0.6846, "step": 19502 }, { "epoch": 0.5694140317071035, "grad_norm": 0.7519839742380746, "learning_rate": 2.3922141119221413e-06, "loss": 0.6826, "step": 19503 }, { "epoch": 0.5694432279349508, "grad_norm": 0.7047450795670419, "learning_rate": 2.3920519059205193e-06, "loss": 0.6546, "step": 19504 }, { "epoch": 0.5694724241627982, "grad_norm": 0.7002335460062246, "learning_rate": 2.3918896999188973e-06, "loss": 0.5934, "step": 19505 }, { "epoch": 0.5695016203906456, "grad_norm": 0.7032727541787854, "learning_rate": 2.3917274939172753e-06, "loss": 0.6145, "step": 19506 }, { "epoch": 0.5695308166184929, "grad_norm": 0.6852197061403453, "learning_rate": 2.3915652879156533e-06, "loss": 0.5845, "step": 19507 }, { "epoch": 0.5695600128463403, "grad_norm": 0.7545404282492375, "learning_rate": 2.391403081914031e-06, "loss": 0.6779, "step": 19508 }, { "epoch": 0.5695892090741876, "grad_norm": 0.7216547495520305, "learning_rate": 2.391240875912409e-06, "loss": 0.6008, "step": 19509 }, { "epoch": 0.569618405302035, "grad_norm": 0.7239486725320541, "learning_rate": 2.391078669910787e-06, "loss": 0.6603, "step": 19510 }, { "epoch": 0.5696476015298824, "grad_norm": 0.7175683468371482, "learning_rate": 2.390916463909165e-06, "loss": 0.6359, "step": 19511 }, { "epoch": 0.5696767977577297, "grad_norm": 0.7271642814651438, "learning_rate": 2.3907542579075425e-06, "loss": 0.6568, "step": 19512 }, { "epoch": 0.5697059939855771, "grad_norm": 0.6500204256599441, "learning_rate": 2.3905920519059205e-06, "loss": 0.5843, "step": 19513 }, { "epoch": 0.5697351902134244, "grad_norm": 0.6474086842047058, "learning_rate": 2.390429845904299e-06, "loss": 0.5278, "step": 19514 }, { "epoch": 0.5697643864412718, "grad_norm": 0.7490541691926785, "learning_rate": 2.3902676399026765e-06, "loss": 0.6479, "step": 19515 }, { "epoch": 0.5697935826691192, "grad_norm": 0.6727105541653545, "learning_rate": 2.3901054339010545e-06, "loss": 0.6024, "step": 19516 }, { "epoch": 0.5698227788969665, "grad_norm": 0.6961592018004279, "learning_rate": 2.3899432278994325e-06, "loss": 0.6257, "step": 19517 }, { "epoch": 0.5698519751248139, "grad_norm": 0.7315165771308774, "learning_rate": 2.3897810218978105e-06, "loss": 0.6739, "step": 19518 }, { "epoch": 0.5698811713526613, "grad_norm": 0.7153728566077537, "learning_rate": 2.3896188158961885e-06, "loss": 0.6446, "step": 19519 }, { "epoch": 0.5699103675805086, "grad_norm": 0.746802975278669, "learning_rate": 2.389456609894566e-06, "loss": 0.6788, "step": 19520 }, { "epoch": 0.569939563808356, "grad_norm": 0.7726222952479126, "learning_rate": 2.389294403892944e-06, "loss": 0.64, "step": 19521 }, { "epoch": 0.5699687600362033, "grad_norm": 0.7680770193880159, "learning_rate": 2.389132197891322e-06, "loss": 0.7114, "step": 19522 }, { "epoch": 0.5699979562640507, "grad_norm": 0.719898040450402, "learning_rate": 2.3889699918897e-06, "loss": 0.6265, "step": 19523 }, { "epoch": 0.5700271524918981, "grad_norm": 0.7370408590021459, "learning_rate": 2.388807785888078e-06, "loss": 0.6336, "step": 19524 }, { "epoch": 0.5700563487197454, "grad_norm": 0.8686389393474586, "learning_rate": 2.388645579886456e-06, "loss": 0.7095, "step": 19525 }, { "epoch": 0.5700855449475928, "grad_norm": 0.7050045851437787, "learning_rate": 2.388483373884834e-06, "loss": 0.6467, "step": 19526 }, { "epoch": 0.5701147411754401, "grad_norm": 0.7321706310261665, "learning_rate": 2.3883211678832117e-06, "loss": 0.6613, "step": 19527 }, { "epoch": 0.5701439374032875, "grad_norm": 0.7767052667089573, "learning_rate": 2.3881589618815897e-06, "loss": 0.6796, "step": 19528 }, { "epoch": 0.5701731336311349, "grad_norm": 0.7054387246778897, "learning_rate": 2.3879967558799677e-06, "loss": 0.6186, "step": 19529 }, { "epoch": 0.5702023298589822, "grad_norm": 0.8043974212943888, "learning_rate": 2.3878345498783457e-06, "loss": 0.7246, "step": 19530 }, { "epoch": 0.5702315260868296, "grad_norm": 0.8014934417321854, "learning_rate": 2.3876723438767233e-06, "loss": 0.7396, "step": 19531 }, { "epoch": 0.570260722314677, "grad_norm": 0.6890151182713178, "learning_rate": 2.3875101378751013e-06, "loss": 0.6018, "step": 19532 }, { "epoch": 0.5702899185425243, "grad_norm": 0.739065656440765, "learning_rate": 2.3873479318734798e-06, "loss": 0.6579, "step": 19533 }, { "epoch": 0.5703191147703717, "grad_norm": 0.8407917957795304, "learning_rate": 2.3871857258718573e-06, "loss": 0.7161, "step": 19534 }, { "epoch": 0.570348310998219, "grad_norm": 0.7801254781513048, "learning_rate": 2.3870235198702354e-06, "loss": 0.7459, "step": 19535 }, { "epoch": 0.5703775072260664, "grad_norm": 0.6887409181403901, "learning_rate": 2.3868613138686134e-06, "loss": 0.5721, "step": 19536 }, { "epoch": 0.5704067034539138, "grad_norm": 0.7165586198334851, "learning_rate": 2.3866991078669914e-06, "loss": 0.6526, "step": 19537 }, { "epoch": 0.5704358996817611, "grad_norm": 0.6883513619699779, "learning_rate": 2.3865369018653694e-06, "loss": 0.5499, "step": 19538 }, { "epoch": 0.5704650959096085, "grad_norm": 0.7083795396738849, "learning_rate": 2.386374695863747e-06, "loss": 0.6481, "step": 19539 }, { "epoch": 0.5704942921374558, "grad_norm": 0.6972082660211103, "learning_rate": 2.386212489862125e-06, "loss": 0.5697, "step": 19540 }, { "epoch": 0.5705234883653032, "grad_norm": 0.7702861015738899, "learning_rate": 2.386050283860503e-06, "loss": 0.7428, "step": 19541 }, { "epoch": 0.5705526845931506, "grad_norm": 0.7132389651936454, "learning_rate": 2.385888077858881e-06, "loss": 0.6268, "step": 19542 }, { "epoch": 0.5705818808209979, "grad_norm": 0.7808133636450801, "learning_rate": 2.385725871857259e-06, "loss": 0.7109, "step": 19543 }, { "epoch": 0.5706110770488453, "grad_norm": 0.7547418829301206, "learning_rate": 2.385563665855637e-06, "loss": 0.6581, "step": 19544 }, { "epoch": 0.5706402732766926, "grad_norm": 0.6960436135834006, "learning_rate": 2.385401459854015e-06, "loss": 0.6273, "step": 19545 }, { "epoch": 0.57066946950454, "grad_norm": 0.7010427737858743, "learning_rate": 2.3852392538523926e-06, "loss": 0.589, "step": 19546 }, { "epoch": 0.5706986657323874, "grad_norm": 0.6602743287376175, "learning_rate": 2.3850770478507706e-06, "loss": 0.5692, "step": 19547 }, { "epoch": 0.5707278619602347, "grad_norm": 0.7482313965636349, "learning_rate": 2.3849148418491486e-06, "loss": 0.7047, "step": 19548 }, { "epoch": 0.5707570581880821, "grad_norm": 0.7074918665191814, "learning_rate": 2.3847526358475266e-06, "loss": 0.6, "step": 19549 }, { "epoch": 0.5707862544159295, "grad_norm": 0.7100906096412634, "learning_rate": 2.384590429845904e-06, "loss": 0.6194, "step": 19550 }, { "epoch": 0.5708154506437768, "grad_norm": 0.8003098501025072, "learning_rate": 2.384428223844282e-06, "loss": 0.6902, "step": 19551 }, { "epoch": 0.5708446468716242, "grad_norm": 0.685925104362012, "learning_rate": 2.3842660178426606e-06, "loss": 0.5919, "step": 19552 }, { "epoch": 0.5708738430994715, "grad_norm": 0.8299145988280878, "learning_rate": 2.384103811841038e-06, "loss": 0.6965, "step": 19553 }, { "epoch": 0.5709030393273189, "grad_norm": 0.7650968841525458, "learning_rate": 2.383941605839416e-06, "loss": 0.615, "step": 19554 }, { "epoch": 0.5709322355551663, "grad_norm": 0.7300366965530226, "learning_rate": 2.3837793998377942e-06, "loss": 0.6622, "step": 19555 }, { "epoch": 0.5709614317830136, "grad_norm": 0.7554961980856802, "learning_rate": 2.3836171938361722e-06, "loss": 0.6942, "step": 19556 }, { "epoch": 0.570990628010861, "grad_norm": 0.7133110737339289, "learning_rate": 2.3834549878345502e-06, "loss": 0.5981, "step": 19557 }, { "epoch": 0.5710198242387083, "grad_norm": 0.7338895559163339, "learning_rate": 2.383292781832928e-06, "loss": 0.6487, "step": 19558 }, { "epoch": 0.5710490204665557, "grad_norm": 0.7292544628025527, "learning_rate": 2.383130575831306e-06, "loss": 0.6895, "step": 19559 }, { "epoch": 0.5710782166944031, "grad_norm": 0.7050280088198626, "learning_rate": 2.382968369829684e-06, "loss": 0.6387, "step": 19560 }, { "epoch": 0.5711074129222504, "grad_norm": 0.731249227934776, "learning_rate": 2.382806163828062e-06, "loss": 0.6263, "step": 19561 }, { "epoch": 0.5711366091500978, "grad_norm": 0.7317143751458433, "learning_rate": 2.38264395782644e-06, "loss": 0.681, "step": 19562 }, { "epoch": 0.5711658053779451, "grad_norm": 0.6905555175667187, "learning_rate": 2.382481751824818e-06, "loss": 0.5841, "step": 19563 }, { "epoch": 0.5711950016057925, "grad_norm": 0.7308801543693343, "learning_rate": 2.382319545823196e-06, "loss": 0.6584, "step": 19564 }, { "epoch": 0.5712241978336399, "grad_norm": 0.7073961946161916, "learning_rate": 2.3821573398215734e-06, "loss": 0.645, "step": 19565 }, { "epoch": 0.5712533940614872, "grad_norm": 0.7486990540686705, "learning_rate": 2.3819951338199514e-06, "loss": 0.6982, "step": 19566 }, { "epoch": 0.5712825902893346, "grad_norm": 0.7653587709431254, "learning_rate": 2.3818329278183295e-06, "loss": 0.7172, "step": 19567 }, { "epoch": 0.571311786517182, "grad_norm": 0.9451228771430601, "learning_rate": 2.3816707218167075e-06, "loss": 0.6773, "step": 19568 }, { "epoch": 0.5713409827450293, "grad_norm": 0.7177942828048381, "learning_rate": 2.381508515815085e-06, "loss": 0.664, "step": 19569 }, { "epoch": 0.5713701789728767, "grad_norm": 0.6865226673140155, "learning_rate": 2.3813463098134635e-06, "loss": 0.6178, "step": 19570 }, { "epoch": 0.571399375200724, "grad_norm": 0.7585115098330146, "learning_rate": 2.3811841038118415e-06, "loss": 0.6954, "step": 19571 }, { "epoch": 0.5714285714285714, "grad_norm": 0.7300374089948818, "learning_rate": 2.381021897810219e-06, "loss": 0.6715, "step": 19572 }, { "epoch": 0.5714577676564188, "grad_norm": 0.7358417289415865, "learning_rate": 2.380859691808597e-06, "loss": 0.6788, "step": 19573 }, { "epoch": 0.5714869638842661, "grad_norm": 0.7309515560980914, "learning_rate": 2.380697485806975e-06, "loss": 0.6357, "step": 19574 }, { "epoch": 0.5715161601121135, "grad_norm": 0.7043008273819283, "learning_rate": 2.380535279805353e-06, "loss": 0.5535, "step": 19575 }, { "epoch": 0.5715453563399608, "grad_norm": 0.762471316715509, "learning_rate": 2.380373073803731e-06, "loss": 0.6744, "step": 19576 }, { "epoch": 0.5715745525678082, "grad_norm": 0.7587223562225898, "learning_rate": 2.3802108678021087e-06, "loss": 0.7635, "step": 19577 }, { "epoch": 0.5716037487956556, "grad_norm": 0.7058529465668916, "learning_rate": 2.3800486618004867e-06, "loss": 0.6375, "step": 19578 }, { "epoch": 0.5716329450235029, "grad_norm": 0.7544130825827072, "learning_rate": 2.3798864557988647e-06, "loss": 0.6586, "step": 19579 }, { "epoch": 0.5716621412513503, "grad_norm": 0.6995632134219013, "learning_rate": 2.3797242497972427e-06, "loss": 0.6149, "step": 19580 }, { "epoch": 0.5716913374791976, "grad_norm": 0.7468879171190564, "learning_rate": 2.3795620437956207e-06, "loss": 0.6306, "step": 19581 }, { "epoch": 0.571720533707045, "grad_norm": 0.7064605242580217, "learning_rate": 2.3793998377939987e-06, "loss": 0.6033, "step": 19582 }, { "epoch": 0.5717497299348924, "grad_norm": 0.7423400841465063, "learning_rate": 2.3792376317923767e-06, "loss": 0.6747, "step": 19583 }, { "epoch": 0.5717789261627397, "grad_norm": 0.7728360297202319, "learning_rate": 2.3790754257907543e-06, "loss": 0.6241, "step": 19584 }, { "epoch": 0.5718081223905871, "grad_norm": 0.7026195963611349, "learning_rate": 2.3789132197891323e-06, "loss": 0.5967, "step": 19585 }, { "epoch": 0.5718373186184345, "grad_norm": 0.7474121891228835, "learning_rate": 2.3787510137875103e-06, "loss": 0.731, "step": 19586 }, { "epoch": 0.5718665148462818, "grad_norm": 0.7493277294831294, "learning_rate": 2.3785888077858883e-06, "loss": 0.6602, "step": 19587 }, { "epoch": 0.5718957110741292, "grad_norm": 0.6956886098895376, "learning_rate": 2.378426601784266e-06, "loss": 0.643, "step": 19588 }, { "epoch": 0.5719249073019765, "grad_norm": 0.7208916917953306, "learning_rate": 2.3782643957826443e-06, "loss": 0.6408, "step": 19589 }, { "epoch": 0.5719541035298239, "grad_norm": 0.7241885517235814, "learning_rate": 2.3781021897810223e-06, "loss": 0.6566, "step": 19590 }, { "epoch": 0.5719832997576713, "grad_norm": 0.7904800418943011, "learning_rate": 2.3779399837794e-06, "loss": 0.7632, "step": 19591 }, { "epoch": 0.5720124959855186, "grad_norm": 0.7497712861242589, "learning_rate": 2.377777777777778e-06, "loss": 0.7052, "step": 19592 }, { "epoch": 0.572041692213366, "grad_norm": 0.782305402321303, "learning_rate": 2.377615571776156e-06, "loss": 0.7289, "step": 19593 }, { "epoch": 0.5720708884412133, "grad_norm": 0.7793630627180416, "learning_rate": 2.377453365774534e-06, "loss": 0.697, "step": 19594 }, { "epoch": 0.5721000846690607, "grad_norm": 0.6918580509327091, "learning_rate": 2.377291159772912e-06, "loss": 0.6115, "step": 19595 }, { "epoch": 0.5721292808969081, "grad_norm": 0.7582555092840609, "learning_rate": 2.3771289537712895e-06, "loss": 0.6765, "step": 19596 }, { "epoch": 0.5721584771247554, "grad_norm": 0.7023615846350642, "learning_rate": 2.3769667477696675e-06, "loss": 0.615, "step": 19597 }, { "epoch": 0.5721876733526029, "grad_norm": 0.7258749734409993, "learning_rate": 2.3768045417680455e-06, "loss": 0.6416, "step": 19598 }, { "epoch": 0.5722168695804503, "grad_norm": 0.7021306890867686, "learning_rate": 2.3766423357664236e-06, "loss": 0.5779, "step": 19599 }, { "epoch": 0.5722460658082976, "grad_norm": 0.747741727847792, "learning_rate": 2.3764801297648016e-06, "loss": 0.6982, "step": 19600 }, { "epoch": 0.572275262036145, "grad_norm": 0.7363926530416759, "learning_rate": 2.3763179237631796e-06, "loss": 0.6149, "step": 19601 }, { "epoch": 0.5723044582639923, "grad_norm": 0.7168433759249276, "learning_rate": 2.3761557177615576e-06, "loss": 0.6074, "step": 19602 }, { "epoch": 0.5723336544918397, "grad_norm": 0.6676429335117448, "learning_rate": 2.375993511759935e-06, "loss": 0.6067, "step": 19603 }, { "epoch": 0.5723628507196871, "grad_norm": 0.7274559203921369, "learning_rate": 2.375831305758313e-06, "loss": 0.6453, "step": 19604 }, { "epoch": 0.5723920469475344, "grad_norm": 0.7670082922783247, "learning_rate": 2.375669099756691e-06, "loss": 0.7344, "step": 19605 }, { "epoch": 0.5724212431753818, "grad_norm": 0.7341354427016261, "learning_rate": 2.375506893755069e-06, "loss": 0.6484, "step": 19606 }, { "epoch": 0.5724504394032291, "grad_norm": 0.715939556518555, "learning_rate": 2.3753446877534468e-06, "loss": 0.6093, "step": 19607 }, { "epoch": 0.5724796356310765, "grad_norm": 0.7067370655355356, "learning_rate": 2.375182481751825e-06, "loss": 0.6387, "step": 19608 }, { "epoch": 0.5725088318589239, "grad_norm": 0.7276950917278774, "learning_rate": 2.375020275750203e-06, "loss": 0.6621, "step": 19609 }, { "epoch": 0.5725380280867712, "grad_norm": 0.8483172105850839, "learning_rate": 2.3748580697485808e-06, "loss": 0.7027, "step": 19610 }, { "epoch": 0.5725672243146186, "grad_norm": 0.6920190084822965, "learning_rate": 2.3746958637469588e-06, "loss": 0.6363, "step": 19611 }, { "epoch": 0.572596420542466, "grad_norm": 0.6915537005474691, "learning_rate": 2.374533657745337e-06, "loss": 0.6336, "step": 19612 }, { "epoch": 0.5726256167703133, "grad_norm": 0.7207026081095657, "learning_rate": 2.374371451743715e-06, "loss": 0.6379, "step": 19613 }, { "epoch": 0.5726548129981607, "grad_norm": 0.7100774404035561, "learning_rate": 2.374209245742093e-06, "loss": 0.6446, "step": 19614 }, { "epoch": 0.572684009226008, "grad_norm": 0.7700151997752377, "learning_rate": 2.3740470397404704e-06, "loss": 0.6876, "step": 19615 }, { "epoch": 0.5727132054538554, "grad_norm": 0.6936237838406333, "learning_rate": 2.3738848337388484e-06, "loss": 0.6208, "step": 19616 }, { "epoch": 0.5727424016817028, "grad_norm": 0.7060341496635203, "learning_rate": 2.3737226277372264e-06, "loss": 0.5993, "step": 19617 }, { "epoch": 0.5727715979095501, "grad_norm": 0.7332149939272661, "learning_rate": 2.3735604217356044e-06, "loss": 0.6869, "step": 19618 }, { "epoch": 0.5728007941373975, "grad_norm": 0.9140528211357103, "learning_rate": 2.3733982157339824e-06, "loss": 0.715, "step": 19619 }, { "epoch": 0.5728299903652448, "grad_norm": 0.731362031109599, "learning_rate": 2.3732360097323604e-06, "loss": 0.6545, "step": 19620 }, { "epoch": 0.5728591865930922, "grad_norm": 0.7759077547087887, "learning_rate": 2.3730738037307384e-06, "loss": 0.7354, "step": 19621 }, { "epoch": 0.5728883828209396, "grad_norm": 0.7164819607541144, "learning_rate": 2.372911597729116e-06, "loss": 0.6044, "step": 19622 }, { "epoch": 0.5729175790487869, "grad_norm": 0.8347090807527732, "learning_rate": 2.372749391727494e-06, "loss": 0.6668, "step": 19623 }, { "epoch": 0.5729467752766343, "grad_norm": 0.7379655591715311, "learning_rate": 2.372587185725872e-06, "loss": 0.717, "step": 19624 }, { "epoch": 0.5729759715044817, "grad_norm": 0.7147402229314451, "learning_rate": 2.37242497972425e-06, "loss": 0.6447, "step": 19625 }, { "epoch": 0.573005167732329, "grad_norm": 0.6803813427326854, "learning_rate": 2.3722627737226276e-06, "loss": 0.6114, "step": 19626 }, { "epoch": 0.5730343639601764, "grad_norm": 0.7021427825533433, "learning_rate": 2.372100567721006e-06, "loss": 0.5779, "step": 19627 }, { "epoch": 0.5730635601880237, "grad_norm": 0.7557922488433073, "learning_rate": 2.371938361719384e-06, "loss": 0.6771, "step": 19628 }, { "epoch": 0.5730927564158711, "grad_norm": 0.6930689306949565, "learning_rate": 2.3717761557177616e-06, "loss": 0.6018, "step": 19629 }, { "epoch": 0.5731219526437185, "grad_norm": 0.752722274581642, "learning_rate": 2.3716139497161396e-06, "loss": 0.6843, "step": 19630 }, { "epoch": 0.5731511488715658, "grad_norm": 0.7219280627892174, "learning_rate": 2.3714517437145177e-06, "loss": 0.6389, "step": 19631 }, { "epoch": 0.5731803450994132, "grad_norm": 0.7246711134582386, "learning_rate": 2.3712895377128957e-06, "loss": 0.6131, "step": 19632 }, { "epoch": 0.5732095413272605, "grad_norm": 0.7456458969843518, "learning_rate": 2.3711273317112737e-06, "loss": 0.6899, "step": 19633 }, { "epoch": 0.5732387375551079, "grad_norm": 0.755155539384699, "learning_rate": 2.3709651257096512e-06, "loss": 0.711, "step": 19634 }, { "epoch": 0.5732679337829553, "grad_norm": 0.6759030372507149, "learning_rate": 2.3708029197080293e-06, "loss": 0.572, "step": 19635 }, { "epoch": 0.5732971300108026, "grad_norm": 0.720102177787105, "learning_rate": 2.3706407137064073e-06, "loss": 0.6238, "step": 19636 }, { "epoch": 0.57332632623865, "grad_norm": 0.7414196968175487, "learning_rate": 2.3704785077047853e-06, "loss": 0.6342, "step": 19637 }, { "epoch": 0.5733555224664973, "grad_norm": 0.6864981401483348, "learning_rate": 2.3703163017031633e-06, "loss": 0.6094, "step": 19638 }, { "epoch": 0.5733847186943447, "grad_norm": 0.6953344844408864, "learning_rate": 2.3701540957015413e-06, "loss": 0.5882, "step": 19639 }, { "epoch": 0.5734139149221921, "grad_norm": 0.7068318778381445, "learning_rate": 2.3699918896999193e-06, "loss": 0.6458, "step": 19640 }, { "epoch": 0.5734431111500394, "grad_norm": 0.8359385646649514, "learning_rate": 2.369829683698297e-06, "loss": 0.8463, "step": 19641 }, { "epoch": 0.5734723073778868, "grad_norm": 0.7646920997615587, "learning_rate": 2.369667477696675e-06, "loss": 0.7028, "step": 19642 }, { "epoch": 0.5735015036057342, "grad_norm": 0.8376311128763603, "learning_rate": 2.369505271695053e-06, "loss": 0.7454, "step": 19643 }, { "epoch": 0.5735306998335815, "grad_norm": 0.6947818229130057, "learning_rate": 2.369343065693431e-06, "loss": 0.5844, "step": 19644 }, { "epoch": 0.5735598960614289, "grad_norm": 0.7234388437570263, "learning_rate": 2.3691808596918085e-06, "loss": 0.6121, "step": 19645 }, { "epoch": 0.5735890922892762, "grad_norm": 0.761650912266841, "learning_rate": 2.369018653690187e-06, "loss": 0.721, "step": 19646 }, { "epoch": 0.5736182885171236, "grad_norm": 0.7323977533131404, "learning_rate": 2.368856447688565e-06, "loss": 0.6426, "step": 19647 }, { "epoch": 0.573647484744971, "grad_norm": 0.7569740263759861, "learning_rate": 2.3686942416869425e-06, "loss": 0.6408, "step": 19648 }, { "epoch": 0.5736766809728183, "grad_norm": 0.7533247146816004, "learning_rate": 2.3685320356853205e-06, "loss": 0.6276, "step": 19649 }, { "epoch": 0.5737058772006657, "grad_norm": 0.7194473453048389, "learning_rate": 2.3683698296836985e-06, "loss": 0.6204, "step": 19650 }, { "epoch": 0.573735073428513, "grad_norm": 0.7137964025123419, "learning_rate": 2.3682076236820765e-06, "loss": 0.6316, "step": 19651 }, { "epoch": 0.5737642696563604, "grad_norm": 0.7176848843813347, "learning_rate": 2.3680454176804545e-06, "loss": 0.6095, "step": 19652 }, { "epoch": 0.5737934658842078, "grad_norm": 0.7257294253348178, "learning_rate": 2.367883211678832e-06, "loss": 0.577, "step": 19653 }, { "epoch": 0.5738226621120551, "grad_norm": 0.7365261256304676, "learning_rate": 2.36772100567721e-06, "loss": 0.588, "step": 19654 }, { "epoch": 0.5738518583399025, "grad_norm": 0.6964724167248578, "learning_rate": 2.367558799675588e-06, "loss": 0.6258, "step": 19655 }, { "epoch": 0.5738810545677498, "grad_norm": 0.7093081155384413, "learning_rate": 2.367396593673966e-06, "loss": 0.6152, "step": 19656 }, { "epoch": 0.5739102507955972, "grad_norm": 0.7281136841614386, "learning_rate": 2.367234387672344e-06, "loss": 0.6515, "step": 19657 }, { "epoch": 0.5739394470234446, "grad_norm": 0.7040850312926048, "learning_rate": 2.367072181670722e-06, "loss": 0.6091, "step": 19658 }, { "epoch": 0.5739686432512919, "grad_norm": 0.7021352426359613, "learning_rate": 2.3669099756691e-06, "loss": 0.53, "step": 19659 }, { "epoch": 0.5739978394791393, "grad_norm": 0.7207993915277969, "learning_rate": 2.3667477696674777e-06, "loss": 0.6526, "step": 19660 }, { "epoch": 0.5740270357069867, "grad_norm": 0.7653593619578519, "learning_rate": 2.3665855636658557e-06, "loss": 0.7229, "step": 19661 }, { "epoch": 0.574056231934834, "grad_norm": 0.760713657616116, "learning_rate": 2.3664233576642337e-06, "loss": 0.7181, "step": 19662 }, { "epoch": 0.5740854281626814, "grad_norm": 0.7002788460074829, "learning_rate": 2.3662611516626118e-06, "loss": 0.6334, "step": 19663 }, { "epoch": 0.5741146243905287, "grad_norm": 0.7112609984911619, "learning_rate": 2.3660989456609893e-06, "loss": 0.6015, "step": 19664 }, { "epoch": 0.5741438206183761, "grad_norm": 0.6970960367498391, "learning_rate": 2.3659367396593678e-06, "loss": 0.6277, "step": 19665 }, { "epoch": 0.5741730168462235, "grad_norm": 0.657253161896139, "learning_rate": 2.3657745336577458e-06, "loss": 0.5569, "step": 19666 }, { "epoch": 0.5742022130740708, "grad_norm": 0.7750492418721961, "learning_rate": 2.3656123276561234e-06, "loss": 0.6731, "step": 19667 }, { "epoch": 0.5742314093019182, "grad_norm": 0.7774961756682051, "learning_rate": 2.3654501216545014e-06, "loss": 0.7487, "step": 19668 }, { "epoch": 0.5742606055297655, "grad_norm": 0.7206995677815458, "learning_rate": 2.3652879156528794e-06, "loss": 0.632, "step": 19669 }, { "epoch": 0.5742898017576129, "grad_norm": 0.8611363559973149, "learning_rate": 2.3651257096512574e-06, "loss": 0.6734, "step": 19670 }, { "epoch": 0.5743189979854603, "grad_norm": 0.7706810270052417, "learning_rate": 2.3649635036496354e-06, "loss": 0.7242, "step": 19671 }, { "epoch": 0.5743481942133076, "grad_norm": 0.7214745060958146, "learning_rate": 2.364801297648013e-06, "loss": 0.6285, "step": 19672 }, { "epoch": 0.574377390441155, "grad_norm": 0.7510649826387797, "learning_rate": 2.364639091646391e-06, "loss": 0.6722, "step": 19673 }, { "epoch": 0.5744065866690024, "grad_norm": 0.7349681329796459, "learning_rate": 2.364476885644769e-06, "loss": 0.6372, "step": 19674 }, { "epoch": 0.5744357828968497, "grad_norm": 0.6681047069441343, "learning_rate": 2.364314679643147e-06, "loss": 0.5091, "step": 19675 }, { "epoch": 0.5744649791246971, "grad_norm": 0.699299891972573, "learning_rate": 2.364152473641525e-06, "loss": 0.5869, "step": 19676 }, { "epoch": 0.5744941753525444, "grad_norm": 0.7625533651635934, "learning_rate": 2.363990267639903e-06, "loss": 0.6584, "step": 19677 }, { "epoch": 0.5745233715803918, "grad_norm": 0.7160036049774451, "learning_rate": 2.363828061638281e-06, "loss": 0.6007, "step": 19678 }, { "epoch": 0.5745525678082392, "grad_norm": 0.7894913124296019, "learning_rate": 2.3636658556366586e-06, "loss": 0.6861, "step": 19679 }, { "epoch": 0.5745817640360865, "grad_norm": 0.7227295632642678, "learning_rate": 2.3635036496350366e-06, "loss": 0.6775, "step": 19680 }, { "epoch": 0.5746109602639339, "grad_norm": 0.7105829095567019, "learning_rate": 2.3633414436334146e-06, "loss": 0.6257, "step": 19681 }, { "epoch": 0.5746401564917812, "grad_norm": 0.6853628389377651, "learning_rate": 2.3631792376317926e-06, "loss": 0.5738, "step": 19682 }, { "epoch": 0.5746693527196286, "grad_norm": 0.7878601934199665, "learning_rate": 2.36301703163017e-06, "loss": 0.6266, "step": 19683 }, { "epoch": 0.574698548947476, "grad_norm": 0.7470225516453185, "learning_rate": 2.3628548256285486e-06, "loss": 0.6893, "step": 19684 }, { "epoch": 0.5747277451753233, "grad_norm": 0.7733913681552659, "learning_rate": 2.3626926196269266e-06, "loss": 0.6884, "step": 19685 }, { "epoch": 0.5747569414031707, "grad_norm": 0.7085517863807834, "learning_rate": 2.3625304136253042e-06, "loss": 0.6416, "step": 19686 }, { "epoch": 0.574786137631018, "grad_norm": 0.678295443786533, "learning_rate": 2.3623682076236822e-06, "loss": 0.5818, "step": 19687 }, { "epoch": 0.5748153338588654, "grad_norm": 0.7327097044061944, "learning_rate": 2.3622060016220602e-06, "loss": 0.6903, "step": 19688 }, { "epoch": 0.5748445300867128, "grad_norm": 0.6747447892462648, "learning_rate": 2.3620437956204382e-06, "loss": 0.5763, "step": 19689 }, { "epoch": 0.5748737263145601, "grad_norm": 0.7161133851637653, "learning_rate": 2.361881589618816e-06, "loss": 0.6247, "step": 19690 }, { "epoch": 0.5749029225424075, "grad_norm": 0.7597784904144581, "learning_rate": 2.361719383617194e-06, "loss": 0.6948, "step": 19691 }, { "epoch": 0.5749321187702549, "grad_norm": 0.7276013774973481, "learning_rate": 2.361557177615572e-06, "loss": 0.6478, "step": 19692 }, { "epoch": 0.5749613149981022, "grad_norm": 0.7583620826849804, "learning_rate": 2.36139497161395e-06, "loss": 0.6906, "step": 19693 }, { "epoch": 0.5749905112259496, "grad_norm": 0.7444213551179456, "learning_rate": 2.361232765612328e-06, "loss": 0.6474, "step": 19694 }, { "epoch": 0.5750197074537969, "grad_norm": 0.7319126535239421, "learning_rate": 2.361070559610706e-06, "loss": 0.6573, "step": 19695 }, { "epoch": 0.5750489036816443, "grad_norm": 0.7667116656396074, "learning_rate": 2.360908353609084e-06, "loss": 0.7132, "step": 19696 }, { "epoch": 0.5750780999094917, "grad_norm": 0.6831382958501675, "learning_rate": 2.360746147607462e-06, "loss": 0.6053, "step": 19697 }, { "epoch": 0.575107296137339, "grad_norm": 0.7275012093935225, "learning_rate": 2.3605839416058394e-06, "loss": 0.6446, "step": 19698 }, { "epoch": 0.5751364923651864, "grad_norm": 0.7274590520246973, "learning_rate": 2.3604217356042175e-06, "loss": 0.6137, "step": 19699 }, { "epoch": 0.5751656885930337, "grad_norm": 0.7178715388825577, "learning_rate": 2.3602595296025955e-06, "loss": 0.6682, "step": 19700 }, { "epoch": 0.5751948848208811, "grad_norm": 0.7068729426667704, "learning_rate": 2.3600973236009735e-06, "loss": 0.6108, "step": 19701 }, { "epoch": 0.5752240810487285, "grad_norm": 0.6839403805353483, "learning_rate": 2.359935117599351e-06, "loss": 0.5245, "step": 19702 }, { "epoch": 0.5752532772765758, "grad_norm": 0.6613510342269685, "learning_rate": 2.3597729115977295e-06, "loss": 0.5813, "step": 19703 }, { "epoch": 0.5752824735044232, "grad_norm": 0.7653256302937027, "learning_rate": 2.3596107055961075e-06, "loss": 0.7596, "step": 19704 }, { "epoch": 0.5753116697322705, "grad_norm": 0.753646817131585, "learning_rate": 2.359448499594485e-06, "loss": 0.6824, "step": 19705 }, { "epoch": 0.5753408659601179, "grad_norm": 0.7044216310555147, "learning_rate": 2.359286293592863e-06, "loss": 0.6087, "step": 19706 }, { "epoch": 0.5753700621879653, "grad_norm": 0.7451184657386807, "learning_rate": 2.359124087591241e-06, "loss": 0.7001, "step": 19707 }, { "epoch": 0.5753992584158126, "grad_norm": 0.716691960487828, "learning_rate": 2.358961881589619e-06, "loss": 0.6522, "step": 19708 }, { "epoch": 0.57542845464366, "grad_norm": 0.8002867399861583, "learning_rate": 2.3587996755879967e-06, "loss": 0.7815, "step": 19709 }, { "epoch": 0.5754576508715074, "grad_norm": 0.7674930350001347, "learning_rate": 2.3586374695863747e-06, "loss": 0.7156, "step": 19710 }, { "epoch": 0.5754868470993547, "grad_norm": 0.7603843167566028, "learning_rate": 2.3584752635847527e-06, "loss": 0.6935, "step": 19711 }, { "epoch": 0.5755160433272021, "grad_norm": 0.7824575324689937, "learning_rate": 2.3583130575831307e-06, "loss": 0.6551, "step": 19712 }, { "epoch": 0.5755452395550494, "grad_norm": 0.6881776346071895, "learning_rate": 2.3581508515815087e-06, "loss": 0.6176, "step": 19713 }, { "epoch": 0.5755744357828968, "grad_norm": 0.7568263404727603, "learning_rate": 2.3579886455798867e-06, "loss": 0.6829, "step": 19714 }, { "epoch": 0.5756036320107442, "grad_norm": 0.7263903528187272, "learning_rate": 2.3578264395782647e-06, "loss": 0.6744, "step": 19715 }, { "epoch": 0.5756328282385915, "grad_norm": 0.7267885670954155, "learning_rate": 2.3576642335766427e-06, "loss": 0.6328, "step": 19716 }, { "epoch": 0.5756620244664389, "grad_norm": 0.7242702282653399, "learning_rate": 2.3575020275750203e-06, "loss": 0.6489, "step": 19717 }, { "epoch": 0.5756912206942862, "grad_norm": 0.6767051638643407, "learning_rate": 2.3573398215733983e-06, "loss": 0.6098, "step": 19718 }, { "epoch": 0.5757204169221337, "grad_norm": 0.7532679089077157, "learning_rate": 2.3571776155717763e-06, "loss": 0.6674, "step": 19719 }, { "epoch": 0.5757496131499811, "grad_norm": 0.7486886956970036, "learning_rate": 2.3570154095701543e-06, "loss": 0.6791, "step": 19720 }, { "epoch": 0.5757788093778284, "grad_norm": 0.6945943188651561, "learning_rate": 2.3568532035685323e-06, "loss": 0.5687, "step": 19721 }, { "epoch": 0.5758080056056758, "grad_norm": 0.7613894782623958, "learning_rate": 2.3566909975669103e-06, "loss": 0.677, "step": 19722 }, { "epoch": 0.5758372018335232, "grad_norm": 0.7486936507000208, "learning_rate": 2.3565287915652883e-06, "loss": 0.6715, "step": 19723 }, { "epoch": 0.5758663980613705, "grad_norm": 0.6654490757429191, "learning_rate": 2.356366585563666e-06, "loss": 0.5466, "step": 19724 }, { "epoch": 0.5758955942892179, "grad_norm": 0.702831553679008, "learning_rate": 2.356204379562044e-06, "loss": 0.6169, "step": 19725 }, { "epoch": 0.5759247905170652, "grad_norm": 0.6795498902617563, "learning_rate": 2.356042173560422e-06, "loss": 0.5476, "step": 19726 }, { "epoch": 0.5759539867449126, "grad_norm": 0.7195190998386899, "learning_rate": 2.3558799675588e-06, "loss": 0.6431, "step": 19727 }, { "epoch": 0.57598318297276, "grad_norm": 0.671510047622941, "learning_rate": 2.3557177615571775e-06, "loss": 0.5601, "step": 19728 }, { "epoch": 0.5760123792006073, "grad_norm": 0.6999987228145171, "learning_rate": 2.3555555555555555e-06, "loss": 0.6294, "step": 19729 }, { "epoch": 0.5760415754284547, "grad_norm": 0.7305377843082805, "learning_rate": 2.3553933495539335e-06, "loss": 0.5715, "step": 19730 }, { "epoch": 0.576070771656302, "grad_norm": 0.6886814711390115, "learning_rate": 2.3552311435523116e-06, "loss": 0.5952, "step": 19731 }, { "epoch": 0.5760999678841494, "grad_norm": 0.6510056393449382, "learning_rate": 2.3550689375506896e-06, "loss": 0.5571, "step": 19732 }, { "epoch": 0.5761291641119968, "grad_norm": 0.6790789815419248, "learning_rate": 2.3549067315490676e-06, "loss": 0.5914, "step": 19733 }, { "epoch": 0.5761583603398441, "grad_norm": 0.7418129831174531, "learning_rate": 2.3547445255474456e-06, "loss": 0.6268, "step": 19734 }, { "epoch": 0.5761875565676915, "grad_norm": 0.7959222058262188, "learning_rate": 2.3545823195458236e-06, "loss": 0.7022, "step": 19735 }, { "epoch": 0.5762167527955389, "grad_norm": 0.6867305678811202, "learning_rate": 2.354420113544201e-06, "loss": 0.6249, "step": 19736 }, { "epoch": 0.5762459490233862, "grad_norm": 0.9042407187004468, "learning_rate": 2.354257907542579e-06, "loss": 0.656, "step": 19737 }, { "epoch": 0.5762751452512336, "grad_norm": 0.6888012521561776, "learning_rate": 2.354095701540957e-06, "loss": 0.5487, "step": 19738 }, { "epoch": 0.5763043414790809, "grad_norm": 0.7744984802644137, "learning_rate": 2.353933495539335e-06, "loss": 0.6887, "step": 19739 }, { "epoch": 0.5763335377069283, "grad_norm": 0.7422491855955543, "learning_rate": 2.353771289537713e-06, "loss": 0.604, "step": 19740 }, { "epoch": 0.5763627339347757, "grad_norm": 0.8139696948182319, "learning_rate": 2.353609083536091e-06, "loss": 0.6365, "step": 19741 }, { "epoch": 0.576391930162623, "grad_norm": 0.7196828143479365, "learning_rate": 2.353446877534469e-06, "loss": 0.6434, "step": 19742 }, { "epoch": 0.5764211263904704, "grad_norm": 0.700979370542023, "learning_rate": 2.353284671532847e-06, "loss": 0.5891, "step": 19743 }, { "epoch": 0.5764503226183177, "grad_norm": 0.760455719479214, "learning_rate": 2.353122465531225e-06, "loss": 0.6564, "step": 19744 }, { "epoch": 0.5764795188461651, "grad_norm": 0.731821962354276, "learning_rate": 2.352960259529603e-06, "loss": 0.6834, "step": 19745 }, { "epoch": 0.5765087150740125, "grad_norm": 0.7983857239490276, "learning_rate": 2.352798053527981e-06, "loss": 0.7066, "step": 19746 }, { "epoch": 0.5765379113018598, "grad_norm": 0.7664215722376962, "learning_rate": 2.3526358475263584e-06, "loss": 0.5957, "step": 19747 }, { "epoch": 0.5765671075297072, "grad_norm": 0.7970591756327637, "learning_rate": 2.3524736415247364e-06, "loss": 0.679, "step": 19748 }, { "epoch": 0.5765963037575546, "grad_norm": 0.683330365075277, "learning_rate": 2.3523114355231144e-06, "loss": 0.5555, "step": 19749 }, { "epoch": 0.5766254999854019, "grad_norm": 0.8784323367849194, "learning_rate": 2.3521492295214924e-06, "loss": 0.7127, "step": 19750 }, { "epoch": 0.5766546962132493, "grad_norm": 0.7061166720348482, "learning_rate": 2.3519870235198704e-06, "loss": 0.617, "step": 19751 }, { "epoch": 0.5766838924410966, "grad_norm": 0.6913492365661132, "learning_rate": 2.3518248175182484e-06, "loss": 0.5805, "step": 19752 }, { "epoch": 0.576713088668944, "grad_norm": 0.6991065571983117, "learning_rate": 2.3516626115166264e-06, "loss": 0.6115, "step": 19753 }, { "epoch": 0.5767422848967914, "grad_norm": 0.6487201344834488, "learning_rate": 2.3515004055150044e-06, "loss": 0.5394, "step": 19754 }, { "epoch": 0.5767714811246387, "grad_norm": 0.8087325180699452, "learning_rate": 2.351338199513382e-06, "loss": 0.7713, "step": 19755 }, { "epoch": 0.5768006773524861, "grad_norm": 0.7173515487483478, "learning_rate": 2.35117599351176e-06, "loss": 0.6179, "step": 19756 }, { "epoch": 0.5768298735803334, "grad_norm": 0.7624609521254323, "learning_rate": 2.351013787510138e-06, "loss": 0.6728, "step": 19757 }, { "epoch": 0.5768590698081808, "grad_norm": 0.6546878487673413, "learning_rate": 2.350851581508516e-06, "loss": 0.5254, "step": 19758 }, { "epoch": 0.5768882660360282, "grad_norm": 0.6782407059524048, "learning_rate": 2.350689375506894e-06, "loss": 0.5708, "step": 19759 }, { "epoch": 0.5769174622638755, "grad_norm": 0.6531976117910462, "learning_rate": 2.350527169505272e-06, "loss": 0.505, "step": 19760 }, { "epoch": 0.5769466584917229, "grad_norm": 0.7229550182672295, "learning_rate": 2.35036496350365e-06, "loss": 0.6597, "step": 19761 }, { "epoch": 0.5769758547195702, "grad_norm": 0.7843570081974518, "learning_rate": 2.3502027575020276e-06, "loss": 0.7126, "step": 19762 }, { "epoch": 0.5770050509474176, "grad_norm": 0.7130783320160342, "learning_rate": 2.3500405515004057e-06, "loss": 0.5532, "step": 19763 }, { "epoch": 0.577034247175265, "grad_norm": 0.7573671653636013, "learning_rate": 2.3498783454987837e-06, "loss": 0.6335, "step": 19764 }, { "epoch": 0.5770634434031123, "grad_norm": 0.7708579092183654, "learning_rate": 2.3497161394971617e-06, "loss": 0.7293, "step": 19765 }, { "epoch": 0.5770926396309597, "grad_norm": 0.7226933881242078, "learning_rate": 2.3495539334955393e-06, "loss": 0.6227, "step": 19766 }, { "epoch": 0.577121835858807, "grad_norm": 0.6834473772377148, "learning_rate": 2.3493917274939173e-06, "loss": 0.6361, "step": 19767 }, { "epoch": 0.5771510320866544, "grad_norm": 0.7657165723228511, "learning_rate": 2.3492295214922953e-06, "loss": 0.738, "step": 19768 }, { "epoch": 0.5771802283145018, "grad_norm": 0.7341714216452582, "learning_rate": 2.3490673154906733e-06, "loss": 0.5837, "step": 19769 }, { "epoch": 0.5772094245423491, "grad_norm": 0.662730010657957, "learning_rate": 2.3489051094890513e-06, "loss": 0.5675, "step": 19770 }, { "epoch": 0.5772386207701965, "grad_norm": 0.7366358350352615, "learning_rate": 2.3487429034874293e-06, "loss": 0.6537, "step": 19771 }, { "epoch": 0.5772678169980439, "grad_norm": 0.9585487623341433, "learning_rate": 2.3485806974858073e-06, "loss": 0.6622, "step": 19772 }, { "epoch": 0.5772970132258912, "grad_norm": 0.6882248732417514, "learning_rate": 2.3484184914841853e-06, "loss": 0.6042, "step": 19773 }, { "epoch": 0.5773262094537386, "grad_norm": 0.7098398419364709, "learning_rate": 2.348256285482563e-06, "loss": 0.6244, "step": 19774 }, { "epoch": 0.5773554056815859, "grad_norm": 0.673806541203551, "learning_rate": 2.348094079480941e-06, "loss": 0.486, "step": 19775 }, { "epoch": 0.5773846019094333, "grad_norm": 0.7289212057910618, "learning_rate": 2.347931873479319e-06, "loss": 0.6073, "step": 19776 }, { "epoch": 0.5774137981372807, "grad_norm": 0.7062822376304013, "learning_rate": 2.347769667477697e-06, "loss": 0.5528, "step": 19777 }, { "epoch": 0.577442994365128, "grad_norm": 0.7143763772508391, "learning_rate": 2.347607461476075e-06, "loss": 0.657, "step": 19778 }, { "epoch": 0.5774721905929754, "grad_norm": 0.744883953956762, "learning_rate": 2.347445255474453e-06, "loss": 0.6575, "step": 19779 }, { "epoch": 0.5775013868208227, "grad_norm": 0.7085786107843659, "learning_rate": 2.347283049472831e-06, "loss": 0.6257, "step": 19780 }, { "epoch": 0.5775305830486701, "grad_norm": 0.7543662679775747, "learning_rate": 2.3471208434712085e-06, "loss": 0.6141, "step": 19781 }, { "epoch": 0.5775597792765175, "grad_norm": 0.8023085774297527, "learning_rate": 2.3469586374695865e-06, "loss": 0.6962, "step": 19782 }, { "epoch": 0.5775889755043648, "grad_norm": 0.7115168289963654, "learning_rate": 2.3467964314679645e-06, "loss": 0.6269, "step": 19783 }, { "epoch": 0.5776181717322122, "grad_norm": 0.7459866334314866, "learning_rate": 2.3466342254663425e-06, "loss": 0.7431, "step": 19784 }, { "epoch": 0.5776473679600596, "grad_norm": 0.7178361452847268, "learning_rate": 2.34647201946472e-06, "loss": 0.6484, "step": 19785 }, { "epoch": 0.5776765641879069, "grad_norm": 0.7968001947806567, "learning_rate": 2.346309813463098e-06, "loss": 0.6344, "step": 19786 }, { "epoch": 0.5777057604157543, "grad_norm": 0.7341676898619901, "learning_rate": 2.346147607461476e-06, "loss": 0.6564, "step": 19787 }, { "epoch": 0.5777349566436016, "grad_norm": 0.7268709424873969, "learning_rate": 2.345985401459854e-06, "loss": 0.6353, "step": 19788 }, { "epoch": 0.577764152871449, "grad_norm": 0.790503008788081, "learning_rate": 2.345823195458232e-06, "loss": 0.7056, "step": 19789 }, { "epoch": 0.5777933490992964, "grad_norm": 0.7410226777951591, "learning_rate": 2.34566098945661e-06, "loss": 0.6202, "step": 19790 }, { "epoch": 0.5778225453271437, "grad_norm": 0.6983207035570959, "learning_rate": 2.345498783454988e-06, "loss": 0.6319, "step": 19791 }, { "epoch": 0.5778517415549911, "grad_norm": 0.7581707966645228, "learning_rate": 2.345336577453366e-06, "loss": 0.6674, "step": 19792 }, { "epoch": 0.5778809377828384, "grad_norm": 0.7445908306136616, "learning_rate": 2.3451743714517437e-06, "loss": 0.6558, "step": 19793 }, { "epoch": 0.5779101340106858, "grad_norm": 0.7124302181702432, "learning_rate": 2.3450121654501217e-06, "loss": 0.6291, "step": 19794 }, { "epoch": 0.5779393302385332, "grad_norm": 0.7679052609981816, "learning_rate": 2.3448499594484998e-06, "loss": 0.6952, "step": 19795 }, { "epoch": 0.5779685264663805, "grad_norm": 0.7263208762349234, "learning_rate": 2.3446877534468778e-06, "loss": 0.6225, "step": 19796 }, { "epoch": 0.5779977226942279, "grad_norm": 0.8665449711921267, "learning_rate": 2.3445255474452558e-06, "loss": 0.6868, "step": 19797 }, { "epoch": 0.5780269189220753, "grad_norm": 0.7129100858684042, "learning_rate": 2.3443633414436338e-06, "loss": 0.6602, "step": 19798 }, { "epoch": 0.5780561151499226, "grad_norm": 0.7009422606087369, "learning_rate": 2.3442011354420118e-06, "loss": 0.609, "step": 19799 }, { "epoch": 0.57808531137777, "grad_norm": 0.7633700975361558, "learning_rate": 2.3440389294403894e-06, "loss": 0.6664, "step": 19800 }, { "epoch": 0.5781145076056173, "grad_norm": 0.6955770486659321, "learning_rate": 2.3438767234387674e-06, "loss": 0.5695, "step": 19801 }, { "epoch": 0.5781437038334647, "grad_norm": 0.7730747292516675, "learning_rate": 2.3437145174371454e-06, "loss": 0.7168, "step": 19802 }, { "epoch": 0.5781729000613121, "grad_norm": 0.7641285145535958, "learning_rate": 2.3435523114355234e-06, "loss": 0.6586, "step": 19803 }, { "epoch": 0.5782020962891594, "grad_norm": 0.771518642411816, "learning_rate": 2.343390105433901e-06, "loss": 0.6797, "step": 19804 }, { "epoch": 0.5782312925170068, "grad_norm": 0.7215628037830155, "learning_rate": 2.343227899432279e-06, "loss": 0.6351, "step": 19805 }, { "epoch": 0.5782604887448541, "grad_norm": 0.7252361519147762, "learning_rate": 2.3430656934306574e-06, "loss": 0.6409, "step": 19806 }, { "epoch": 0.5782896849727015, "grad_norm": 0.7397851570775748, "learning_rate": 2.342903487429035e-06, "loss": 0.6258, "step": 19807 }, { "epoch": 0.5783188812005489, "grad_norm": 0.6740803280447482, "learning_rate": 2.342741281427413e-06, "loss": 0.61, "step": 19808 }, { "epoch": 0.5783480774283962, "grad_norm": 0.7878673842886291, "learning_rate": 2.342579075425791e-06, "loss": 0.634, "step": 19809 }, { "epoch": 0.5783772736562436, "grad_norm": 0.6948779363527188, "learning_rate": 2.342416869424169e-06, "loss": 0.6337, "step": 19810 }, { "epoch": 0.578406469884091, "grad_norm": 0.6866274548465895, "learning_rate": 2.342254663422547e-06, "loss": 0.5944, "step": 19811 }, { "epoch": 0.5784356661119383, "grad_norm": 0.7110514361653506, "learning_rate": 2.3420924574209246e-06, "loss": 0.659, "step": 19812 }, { "epoch": 0.5784648623397857, "grad_norm": 0.7243827061499298, "learning_rate": 2.3419302514193026e-06, "loss": 0.6462, "step": 19813 }, { "epoch": 0.578494058567633, "grad_norm": 0.6776522403474196, "learning_rate": 2.3417680454176806e-06, "loss": 0.5937, "step": 19814 }, { "epoch": 0.5785232547954804, "grad_norm": 0.6972223662931581, "learning_rate": 2.3416058394160586e-06, "loss": 0.6128, "step": 19815 }, { "epoch": 0.5785524510233278, "grad_norm": 0.7646701784527636, "learning_rate": 2.3414436334144366e-06, "loss": 0.7059, "step": 19816 }, { "epoch": 0.5785816472511751, "grad_norm": 0.7267979546820174, "learning_rate": 2.3412814274128146e-06, "loss": 0.6763, "step": 19817 }, { "epoch": 0.5786108434790225, "grad_norm": 0.8339982733521303, "learning_rate": 2.3411192214111926e-06, "loss": 0.8528, "step": 19818 }, { "epoch": 0.5786400397068698, "grad_norm": 0.7119335503113069, "learning_rate": 2.3409570154095702e-06, "loss": 0.6529, "step": 19819 }, { "epoch": 0.5786692359347172, "grad_norm": 0.7113492378142006, "learning_rate": 2.3407948094079482e-06, "loss": 0.5784, "step": 19820 }, { "epoch": 0.5786984321625646, "grad_norm": 0.698415860515118, "learning_rate": 2.3406326034063262e-06, "loss": 0.5941, "step": 19821 }, { "epoch": 0.5787276283904119, "grad_norm": 0.7355473137296006, "learning_rate": 2.3404703974047042e-06, "loss": 0.6694, "step": 19822 }, { "epoch": 0.5787568246182593, "grad_norm": 0.680019236836552, "learning_rate": 2.340308191403082e-06, "loss": 0.5338, "step": 19823 }, { "epoch": 0.5787860208461066, "grad_norm": 0.7230620452977806, "learning_rate": 2.34014598540146e-06, "loss": 0.6798, "step": 19824 }, { "epoch": 0.578815217073954, "grad_norm": 0.7088206674827928, "learning_rate": 2.3399837793998383e-06, "loss": 0.5748, "step": 19825 }, { "epoch": 0.5788444133018014, "grad_norm": 0.6719837130560848, "learning_rate": 2.339821573398216e-06, "loss": 0.5747, "step": 19826 }, { "epoch": 0.5788736095296487, "grad_norm": 0.7554765529565617, "learning_rate": 2.339659367396594e-06, "loss": 0.6991, "step": 19827 }, { "epoch": 0.5789028057574961, "grad_norm": 0.7093395354921076, "learning_rate": 2.339497161394972e-06, "loss": 0.6444, "step": 19828 }, { "epoch": 0.5789320019853434, "grad_norm": 0.7478553786540603, "learning_rate": 2.33933495539335e-06, "loss": 0.698, "step": 19829 }, { "epoch": 0.5789611982131908, "grad_norm": 0.6798626273876537, "learning_rate": 2.339172749391728e-06, "loss": 0.5362, "step": 19830 }, { "epoch": 0.5789903944410382, "grad_norm": 0.702274374260594, "learning_rate": 2.3390105433901055e-06, "loss": 0.635, "step": 19831 }, { "epoch": 0.5790195906688855, "grad_norm": 0.7759068128001316, "learning_rate": 2.3388483373884835e-06, "loss": 0.6791, "step": 19832 }, { "epoch": 0.5790487868967329, "grad_norm": 0.7690894010367301, "learning_rate": 2.3386861313868615e-06, "loss": 0.7243, "step": 19833 }, { "epoch": 0.5790779831245803, "grad_norm": 0.6928843516455876, "learning_rate": 2.3385239253852395e-06, "loss": 0.5892, "step": 19834 }, { "epoch": 0.5791071793524276, "grad_norm": 0.7307282857794752, "learning_rate": 2.3383617193836175e-06, "loss": 0.7021, "step": 19835 }, { "epoch": 0.579136375580275, "grad_norm": 0.7403390507129577, "learning_rate": 2.3381995133819955e-06, "loss": 0.6884, "step": 19836 }, { "epoch": 0.5791655718081223, "grad_norm": 0.7495347543289851, "learning_rate": 2.3380373073803735e-06, "loss": 0.696, "step": 19837 }, { "epoch": 0.5791947680359697, "grad_norm": 0.6823941393076168, "learning_rate": 2.337875101378751e-06, "loss": 0.576, "step": 19838 }, { "epoch": 0.5792239642638171, "grad_norm": 0.773783262291739, "learning_rate": 2.337712895377129e-06, "loss": 0.662, "step": 19839 }, { "epoch": 0.5792531604916645, "grad_norm": 0.7432808069452305, "learning_rate": 2.337550689375507e-06, "loss": 0.6221, "step": 19840 }, { "epoch": 0.5792823567195119, "grad_norm": 0.7183245862059174, "learning_rate": 2.337388483373885e-06, "loss": 0.6712, "step": 19841 }, { "epoch": 0.5793115529473593, "grad_norm": 0.7280729569240597, "learning_rate": 2.3372262773722627e-06, "loss": 0.6705, "step": 19842 }, { "epoch": 0.5793407491752066, "grad_norm": 0.7702023047542382, "learning_rate": 2.3370640713706407e-06, "loss": 0.676, "step": 19843 }, { "epoch": 0.579369945403054, "grad_norm": 0.7570298594641083, "learning_rate": 2.336901865369019e-06, "loss": 0.6848, "step": 19844 }, { "epoch": 0.5793991416309013, "grad_norm": 0.7361456251697891, "learning_rate": 2.3367396593673967e-06, "loss": 0.6908, "step": 19845 }, { "epoch": 0.5794283378587487, "grad_norm": 0.7598272369590949, "learning_rate": 2.3365774533657747e-06, "loss": 0.7491, "step": 19846 }, { "epoch": 0.5794575340865961, "grad_norm": 0.6820427057857671, "learning_rate": 2.3364152473641527e-06, "loss": 0.596, "step": 19847 }, { "epoch": 0.5794867303144434, "grad_norm": 0.7406454655856289, "learning_rate": 2.3362530413625307e-06, "loss": 0.6764, "step": 19848 }, { "epoch": 0.5795159265422908, "grad_norm": 0.7240409024433743, "learning_rate": 2.3360908353609087e-06, "loss": 0.6275, "step": 19849 }, { "epoch": 0.5795451227701381, "grad_norm": 0.6849728083830753, "learning_rate": 2.3359286293592863e-06, "loss": 0.571, "step": 19850 }, { "epoch": 0.5795743189979855, "grad_norm": 0.7208229597195421, "learning_rate": 2.3357664233576643e-06, "loss": 0.6053, "step": 19851 }, { "epoch": 0.5796035152258329, "grad_norm": 0.7416568697763453, "learning_rate": 2.3356042173560423e-06, "loss": 0.6932, "step": 19852 }, { "epoch": 0.5796327114536802, "grad_norm": 0.6965292829574139, "learning_rate": 2.3354420113544203e-06, "loss": 0.6261, "step": 19853 }, { "epoch": 0.5796619076815276, "grad_norm": 0.709599096166972, "learning_rate": 2.3352798053527983e-06, "loss": 0.6413, "step": 19854 }, { "epoch": 0.579691103909375, "grad_norm": 0.6968575414306079, "learning_rate": 2.3351175993511764e-06, "loss": 0.613, "step": 19855 }, { "epoch": 0.5797203001372223, "grad_norm": 0.7131503311155706, "learning_rate": 2.3349553933495544e-06, "loss": 0.648, "step": 19856 }, { "epoch": 0.5797494963650697, "grad_norm": 0.6387768531044838, "learning_rate": 2.334793187347932e-06, "loss": 0.506, "step": 19857 }, { "epoch": 0.579778692592917, "grad_norm": 0.6843262515018383, "learning_rate": 2.33463098134631e-06, "loss": 0.5561, "step": 19858 }, { "epoch": 0.5798078888207644, "grad_norm": 0.6977655873364567, "learning_rate": 2.334468775344688e-06, "loss": 0.5824, "step": 19859 }, { "epoch": 0.5798370850486118, "grad_norm": 0.7541456995305982, "learning_rate": 2.334306569343066e-06, "loss": 0.6558, "step": 19860 }, { "epoch": 0.5798662812764591, "grad_norm": 0.6619271415044736, "learning_rate": 2.3341443633414435e-06, "loss": 0.526, "step": 19861 }, { "epoch": 0.5798954775043065, "grad_norm": 0.7790184702706886, "learning_rate": 2.3339821573398216e-06, "loss": 0.6675, "step": 19862 }, { "epoch": 0.5799246737321538, "grad_norm": 0.748326257171488, "learning_rate": 2.3338199513382e-06, "loss": 0.6764, "step": 19863 }, { "epoch": 0.5799538699600012, "grad_norm": 0.7552483362627062, "learning_rate": 2.3336577453365776e-06, "loss": 0.7013, "step": 19864 }, { "epoch": 0.5799830661878486, "grad_norm": 0.7458292686195624, "learning_rate": 2.3334955393349556e-06, "loss": 0.6752, "step": 19865 }, { "epoch": 0.5800122624156959, "grad_norm": 0.7321184226651937, "learning_rate": 2.3333333333333336e-06, "loss": 0.593, "step": 19866 }, { "epoch": 0.5800414586435433, "grad_norm": 0.7144917690528846, "learning_rate": 2.3331711273317116e-06, "loss": 0.6073, "step": 19867 }, { "epoch": 0.5800706548713906, "grad_norm": 0.7743657770148528, "learning_rate": 2.333008921330089e-06, "loss": 0.6636, "step": 19868 }, { "epoch": 0.580099851099238, "grad_norm": 0.6589376399127637, "learning_rate": 2.332846715328467e-06, "loss": 0.5901, "step": 19869 }, { "epoch": 0.5801290473270854, "grad_norm": 0.7008247372620312, "learning_rate": 2.332684509326845e-06, "loss": 0.5947, "step": 19870 }, { "epoch": 0.5801582435549327, "grad_norm": 0.6840004860438545, "learning_rate": 2.332522303325223e-06, "loss": 0.5612, "step": 19871 }, { "epoch": 0.5801874397827801, "grad_norm": 0.7096356719559729, "learning_rate": 2.332360097323601e-06, "loss": 0.6412, "step": 19872 }, { "epoch": 0.5802166360106275, "grad_norm": 0.7301184495068185, "learning_rate": 2.332197891321979e-06, "loss": 0.6256, "step": 19873 }, { "epoch": 0.5802458322384748, "grad_norm": 0.7837551231822837, "learning_rate": 2.332035685320357e-06, "loss": 0.7799, "step": 19874 }, { "epoch": 0.5802750284663222, "grad_norm": 0.7968892307912165, "learning_rate": 2.3318734793187352e-06, "loss": 0.6889, "step": 19875 }, { "epoch": 0.5803042246941695, "grad_norm": 0.6893834122500125, "learning_rate": 2.331711273317113e-06, "loss": 0.6029, "step": 19876 }, { "epoch": 0.5803334209220169, "grad_norm": 0.7068422384935313, "learning_rate": 2.331549067315491e-06, "loss": 0.6256, "step": 19877 }, { "epoch": 0.5803626171498643, "grad_norm": 0.7272341371007569, "learning_rate": 2.331386861313869e-06, "loss": 0.6473, "step": 19878 }, { "epoch": 0.5803918133777116, "grad_norm": 0.6917648805953712, "learning_rate": 2.331224655312247e-06, "loss": 0.6188, "step": 19879 }, { "epoch": 0.580421009605559, "grad_norm": 0.8241068664860383, "learning_rate": 2.3310624493106244e-06, "loss": 0.6523, "step": 19880 }, { "epoch": 0.5804502058334063, "grad_norm": 0.7289529098853823, "learning_rate": 2.3309002433090024e-06, "loss": 0.6225, "step": 19881 }, { "epoch": 0.5804794020612537, "grad_norm": 0.6942007928999203, "learning_rate": 2.330738037307381e-06, "loss": 0.6142, "step": 19882 }, { "epoch": 0.5805085982891011, "grad_norm": 0.7427966529547254, "learning_rate": 2.3305758313057584e-06, "loss": 0.6909, "step": 19883 }, { "epoch": 0.5805377945169484, "grad_norm": 0.6794291000071216, "learning_rate": 2.3304136253041364e-06, "loss": 0.5711, "step": 19884 }, { "epoch": 0.5805669907447958, "grad_norm": 0.7169011053175195, "learning_rate": 2.3302514193025144e-06, "loss": 0.5935, "step": 19885 }, { "epoch": 0.5805961869726431, "grad_norm": 0.7970247007111564, "learning_rate": 2.3300892133008924e-06, "loss": 0.6455, "step": 19886 }, { "epoch": 0.5806253832004905, "grad_norm": 0.6647882407141966, "learning_rate": 2.32992700729927e-06, "loss": 0.5701, "step": 19887 }, { "epoch": 0.5806545794283379, "grad_norm": 0.7295824132237971, "learning_rate": 2.329764801297648e-06, "loss": 0.5924, "step": 19888 }, { "epoch": 0.5806837756561852, "grad_norm": 0.7750256369027475, "learning_rate": 2.329602595296026e-06, "loss": 0.7188, "step": 19889 }, { "epoch": 0.5807129718840326, "grad_norm": 0.6954162807469353, "learning_rate": 2.329440389294404e-06, "loss": 0.6331, "step": 19890 }, { "epoch": 0.58074216811188, "grad_norm": 0.694299499126084, "learning_rate": 2.329278183292782e-06, "loss": 0.5977, "step": 19891 }, { "epoch": 0.5807713643397273, "grad_norm": 0.7316051951918743, "learning_rate": 2.32911597729116e-06, "loss": 0.6442, "step": 19892 }, { "epoch": 0.5808005605675747, "grad_norm": 0.7364110271773406, "learning_rate": 2.328953771289538e-06, "loss": 0.6882, "step": 19893 }, { "epoch": 0.580829756795422, "grad_norm": 0.6648731195311506, "learning_rate": 2.328791565287916e-06, "loss": 0.5789, "step": 19894 }, { "epoch": 0.5808589530232694, "grad_norm": 0.7062678672542201, "learning_rate": 2.3286293592862937e-06, "loss": 0.6097, "step": 19895 }, { "epoch": 0.5808881492511168, "grad_norm": 0.7283210342606261, "learning_rate": 2.3284671532846717e-06, "loss": 0.6709, "step": 19896 }, { "epoch": 0.5809173454789641, "grad_norm": 0.7843207690726999, "learning_rate": 2.3283049472830497e-06, "loss": 0.6754, "step": 19897 }, { "epoch": 0.5809465417068115, "grad_norm": 0.7459817516909312, "learning_rate": 2.3281427412814277e-06, "loss": 0.6769, "step": 19898 }, { "epoch": 0.5809757379346588, "grad_norm": 0.6887466265199663, "learning_rate": 2.3279805352798053e-06, "loss": 0.6202, "step": 19899 }, { "epoch": 0.5810049341625062, "grad_norm": 0.7382540076184233, "learning_rate": 2.3278183292781833e-06, "loss": 0.6505, "step": 19900 }, { "epoch": 0.5810341303903536, "grad_norm": 0.7207492082824896, "learning_rate": 2.3276561232765617e-06, "loss": 0.6521, "step": 19901 }, { "epoch": 0.5810633266182009, "grad_norm": 0.6999396594221019, "learning_rate": 2.3274939172749393e-06, "loss": 0.6408, "step": 19902 }, { "epoch": 0.5810925228460483, "grad_norm": 0.7823537056938058, "learning_rate": 2.3273317112733173e-06, "loss": 0.7486, "step": 19903 }, { "epoch": 0.5811217190738956, "grad_norm": 0.7684209045566018, "learning_rate": 2.3271695052716953e-06, "loss": 0.706, "step": 19904 }, { "epoch": 0.581150915301743, "grad_norm": 0.7171531844041831, "learning_rate": 2.3270072992700733e-06, "loss": 0.6946, "step": 19905 }, { "epoch": 0.5811801115295904, "grad_norm": 0.716032211342319, "learning_rate": 2.326845093268451e-06, "loss": 0.6658, "step": 19906 }, { "epoch": 0.5812093077574377, "grad_norm": 0.6590876449754669, "learning_rate": 2.326682887266829e-06, "loss": 0.5437, "step": 19907 }, { "epoch": 0.5812385039852851, "grad_norm": 0.7363289490516163, "learning_rate": 2.326520681265207e-06, "loss": 0.63, "step": 19908 }, { "epoch": 0.5812677002131325, "grad_norm": 0.6492990216040608, "learning_rate": 2.326358475263585e-06, "loss": 0.5257, "step": 19909 }, { "epoch": 0.5812968964409798, "grad_norm": 0.680817238470152, "learning_rate": 2.326196269261963e-06, "loss": 0.6299, "step": 19910 }, { "epoch": 0.5813260926688272, "grad_norm": 0.6886113339481181, "learning_rate": 2.326034063260341e-06, "loss": 0.543, "step": 19911 }, { "epoch": 0.5813552888966745, "grad_norm": 0.669447095693067, "learning_rate": 2.325871857258719e-06, "loss": 0.5616, "step": 19912 }, { "epoch": 0.5813844851245219, "grad_norm": 0.7186702959203596, "learning_rate": 2.325709651257097e-06, "loss": 0.6385, "step": 19913 }, { "epoch": 0.5814136813523693, "grad_norm": 0.685799140343849, "learning_rate": 2.3255474452554745e-06, "loss": 0.5836, "step": 19914 }, { "epoch": 0.5814428775802166, "grad_norm": 0.6818751612288373, "learning_rate": 2.3253852392538525e-06, "loss": 0.6065, "step": 19915 }, { "epoch": 0.581472073808064, "grad_norm": 0.6991779092290349, "learning_rate": 2.3252230332522305e-06, "loss": 0.5979, "step": 19916 }, { "epoch": 0.5815012700359113, "grad_norm": 0.7233105574926431, "learning_rate": 2.3250608272506085e-06, "loss": 0.6276, "step": 19917 }, { "epoch": 0.5815304662637587, "grad_norm": 0.7123724282050978, "learning_rate": 2.324898621248986e-06, "loss": 0.6321, "step": 19918 }, { "epoch": 0.5815596624916061, "grad_norm": 0.688669783193714, "learning_rate": 2.324736415247364e-06, "loss": 0.5691, "step": 19919 }, { "epoch": 0.5815888587194534, "grad_norm": 0.7291553853525484, "learning_rate": 2.3245742092457426e-06, "loss": 0.648, "step": 19920 }, { "epoch": 0.5816180549473008, "grad_norm": 0.7022208419111522, "learning_rate": 2.32441200324412e-06, "loss": 0.6326, "step": 19921 }, { "epoch": 0.5816472511751482, "grad_norm": 0.7333785719729529, "learning_rate": 2.324249797242498e-06, "loss": 0.6146, "step": 19922 }, { "epoch": 0.5816764474029955, "grad_norm": 0.7422111584048341, "learning_rate": 2.324087591240876e-06, "loss": 0.6399, "step": 19923 }, { "epoch": 0.5817056436308429, "grad_norm": 0.7088521200675484, "learning_rate": 2.323925385239254e-06, "loss": 0.6255, "step": 19924 }, { "epoch": 0.5817348398586902, "grad_norm": 0.676767186273162, "learning_rate": 2.3237631792376317e-06, "loss": 0.6526, "step": 19925 }, { "epoch": 0.5817640360865376, "grad_norm": 0.7247409649347734, "learning_rate": 2.3236009732360098e-06, "loss": 0.6772, "step": 19926 }, { "epoch": 0.581793232314385, "grad_norm": 0.7512834760886052, "learning_rate": 2.3234387672343878e-06, "loss": 0.6286, "step": 19927 }, { "epoch": 0.5818224285422323, "grad_norm": 0.7109966596521258, "learning_rate": 2.3232765612327658e-06, "loss": 0.6467, "step": 19928 }, { "epoch": 0.5818516247700797, "grad_norm": 0.7353851712571229, "learning_rate": 2.3231143552311438e-06, "loss": 0.6662, "step": 19929 }, { "epoch": 0.581880820997927, "grad_norm": 0.689811886136594, "learning_rate": 2.3229521492295218e-06, "loss": 0.5525, "step": 19930 }, { "epoch": 0.5819100172257744, "grad_norm": 0.7264033941566934, "learning_rate": 2.3227899432278998e-06, "loss": 0.6596, "step": 19931 }, { "epoch": 0.5819392134536218, "grad_norm": 0.7648325315722524, "learning_rate": 2.322627737226278e-06, "loss": 0.6757, "step": 19932 }, { "epoch": 0.5819684096814691, "grad_norm": 0.718525883330679, "learning_rate": 2.3224655312246554e-06, "loss": 0.6234, "step": 19933 }, { "epoch": 0.5819976059093165, "grad_norm": 0.7652098642477987, "learning_rate": 2.3223033252230334e-06, "loss": 0.7867, "step": 19934 }, { "epoch": 0.5820268021371638, "grad_norm": 0.664356123454512, "learning_rate": 2.3221411192214114e-06, "loss": 0.5695, "step": 19935 }, { "epoch": 0.5820559983650112, "grad_norm": 0.7344550161427845, "learning_rate": 2.3219789132197894e-06, "loss": 0.6795, "step": 19936 }, { "epoch": 0.5820851945928586, "grad_norm": 0.7386001656204869, "learning_rate": 2.321816707218167e-06, "loss": 0.6504, "step": 19937 }, { "epoch": 0.5821143908207059, "grad_norm": 0.7087219890640922, "learning_rate": 2.321654501216545e-06, "loss": 0.6217, "step": 19938 }, { "epoch": 0.5821435870485533, "grad_norm": 0.7414037665858394, "learning_rate": 2.3214922952149234e-06, "loss": 0.6954, "step": 19939 }, { "epoch": 0.5821727832764007, "grad_norm": 0.7747319516068103, "learning_rate": 2.321330089213301e-06, "loss": 0.6888, "step": 19940 }, { "epoch": 0.582201979504248, "grad_norm": 0.7286782323985392, "learning_rate": 2.321167883211679e-06, "loss": 0.6194, "step": 19941 }, { "epoch": 0.5822311757320954, "grad_norm": 0.7791311180058883, "learning_rate": 2.321005677210057e-06, "loss": 0.6916, "step": 19942 }, { "epoch": 0.5822603719599427, "grad_norm": 0.6608403771787428, "learning_rate": 2.320843471208435e-06, "loss": 0.5409, "step": 19943 }, { "epoch": 0.5822895681877901, "grad_norm": 0.6733849519077257, "learning_rate": 2.3206812652068126e-06, "loss": 0.5828, "step": 19944 }, { "epoch": 0.5823187644156375, "grad_norm": 0.7007411548325526, "learning_rate": 2.3205190592051906e-06, "loss": 0.6048, "step": 19945 }, { "epoch": 0.5823479606434848, "grad_norm": 0.821788085541779, "learning_rate": 2.3203568532035686e-06, "loss": 0.6465, "step": 19946 }, { "epoch": 0.5823771568713322, "grad_norm": 0.7435440631818313, "learning_rate": 2.3201946472019466e-06, "loss": 0.644, "step": 19947 }, { "epoch": 0.5824063530991795, "grad_norm": 0.7042333177507525, "learning_rate": 2.3200324412003246e-06, "loss": 0.6204, "step": 19948 }, { "epoch": 0.5824355493270269, "grad_norm": 0.7831600807731337, "learning_rate": 2.3198702351987026e-06, "loss": 0.6602, "step": 19949 }, { "epoch": 0.5824647455548743, "grad_norm": 0.8018126167787634, "learning_rate": 2.3197080291970806e-06, "loss": 0.6842, "step": 19950 }, { "epoch": 0.5824939417827216, "grad_norm": 0.7453033015267363, "learning_rate": 2.3195458231954587e-06, "loss": 0.6649, "step": 19951 }, { "epoch": 0.582523138010569, "grad_norm": 0.7323000132513623, "learning_rate": 2.3193836171938362e-06, "loss": 0.6665, "step": 19952 }, { "epoch": 0.5825523342384163, "grad_norm": 0.7345936432066986, "learning_rate": 2.3192214111922142e-06, "loss": 0.6562, "step": 19953 }, { "epoch": 0.5825815304662637, "grad_norm": 0.7421664903158307, "learning_rate": 2.3190592051905922e-06, "loss": 0.6363, "step": 19954 }, { "epoch": 0.5826107266941111, "grad_norm": 0.6812750044080491, "learning_rate": 2.3188969991889703e-06, "loss": 0.6003, "step": 19955 }, { "epoch": 0.5826399229219584, "grad_norm": 0.7562629290604532, "learning_rate": 2.318734793187348e-06, "loss": 0.686, "step": 19956 }, { "epoch": 0.5826691191498058, "grad_norm": 0.7494891606015874, "learning_rate": 2.3185725871857263e-06, "loss": 0.6299, "step": 19957 }, { "epoch": 0.5826983153776532, "grad_norm": 0.7978684336172057, "learning_rate": 2.3184103811841043e-06, "loss": 0.7455, "step": 19958 }, { "epoch": 0.5827275116055005, "grad_norm": 0.7236708112008635, "learning_rate": 2.318248175182482e-06, "loss": 0.633, "step": 19959 }, { "epoch": 0.582756707833348, "grad_norm": 0.7402786635756201, "learning_rate": 2.31808596918086e-06, "loss": 0.6134, "step": 19960 }, { "epoch": 0.5827859040611953, "grad_norm": 0.7223375002198344, "learning_rate": 2.317923763179238e-06, "loss": 0.6225, "step": 19961 }, { "epoch": 0.5828151002890427, "grad_norm": 0.7534327660424364, "learning_rate": 2.317761557177616e-06, "loss": 0.6662, "step": 19962 }, { "epoch": 0.5828442965168901, "grad_norm": 0.6619096927553244, "learning_rate": 2.3175993511759935e-06, "loss": 0.6003, "step": 19963 }, { "epoch": 0.5828734927447374, "grad_norm": 0.6675569582220147, "learning_rate": 2.3174371451743715e-06, "loss": 0.5608, "step": 19964 }, { "epoch": 0.5829026889725848, "grad_norm": 0.7597063099778777, "learning_rate": 2.3172749391727495e-06, "loss": 0.7367, "step": 19965 }, { "epoch": 0.5829318852004322, "grad_norm": 0.769925250011001, "learning_rate": 2.3171127331711275e-06, "loss": 0.6728, "step": 19966 }, { "epoch": 0.5829610814282795, "grad_norm": 0.9601557644658302, "learning_rate": 2.3169505271695055e-06, "loss": 0.6665, "step": 19967 }, { "epoch": 0.5829902776561269, "grad_norm": 0.7559213927199278, "learning_rate": 2.3167883211678835e-06, "loss": 0.6744, "step": 19968 }, { "epoch": 0.5830194738839742, "grad_norm": 0.7406788153261356, "learning_rate": 2.3166261151662615e-06, "loss": 0.6912, "step": 19969 }, { "epoch": 0.5830486701118216, "grad_norm": 0.780000403208097, "learning_rate": 2.3164639091646395e-06, "loss": 0.7399, "step": 19970 }, { "epoch": 0.583077866339669, "grad_norm": 0.6956732892026734, "learning_rate": 2.316301703163017e-06, "loss": 0.5992, "step": 19971 }, { "epoch": 0.5831070625675163, "grad_norm": 0.6773761569768568, "learning_rate": 2.316139497161395e-06, "loss": 0.6109, "step": 19972 }, { "epoch": 0.5831362587953637, "grad_norm": 0.675234370129852, "learning_rate": 2.315977291159773e-06, "loss": 0.5711, "step": 19973 }, { "epoch": 0.583165455023211, "grad_norm": 0.7139354485368877, "learning_rate": 2.315815085158151e-06, "loss": 0.6196, "step": 19974 }, { "epoch": 0.5831946512510584, "grad_norm": 0.7406383814921128, "learning_rate": 2.3156528791565287e-06, "loss": 0.6674, "step": 19975 }, { "epoch": 0.5832238474789058, "grad_norm": 0.6827203032749208, "learning_rate": 2.315490673154907e-06, "loss": 0.5829, "step": 19976 }, { "epoch": 0.5832530437067531, "grad_norm": 0.764669861119544, "learning_rate": 2.315328467153285e-06, "loss": 0.7006, "step": 19977 }, { "epoch": 0.5832822399346005, "grad_norm": 0.7154107584950147, "learning_rate": 2.3151662611516627e-06, "loss": 0.6233, "step": 19978 }, { "epoch": 0.5833114361624478, "grad_norm": 0.740307672695249, "learning_rate": 2.3150040551500407e-06, "loss": 0.6937, "step": 19979 }, { "epoch": 0.5833406323902952, "grad_norm": 0.808893840602729, "learning_rate": 2.3148418491484187e-06, "loss": 0.6719, "step": 19980 }, { "epoch": 0.5833698286181426, "grad_norm": 0.7119897382914734, "learning_rate": 2.3146796431467967e-06, "loss": 0.5956, "step": 19981 }, { "epoch": 0.5833990248459899, "grad_norm": 0.7471476582229737, "learning_rate": 2.3145174371451743e-06, "loss": 0.6921, "step": 19982 }, { "epoch": 0.5834282210738373, "grad_norm": 0.7114585271760022, "learning_rate": 2.3143552311435523e-06, "loss": 0.5956, "step": 19983 }, { "epoch": 0.5834574173016847, "grad_norm": 0.7073357139224434, "learning_rate": 2.3141930251419303e-06, "loss": 0.6469, "step": 19984 }, { "epoch": 0.583486613529532, "grad_norm": 0.7627579430670621, "learning_rate": 2.3140308191403083e-06, "loss": 0.6816, "step": 19985 }, { "epoch": 0.5835158097573794, "grad_norm": 0.7890397840768634, "learning_rate": 2.3138686131386863e-06, "loss": 0.6993, "step": 19986 }, { "epoch": 0.5835450059852267, "grad_norm": 0.7175405550031514, "learning_rate": 2.3137064071370644e-06, "loss": 0.6543, "step": 19987 }, { "epoch": 0.5835742022130741, "grad_norm": 0.6775584015644053, "learning_rate": 2.3135442011354424e-06, "loss": 0.5245, "step": 19988 }, { "epoch": 0.5836033984409215, "grad_norm": 0.681181286858956, "learning_rate": 2.3133819951338204e-06, "loss": 0.5986, "step": 19989 }, { "epoch": 0.5836325946687688, "grad_norm": 0.6910522839382668, "learning_rate": 2.313219789132198e-06, "loss": 0.5886, "step": 19990 }, { "epoch": 0.5836617908966162, "grad_norm": 0.732834697904071, "learning_rate": 2.313057583130576e-06, "loss": 0.6659, "step": 19991 }, { "epoch": 0.5836909871244635, "grad_norm": 0.79055012259398, "learning_rate": 2.312895377128954e-06, "loss": 0.7451, "step": 19992 }, { "epoch": 0.5837201833523109, "grad_norm": 0.7498690930020212, "learning_rate": 2.312733171127332e-06, "loss": 0.6962, "step": 19993 }, { "epoch": 0.5837493795801583, "grad_norm": 0.666966179045141, "learning_rate": 2.3125709651257096e-06, "loss": 0.5318, "step": 19994 }, { "epoch": 0.5837785758080056, "grad_norm": 0.7360824008551201, "learning_rate": 2.312408759124088e-06, "loss": 0.6692, "step": 19995 }, { "epoch": 0.583807772035853, "grad_norm": 0.7286464720288726, "learning_rate": 2.312246553122466e-06, "loss": 0.626, "step": 19996 }, { "epoch": 0.5838369682637004, "grad_norm": 0.7878539972515678, "learning_rate": 2.3120843471208436e-06, "loss": 0.6579, "step": 19997 }, { "epoch": 0.5838661644915477, "grad_norm": 0.7778661009185314, "learning_rate": 2.3119221411192216e-06, "loss": 0.7067, "step": 19998 }, { "epoch": 0.5838953607193951, "grad_norm": 0.7483567675270596, "learning_rate": 2.3117599351175996e-06, "loss": 0.6879, "step": 19999 }, { "epoch": 0.5839245569472424, "grad_norm": 0.7600921138407548, "learning_rate": 2.3115977291159776e-06, "loss": 0.6355, "step": 20000 }, { "epoch": 0.5839537531750898, "grad_norm": 0.6815390021478391, "learning_rate": 2.311435523114355e-06, "loss": 0.5302, "step": 20001 }, { "epoch": 0.5839829494029372, "grad_norm": 0.6929952309511681, "learning_rate": 2.311273317112733e-06, "loss": 0.5593, "step": 20002 }, { "epoch": 0.5840121456307845, "grad_norm": 0.6865072652221696, "learning_rate": 2.311111111111111e-06, "loss": 0.6071, "step": 20003 }, { "epoch": 0.5840413418586319, "grad_norm": 0.6272276108710204, "learning_rate": 2.310948905109489e-06, "loss": 0.4993, "step": 20004 }, { "epoch": 0.5840705380864792, "grad_norm": 0.7297407880067142, "learning_rate": 2.310786699107867e-06, "loss": 0.6604, "step": 20005 }, { "epoch": 0.5840997343143266, "grad_norm": 0.733494336195059, "learning_rate": 2.3106244931062452e-06, "loss": 0.6447, "step": 20006 }, { "epoch": 0.584128930542174, "grad_norm": 0.7412813262418673, "learning_rate": 2.3104622871046232e-06, "loss": 0.6347, "step": 20007 }, { "epoch": 0.5841581267700213, "grad_norm": 0.7452601887714243, "learning_rate": 2.3103000811030012e-06, "loss": 0.6687, "step": 20008 }, { "epoch": 0.5841873229978687, "grad_norm": 0.7515713110311305, "learning_rate": 2.310137875101379e-06, "loss": 0.6362, "step": 20009 }, { "epoch": 0.584216519225716, "grad_norm": 0.7216832668982485, "learning_rate": 2.309975669099757e-06, "loss": 0.6812, "step": 20010 }, { "epoch": 0.5842457154535634, "grad_norm": 0.709061700529003, "learning_rate": 2.309813463098135e-06, "loss": 0.6087, "step": 20011 }, { "epoch": 0.5842749116814108, "grad_norm": 0.6966032085056758, "learning_rate": 2.309651257096513e-06, "loss": 0.6477, "step": 20012 }, { "epoch": 0.5843041079092581, "grad_norm": 0.6867644002685181, "learning_rate": 2.3094890510948904e-06, "loss": 0.5569, "step": 20013 }, { "epoch": 0.5843333041371055, "grad_norm": 0.6808489467851518, "learning_rate": 2.309326845093269e-06, "loss": 0.6264, "step": 20014 }, { "epoch": 0.5843625003649529, "grad_norm": 0.726297016943131, "learning_rate": 2.309164639091647e-06, "loss": 0.6657, "step": 20015 }, { "epoch": 0.5843916965928002, "grad_norm": 0.7628106605675068, "learning_rate": 2.3090024330900244e-06, "loss": 0.6661, "step": 20016 }, { "epoch": 0.5844208928206476, "grad_norm": 0.7887472889351981, "learning_rate": 2.3088402270884024e-06, "loss": 0.6768, "step": 20017 }, { "epoch": 0.5844500890484949, "grad_norm": 0.7531671634866853, "learning_rate": 2.3086780210867804e-06, "loss": 0.7211, "step": 20018 }, { "epoch": 0.5844792852763423, "grad_norm": 0.7028281386889293, "learning_rate": 2.3085158150851585e-06, "loss": 0.6081, "step": 20019 }, { "epoch": 0.5845084815041897, "grad_norm": 0.8031630753787241, "learning_rate": 2.308353609083536e-06, "loss": 0.7069, "step": 20020 }, { "epoch": 0.584537677732037, "grad_norm": 0.6967848851944322, "learning_rate": 2.308191403081914e-06, "loss": 0.5866, "step": 20021 }, { "epoch": 0.5845668739598844, "grad_norm": 0.8408803813947946, "learning_rate": 2.308029197080292e-06, "loss": 0.7303, "step": 20022 }, { "epoch": 0.5845960701877317, "grad_norm": 0.7052906720592639, "learning_rate": 2.30786699107867e-06, "loss": 0.5845, "step": 20023 }, { "epoch": 0.5846252664155791, "grad_norm": 0.7112968249705245, "learning_rate": 2.307704785077048e-06, "loss": 0.5731, "step": 20024 }, { "epoch": 0.5846544626434265, "grad_norm": 0.6985321378163193, "learning_rate": 2.307542579075426e-06, "loss": 0.6359, "step": 20025 }, { "epoch": 0.5846836588712738, "grad_norm": 0.8049858690340385, "learning_rate": 2.307380373073804e-06, "loss": 0.6383, "step": 20026 }, { "epoch": 0.5847128550991212, "grad_norm": 0.7481194591618565, "learning_rate": 2.307218167072182e-06, "loss": 0.6255, "step": 20027 }, { "epoch": 0.5847420513269685, "grad_norm": 0.7415563750534528, "learning_rate": 2.3070559610705597e-06, "loss": 0.5861, "step": 20028 }, { "epoch": 0.5847712475548159, "grad_norm": 0.6980476673583215, "learning_rate": 2.3068937550689377e-06, "loss": 0.577, "step": 20029 }, { "epoch": 0.5848004437826633, "grad_norm": 0.7039515390985535, "learning_rate": 2.3067315490673157e-06, "loss": 0.5762, "step": 20030 }, { "epoch": 0.5848296400105106, "grad_norm": 0.6767607361270374, "learning_rate": 2.3065693430656937e-06, "loss": 0.5563, "step": 20031 }, { "epoch": 0.584858836238358, "grad_norm": 0.7392454773850216, "learning_rate": 2.3064071370640713e-06, "loss": 0.6609, "step": 20032 }, { "epoch": 0.5848880324662054, "grad_norm": 0.7423206918033709, "learning_rate": 2.3062449310624497e-06, "loss": 0.6902, "step": 20033 }, { "epoch": 0.5849172286940527, "grad_norm": 0.7591669322234579, "learning_rate": 2.3060827250608277e-06, "loss": 0.698, "step": 20034 }, { "epoch": 0.5849464249219001, "grad_norm": 0.7382708719515517, "learning_rate": 2.3059205190592053e-06, "loss": 0.6416, "step": 20035 }, { "epoch": 0.5849756211497474, "grad_norm": 0.7417873796333031, "learning_rate": 2.3057583130575833e-06, "loss": 0.6281, "step": 20036 }, { "epoch": 0.5850048173775948, "grad_norm": 0.756971569199655, "learning_rate": 2.3055961070559613e-06, "loss": 0.6639, "step": 20037 }, { "epoch": 0.5850340136054422, "grad_norm": 0.6881968139860565, "learning_rate": 2.3054339010543393e-06, "loss": 0.5338, "step": 20038 }, { "epoch": 0.5850632098332895, "grad_norm": 0.7583583265046093, "learning_rate": 2.305271695052717e-06, "loss": 0.6706, "step": 20039 }, { "epoch": 0.5850924060611369, "grad_norm": 0.7648498891881904, "learning_rate": 2.305109489051095e-06, "loss": 0.6361, "step": 20040 }, { "epoch": 0.5851216022889842, "grad_norm": 0.7287670143853788, "learning_rate": 2.304947283049473e-06, "loss": 0.6424, "step": 20041 }, { "epoch": 0.5851507985168316, "grad_norm": 0.7150996649354239, "learning_rate": 2.304785077047851e-06, "loss": 0.5922, "step": 20042 }, { "epoch": 0.585179994744679, "grad_norm": 0.7371906034752537, "learning_rate": 2.304622871046229e-06, "loss": 0.5882, "step": 20043 }, { "epoch": 0.5852091909725263, "grad_norm": 0.7157554527582797, "learning_rate": 2.304460665044607e-06, "loss": 0.6619, "step": 20044 }, { "epoch": 0.5852383872003737, "grad_norm": 0.7339990060278954, "learning_rate": 2.304298459042985e-06, "loss": 0.6839, "step": 20045 }, { "epoch": 0.585267583428221, "grad_norm": 0.7150538077836064, "learning_rate": 2.304136253041363e-06, "loss": 0.6392, "step": 20046 }, { "epoch": 0.5852967796560684, "grad_norm": 0.7190615659544457, "learning_rate": 2.3039740470397405e-06, "loss": 0.6387, "step": 20047 }, { "epoch": 0.5853259758839158, "grad_norm": 0.7042063726070624, "learning_rate": 2.3038118410381185e-06, "loss": 0.6427, "step": 20048 }, { "epoch": 0.5853551721117631, "grad_norm": 0.7711477188375259, "learning_rate": 2.3036496350364965e-06, "loss": 0.5889, "step": 20049 }, { "epoch": 0.5853843683396105, "grad_norm": 0.6786892166550584, "learning_rate": 2.3034874290348745e-06, "loss": 0.5864, "step": 20050 }, { "epoch": 0.5854135645674579, "grad_norm": 0.6967157501124976, "learning_rate": 2.303325223033252e-06, "loss": 0.6494, "step": 20051 }, { "epoch": 0.5854427607953052, "grad_norm": 0.7310373633907794, "learning_rate": 2.3031630170316306e-06, "loss": 0.6334, "step": 20052 }, { "epoch": 0.5854719570231526, "grad_norm": 0.6747450870278017, "learning_rate": 2.3030008110300086e-06, "loss": 0.5825, "step": 20053 }, { "epoch": 0.5855011532509999, "grad_norm": 0.7754109681079452, "learning_rate": 2.302838605028386e-06, "loss": 0.6634, "step": 20054 }, { "epoch": 0.5855303494788473, "grad_norm": 0.7132603122181836, "learning_rate": 2.302676399026764e-06, "loss": 0.7121, "step": 20055 }, { "epoch": 0.5855595457066947, "grad_norm": 0.6768200252396946, "learning_rate": 2.302514193025142e-06, "loss": 0.5841, "step": 20056 }, { "epoch": 0.585588741934542, "grad_norm": 0.687082053020229, "learning_rate": 2.30235198702352e-06, "loss": 0.5951, "step": 20057 }, { "epoch": 0.5856179381623894, "grad_norm": 0.8228069964124578, "learning_rate": 2.3021897810218978e-06, "loss": 0.7573, "step": 20058 }, { "epoch": 0.5856471343902367, "grad_norm": 0.7687083712999904, "learning_rate": 2.3020275750202758e-06, "loss": 0.7664, "step": 20059 }, { "epoch": 0.5856763306180841, "grad_norm": 0.6965236693886591, "learning_rate": 2.3018653690186538e-06, "loss": 0.5876, "step": 20060 }, { "epoch": 0.5857055268459315, "grad_norm": 0.6686507905244027, "learning_rate": 2.3017031630170318e-06, "loss": 0.5865, "step": 20061 }, { "epoch": 0.5857347230737788, "grad_norm": 0.7181227309085174, "learning_rate": 2.3015409570154098e-06, "loss": 0.6699, "step": 20062 }, { "epoch": 0.5857639193016262, "grad_norm": 0.7743760999565861, "learning_rate": 2.301378751013788e-06, "loss": 0.6661, "step": 20063 }, { "epoch": 0.5857931155294736, "grad_norm": 0.7003644581906411, "learning_rate": 2.301216545012166e-06, "loss": 0.64, "step": 20064 }, { "epoch": 0.5858223117573209, "grad_norm": 0.7064397072859756, "learning_rate": 2.3010543390105434e-06, "loss": 0.6007, "step": 20065 }, { "epoch": 0.5858515079851683, "grad_norm": 0.7123733761723094, "learning_rate": 2.3008921330089214e-06, "loss": 0.5885, "step": 20066 }, { "epoch": 0.5858807042130156, "grad_norm": 0.7487999592771164, "learning_rate": 2.3007299270072994e-06, "loss": 0.6563, "step": 20067 }, { "epoch": 0.585909900440863, "grad_norm": 0.7194281706291106, "learning_rate": 2.3005677210056774e-06, "loss": 0.6635, "step": 20068 }, { "epoch": 0.5859390966687104, "grad_norm": 0.7360997261172454, "learning_rate": 2.3004055150040554e-06, "loss": 0.6144, "step": 20069 }, { "epoch": 0.5859682928965577, "grad_norm": 0.7257091276392561, "learning_rate": 2.300243309002433e-06, "loss": 0.677, "step": 20070 }, { "epoch": 0.5859974891244051, "grad_norm": 0.715148909704718, "learning_rate": 2.3000811030008114e-06, "loss": 0.5905, "step": 20071 }, { "epoch": 0.5860266853522524, "grad_norm": 0.7176581741776741, "learning_rate": 2.2999188969991894e-06, "loss": 0.6294, "step": 20072 }, { "epoch": 0.5860558815800998, "grad_norm": 0.7606103521894911, "learning_rate": 2.299756690997567e-06, "loss": 0.7004, "step": 20073 }, { "epoch": 0.5860850778079472, "grad_norm": 0.7159755030162137, "learning_rate": 2.299594484995945e-06, "loss": 0.705, "step": 20074 }, { "epoch": 0.5861142740357945, "grad_norm": 0.7777542946025156, "learning_rate": 2.299432278994323e-06, "loss": 0.7174, "step": 20075 }, { "epoch": 0.5861434702636419, "grad_norm": 0.7061701424186035, "learning_rate": 2.299270072992701e-06, "loss": 0.6349, "step": 20076 }, { "epoch": 0.5861726664914892, "grad_norm": 0.6767518941891048, "learning_rate": 2.2991078669910786e-06, "loss": 0.5682, "step": 20077 }, { "epoch": 0.5862018627193366, "grad_norm": 0.7656425633182735, "learning_rate": 2.2989456609894566e-06, "loss": 0.7122, "step": 20078 }, { "epoch": 0.586231058947184, "grad_norm": 0.6708273127131905, "learning_rate": 2.2987834549878346e-06, "loss": 0.5659, "step": 20079 }, { "epoch": 0.5862602551750313, "grad_norm": 0.7095077670345559, "learning_rate": 2.2986212489862126e-06, "loss": 0.5894, "step": 20080 }, { "epoch": 0.5862894514028788, "grad_norm": 0.7223083837571397, "learning_rate": 2.2984590429845906e-06, "loss": 0.631, "step": 20081 }, { "epoch": 0.5863186476307262, "grad_norm": 0.6585441123752479, "learning_rate": 2.2982968369829686e-06, "loss": 0.5565, "step": 20082 }, { "epoch": 0.5863478438585735, "grad_norm": 0.7483601135858471, "learning_rate": 2.2981346309813467e-06, "loss": 0.6544, "step": 20083 }, { "epoch": 0.5863770400864209, "grad_norm": 0.7289457758371732, "learning_rate": 2.2979724249797242e-06, "loss": 0.6349, "step": 20084 }, { "epoch": 0.5864062363142682, "grad_norm": 0.7631949820309893, "learning_rate": 2.2978102189781022e-06, "loss": 0.669, "step": 20085 }, { "epoch": 0.5864354325421156, "grad_norm": 0.7239411769073806, "learning_rate": 2.2976480129764803e-06, "loss": 0.6223, "step": 20086 }, { "epoch": 0.586464628769963, "grad_norm": 0.7123557558690906, "learning_rate": 2.2974858069748583e-06, "loss": 0.5768, "step": 20087 }, { "epoch": 0.5864938249978103, "grad_norm": 0.7463208189057056, "learning_rate": 2.2973236009732363e-06, "loss": 0.6845, "step": 20088 }, { "epoch": 0.5865230212256577, "grad_norm": 0.7651127534648173, "learning_rate": 2.297161394971614e-06, "loss": 0.6377, "step": 20089 }, { "epoch": 0.586552217453505, "grad_norm": 0.7065221548882721, "learning_rate": 2.2969991889699923e-06, "loss": 0.5881, "step": 20090 }, { "epoch": 0.5865814136813524, "grad_norm": 0.7690863991071868, "learning_rate": 2.2968369829683703e-06, "loss": 0.6787, "step": 20091 }, { "epoch": 0.5866106099091998, "grad_norm": 0.7677484602697564, "learning_rate": 2.296674776966748e-06, "loss": 0.7194, "step": 20092 }, { "epoch": 0.5866398061370471, "grad_norm": 0.7787444963404245, "learning_rate": 2.296512570965126e-06, "loss": 0.6948, "step": 20093 }, { "epoch": 0.5866690023648945, "grad_norm": 0.7085869409743363, "learning_rate": 2.296350364963504e-06, "loss": 0.5965, "step": 20094 }, { "epoch": 0.5866981985927419, "grad_norm": 0.6872343950605088, "learning_rate": 2.296188158961882e-06, "loss": 0.5987, "step": 20095 }, { "epoch": 0.5867273948205892, "grad_norm": 0.6724300593976152, "learning_rate": 2.2960259529602595e-06, "loss": 0.5559, "step": 20096 }, { "epoch": 0.5867565910484366, "grad_norm": 0.6896485687439126, "learning_rate": 2.2958637469586375e-06, "loss": 0.5729, "step": 20097 }, { "epoch": 0.5867857872762839, "grad_norm": 0.678680150924522, "learning_rate": 2.2957015409570155e-06, "loss": 0.5328, "step": 20098 }, { "epoch": 0.5868149835041313, "grad_norm": 0.7682026130558031, "learning_rate": 2.2955393349553935e-06, "loss": 0.7038, "step": 20099 }, { "epoch": 0.5868441797319787, "grad_norm": 0.7402472393856663, "learning_rate": 2.2953771289537715e-06, "loss": 0.6976, "step": 20100 }, { "epoch": 0.586873375959826, "grad_norm": 0.8736723193218778, "learning_rate": 2.2952149229521495e-06, "loss": 0.6846, "step": 20101 }, { "epoch": 0.5869025721876734, "grad_norm": 0.7323505756186698, "learning_rate": 2.2950527169505275e-06, "loss": 0.6242, "step": 20102 }, { "epoch": 0.5869317684155207, "grad_norm": 0.6750815661138718, "learning_rate": 2.294890510948905e-06, "loss": 0.5808, "step": 20103 }, { "epoch": 0.5869609646433681, "grad_norm": 0.6920068138564058, "learning_rate": 2.294728304947283e-06, "loss": 0.5752, "step": 20104 }, { "epoch": 0.5869901608712155, "grad_norm": 0.725500899148911, "learning_rate": 2.294566098945661e-06, "loss": 0.6118, "step": 20105 }, { "epoch": 0.5870193570990628, "grad_norm": 0.7383253712382143, "learning_rate": 2.294403892944039e-06, "loss": 0.6391, "step": 20106 }, { "epoch": 0.5870485533269102, "grad_norm": 0.7655444404457912, "learning_rate": 2.294241686942417e-06, "loss": 0.681, "step": 20107 }, { "epoch": 0.5870777495547576, "grad_norm": 0.69011007551998, "learning_rate": 2.294079480940795e-06, "loss": 0.591, "step": 20108 }, { "epoch": 0.5871069457826049, "grad_norm": 0.6633532532469238, "learning_rate": 2.293917274939173e-06, "loss": 0.537, "step": 20109 }, { "epoch": 0.5871361420104523, "grad_norm": 0.7372131543468013, "learning_rate": 2.293755068937551e-06, "loss": 0.687, "step": 20110 }, { "epoch": 0.5871653382382996, "grad_norm": 0.745394516810496, "learning_rate": 2.2935928629359287e-06, "loss": 0.6584, "step": 20111 }, { "epoch": 0.587194534466147, "grad_norm": 0.7108818309076932, "learning_rate": 2.2934306569343067e-06, "loss": 0.6654, "step": 20112 }, { "epoch": 0.5872237306939944, "grad_norm": 0.7232297956999436, "learning_rate": 2.2932684509326847e-06, "loss": 0.5889, "step": 20113 }, { "epoch": 0.5872529269218417, "grad_norm": 0.7109113851943064, "learning_rate": 2.2931062449310627e-06, "loss": 0.6194, "step": 20114 }, { "epoch": 0.5872821231496891, "grad_norm": 0.7156933691832381, "learning_rate": 2.2929440389294403e-06, "loss": 0.6322, "step": 20115 }, { "epoch": 0.5873113193775364, "grad_norm": 0.7228646168146978, "learning_rate": 2.2927818329278183e-06, "loss": 0.6353, "step": 20116 }, { "epoch": 0.5873405156053838, "grad_norm": 0.7594133168372347, "learning_rate": 2.2926196269261963e-06, "loss": 0.6991, "step": 20117 }, { "epoch": 0.5873697118332312, "grad_norm": 0.6844641103034317, "learning_rate": 2.2924574209245744e-06, "loss": 0.6317, "step": 20118 }, { "epoch": 0.5873989080610785, "grad_norm": 0.7321664391261267, "learning_rate": 2.2922952149229524e-06, "loss": 0.7369, "step": 20119 }, { "epoch": 0.5874281042889259, "grad_norm": 0.6951970241266948, "learning_rate": 2.2921330089213304e-06, "loss": 0.6129, "step": 20120 }, { "epoch": 0.5874573005167733, "grad_norm": 0.7307426099045996, "learning_rate": 2.2919708029197084e-06, "loss": 0.6057, "step": 20121 }, { "epoch": 0.5874864967446206, "grad_norm": 0.6818673226647275, "learning_rate": 2.291808596918086e-06, "loss": 0.6026, "step": 20122 }, { "epoch": 0.587515692972468, "grad_norm": 0.7011620613690129, "learning_rate": 2.291646390916464e-06, "loss": 0.5748, "step": 20123 }, { "epoch": 0.5875448892003153, "grad_norm": 0.7826714586741388, "learning_rate": 2.291484184914842e-06, "loss": 0.6257, "step": 20124 }, { "epoch": 0.5875740854281627, "grad_norm": 0.6746904971490274, "learning_rate": 2.29132197891322e-06, "loss": 0.5599, "step": 20125 }, { "epoch": 0.5876032816560101, "grad_norm": 0.673976960635623, "learning_rate": 2.291159772911598e-06, "loss": 0.5878, "step": 20126 }, { "epoch": 0.5876324778838574, "grad_norm": 0.6822110753271597, "learning_rate": 2.290997566909976e-06, "loss": 0.5789, "step": 20127 }, { "epoch": 0.5876616741117048, "grad_norm": 0.7381996311448172, "learning_rate": 2.290835360908354e-06, "loss": 0.6671, "step": 20128 }, { "epoch": 0.5876908703395521, "grad_norm": 0.6848459749195629, "learning_rate": 2.290673154906732e-06, "loss": 0.5904, "step": 20129 }, { "epoch": 0.5877200665673995, "grad_norm": 0.7432756420072885, "learning_rate": 2.2905109489051096e-06, "loss": 0.636, "step": 20130 }, { "epoch": 0.5877492627952469, "grad_norm": 0.7222284188931769, "learning_rate": 2.2903487429034876e-06, "loss": 0.6369, "step": 20131 }, { "epoch": 0.5877784590230942, "grad_norm": 0.7706118385097396, "learning_rate": 2.2901865369018656e-06, "loss": 0.7298, "step": 20132 }, { "epoch": 0.5878076552509416, "grad_norm": 0.7187052808368816, "learning_rate": 2.2900243309002436e-06, "loss": 0.6404, "step": 20133 }, { "epoch": 0.587836851478789, "grad_norm": 0.8189788543213335, "learning_rate": 2.289862124898621e-06, "loss": 0.7218, "step": 20134 }, { "epoch": 0.5878660477066363, "grad_norm": 0.8001868785026931, "learning_rate": 2.289699918896999e-06, "loss": 0.7142, "step": 20135 }, { "epoch": 0.5878952439344837, "grad_norm": 0.7582601380532017, "learning_rate": 2.289537712895377e-06, "loss": 0.7258, "step": 20136 }, { "epoch": 0.587924440162331, "grad_norm": 0.6924047520244785, "learning_rate": 2.2893755068937552e-06, "loss": 0.5671, "step": 20137 }, { "epoch": 0.5879536363901784, "grad_norm": 0.7893752540967751, "learning_rate": 2.2892133008921332e-06, "loss": 0.6928, "step": 20138 }, { "epoch": 0.5879828326180258, "grad_norm": 0.7221489306783726, "learning_rate": 2.2890510948905112e-06, "loss": 0.6112, "step": 20139 }, { "epoch": 0.5880120288458731, "grad_norm": 0.7246951079298162, "learning_rate": 2.2888888888888892e-06, "loss": 0.595, "step": 20140 }, { "epoch": 0.5880412250737205, "grad_norm": 0.696561916650422, "learning_rate": 2.288726682887267e-06, "loss": 0.6283, "step": 20141 }, { "epoch": 0.5880704213015678, "grad_norm": 0.7184146704576767, "learning_rate": 2.288564476885645e-06, "loss": 0.6268, "step": 20142 }, { "epoch": 0.5880996175294152, "grad_norm": 0.7425680527162664, "learning_rate": 2.288402270884023e-06, "loss": 0.6344, "step": 20143 }, { "epoch": 0.5881288137572626, "grad_norm": 0.770149813296312, "learning_rate": 2.288240064882401e-06, "loss": 0.6869, "step": 20144 }, { "epoch": 0.5881580099851099, "grad_norm": 0.7086865672929614, "learning_rate": 2.288077858880779e-06, "loss": 0.6364, "step": 20145 }, { "epoch": 0.5881872062129573, "grad_norm": 0.7842567907926307, "learning_rate": 2.287915652879157e-06, "loss": 0.702, "step": 20146 }, { "epoch": 0.5882164024408046, "grad_norm": 0.7687218074071145, "learning_rate": 2.287753446877535e-06, "loss": 0.7081, "step": 20147 }, { "epoch": 0.588245598668652, "grad_norm": 0.7238430237175868, "learning_rate": 2.287591240875913e-06, "loss": 0.5326, "step": 20148 }, { "epoch": 0.5882747948964994, "grad_norm": 0.6757454288046959, "learning_rate": 2.2874290348742904e-06, "loss": 0.5065, "step": 20149 }, { "epoch": 0.5883039911243467, "grad_norm": 0.6856264086850452, "learning_rate": 2.2872668288726685e-06, "loss": 0.625, "step": 20150 }, { "epoch": 0.5883331873521941, "grad_norm": 0.721488270194003, "learning_rate": 2.2871046228710465e-06, "loss": 0.5801, "step": 20151 }, { "epoch": 0.5883623835800414, "grad_norm": 0.803760119438409, "learning_rate": 2.2869424168694245e-06, "loss": 0.7409, "step": 20152 }, { "epoch": 0.5883915798078888, "grad_norm": 0.7126986727423087, "learning_rate": 2.286780210867802e-06, "loss": 0.6537, "step": 20153 }, { "epoch": 0.5884207760357362, "grad_norm": 0.7220417859943534, "learning_rate": 2.28661800486618e-06, "loss": 0.634, "step": 20154 }, { "epoch": 0.5884499722635835, "grad_norm": 0.738359256835518, "learning_rate": 2.286455798864558e-06, "loss": 0.6816, "step": 20155 }, { "epoch": 0.5884791684914309, "grad_norm": 0.7254557328362559, "learning_rate": 2.286293592862936e-06, "loss": 0.5966, "step": 20156 }, { "epoch": 0.5885083647192783, "grad_norm": 0.7798864184076924, "learning_rate": 2.286131386861314e-06, "loss": 0.7017, "step": 20157 }, { "epoch": 0.5885375609471256, "grad_norm": 0.6743810329121499, "learning_rate": 2.285969180859692e-06, "loss": 0.531, "step": 20158 }, { "epoch": 0.588566757174973, "grad_norm": 0.7438843507398555, "learning_rate": 2.28580697485807e-06, "loss": 0.6601, "step": 20159 }, { "epoch": 0.5885959534028203, "grad_norm": 0.7084085099378535, "learning_rate": 2.2856447688564477e-06, "loss": 0.5845, "step": 20160 }, { "epoch": 0.5886251496306677, "grad_norm": 0.7420678002719435, "learning_rate": 2.2854825628548257e-06, "loss": 0.7046, "step": 20161 }, { "epoch": 0.5886543458585151, "grad_norm": 0.6856711095106092, "learning_rate": 2.2853203568532037e-06, "loss": 0.5772, "step": 20162 }, { "epoch": 0.5886835420863624, "grad_norm": 0.6959861302778353, "learning_rate": 2.2851581508515817e-06, "loss": 0.6262, "step": 20163 }, { "epoch": 0.5887127383142098, "grad_norm": 0.7164068799817416, "learning_rate": 2.2849959448499597e-06, "loss": 0.6549, "step": 20164 }, { "epoch": 0.5887419345420571, "grad_norm": 0.7507579428144392, "learning_rate": 2.2848337388483377e-06, "loss": 0.6309, "step": 20165 }, { "epoch": 0.5887711307699045, "grad_norm": 0.7230653826251912, "learning_rate": 2.2846715328467157e-06, "loss": 0.617, "step": 20166 }, { "epoch": 0.5888003269977519, "grad_norm": 0.7642942605014111, "learning_rate": 2.2845093268450937e-06, "loss": 0.7076, "step": 20167 }, { "epoch": 0.5888295232255992, "grad_norm": 0.7307335902460312, "learning_rate": 2.2843471208434713e-06, "loss": 0.6599, "step": 20168 }, { "epoch": 0.5888587194534466, "grad_norm": 0.7518921560861908, "learning_rate": 2.2841849148418493e-06, "loss": 0.669, "step": 20169 }, { "epoch": 0.588887915681294, "grad_norm": 0.6902976723959938, "learning_rate": 2.2840227088402273e-06, "loss": 0.6169, "step": 20170 }, { "epoch": 0.5889171119091413, "grad_norm": 0.6549960662567677, "learning_rate": 2.2838605028386053e-06, "loss": 0.5045, "step": 20171 }, { "epoch": 0.5889463081369887, "grad_norm": 0.7459318044752193, "learning_rate": 2.283698296836983e-06, "loss": 0.6597, "step": 20172 }, { "epoch": 0.588975504364836, "grad_norm": 0.736026905744802, "learning_rate": 2.283536090835361e-06, "loss": 0.6612, "step": 20173 }, { "epoch": 0.5890047005926834, "grad_norm": 0.7022983535911252, "learning_rate": 2.2833738848337393e-06, "loss": 0.605, "step": 20174 }, { "epoch": 0.5890338968205308, "grad_norm": 0.7479431336848134, "learning_rate": 2.283211678832117e-06, "loss": 0.6678, "step": 20175 }, { "epoch": 0.5890630930483781, "grad_norm": 0.6824975879687463, "learning_rate": 2.283049472830495e-06, "loss": 0.6229, "step": 20176 }, { "epoch": 0.5890922892762255, "grad_norm": 0.7698762374230439, "learning_rate": 2.282887266828873e-06, "loss": 0.7311, "step": 20177 }, { "epoch": 0.5891214855040728, "grad_norm": 0.7516763253868554, "learning_rate": 2.282725060827251e-06, "loss": 0.6463, "step": 20178 }, { "epoch": 0.5891506817319202, "grad_norm": 0.743908241870841, "learning_rate": 2.2825628548256285e-06, "loss": 0.6966, "step": 20179 }, { "epoch": 0.5891798779597676, "grad_norm": 0.7152049434468403, "learning_rate": 2.2824006488240065e-06, "loss": 0.6162, "step": 20180 }, { "epoch": 0.5892090741876149, "grad_norm": 0.8351116944819057, "learning_rate": 2.2822384428223845e-06, "loss": 0.6384, "step": 20181 }, { "epoch": 0.5892382704154623, "grad_norm": 0.7378656318767098, "learning_rate": 2.2820762368207626e-06, "loss": 0.5794, "step": 20182 }, { "epoch": 0.5892674666433096, "grad_norm": 0.7090263150822192, "learning_rate": 2.2819140308191406e-06, "loss": 0.5653, "step": 20183 }, { "epoch": 0.589296662871157, "grad_norm": 0.6350886038239976, "learning_rate": 2.2817518248175186e-06, "loss": 0.5409, "step": 20184 }, { "epoch": 0.5893258590990044, "grad_norm": 0.7506245433516721, "learning_rate": 2.2815896188158966e-06, "loss": 0.6874, "step": 20185 }, { "epoch": 0.5893550553268517, "grad_norm": 0.7108700390689647, "learning_rate": 2.2814274128142746e-06, "loss": 0.6431, "step": 20186 }, { "epoch": 0.5893842515546991, "grad_norm": 0.7077203621679743, "learning_rate": 2.281265206812652e-06, "loss": 0.6578, "step": 20187 }, { "epoch": 0.5894134477825465, "grad_norm": 0.7307572458850514, "learning_rate": 2.28110300081103e-06, "loss": 0.6572, "step": 20188 }, { "epoch": 0.5894426440103938, "grad_norm": 0.7548027712426348, "learning_rate": 2.280940794809408e-06, "loss": 0.7332, "step": 20189 }, { "epoch": 0.5894718402382412, "grad_norm": 0.6917052099843715, "learning_rate": 2.280778588807786e-06, "loss": 0.5911, "step": 20190 }, { "epoch": 0.5895010364660885, "grad_norm": 0.6684417609001331, "learning_rate": 2.2806163828061638e-06, "loss": 0.5473, "step": 20191 }, { "epoch": 0.5895302326939359, "grad_norm": 0.811600202813758, "learning_rate": 2.2804541768045418e-06, "loss": 0.705, "step": 20192 }, { "epoch": 0.5895594289217833, "grad_norm": 0.6933915808832838, "learning_rate": 2.28029197080292e-06, "loss": 0.6027, "step": 20193 }, { "epoch": 0.5895886251496306, "grad_norm": 0.7337408823174678, "learning_rate": 2.2801297648012978e-06, "loss": 0.7204, "step": 20194 }, { "epoch": 0.589617821377478, "grad_norm": 0.6876879596930188, "learning_rate": 2.279967558799676e-06, "loss": 0.5848, "step": 20195 }, { "epoch": 0.5896470176053253, "grad_norm": 0.7091605573571397, "learning_rate": 2.279805352798054e-06, "loss": 0.6573, "step": 20196 }, { "epoch": 0.5896762138331727, "grad_norm": 0.7758352710609037, "learning_rate": 2.279643146796432e-06, "loss": 0.7179, "step": 20197 }, { "epoch": 0.5897054100610201, "grad_norm": 0.6907684931826593, "learning_rate": 2.2794809407948094e-06, "loss": 0.6179, "step": 20198 }, { "epoch": 0.5897346062888674, "grad_norm": 0.767031658162328, "learning_rate": 2.2793187347931874e-06, "loss": 0.694, "step": 20199 }, { "epoch": 0.5897638025167148, "grad_norm": 0.6757662681597684, "learning_rate": 2.2791565287915654e-06, "loss": 0.5724, "step": 20200 }, { "epoch": 0.5897929987445623, "grad_norm": 0.686454067405846, "learning_rate": 2.2789943227899434e-06, "loss": 0.6093, "step": 20201 }, { "epoch": 0.5898221949724096, "grad_norm": 0.6686806945896275, "learning_rate": 2.2788321167883214e-06, "loss": 0.5722, "step": 20202 }, { "epoch": 0.589851391200257, "grad_norm": 0.7623978674403703, "learning_rate": 2.2786699107866994e-06, "loss": 0.6545, "step": 20203 }, { "epoch": 0.5898805874281043, "grad_norm": 0.7268117756609326, "learning_rate": 2.2785077047850774e-06, "loss": 0.5869, "step": 20204 }, { "epoch": 0.5899097836559517, "grad_norm": 0.702548128251673, "learning_rate": 2.2783454987834554e-06, "loss": 0.6304, "step": 20205 }, { "epoch": 0.5899389798837991, "grad_norm": 0.8235513115002635, "learning_rate": 2.278183292781833e-06, "loss": 0.6715, "step": 20206 }, { "epoch": 0.5899681761116464, "grad_norm": 0.7544048441306154, "learning_rate": 2.278021086780211e-06, "loss": 0.6183, "step": 20207 }, { "epoch": 0.5899973723394938, "grad_norm": 0.7439474582954672, "learning_rate": 2.277858880778589e-06, "loss": 0.6892, "step": 20208 }, { "epoch": 0.5900265685673411, "grad_norm": 0.6794640900478274, "learning_rate": 2.277696674776967e-06, "loss": 0.5733, "step": 20209 }, { "epoch": 0.5900557647951885, "grad_norm": 0.7364775160156365, "learning_rate": 2.2775344687753446e-06, "loss": 0.6017, "step": 20210 }, { "epoch": 0.5900849610230359, "grad_norm": 0.7377290380775979, "learning_rate": 2.2773722627737226e-06, "loss": 0.6609, "step": 20211 }, { "epoch": 0.5901141572508832, "grad_norm": 0.6964541471223492, "learning_rate": 2.277210056772101e-06, "loss": 0.6633, "step": 20212 }, { "epoch": 0.5901433534787306, "grad_norm": 0.7882066438080675, "learning_rate": 2.2770478507704786e-06, "loss": 0.6704, "step": 20213 }, { "epoch": 0.590172549706578, "grad_norm": 0.6971207581705237, "learning_rate": 2.2768856447688567e-06, "loss": 0.5807, "step": 20214 }, { "epoch": 0.5902017459344253, "grad_norm": 0.7606370745273988, "learning_rate": 2.2767234387672347e-06, "loss": 0.6584, "step": 20215 }, { "epoch": 0.5902309421622727, "grad_norm": 0.6573372419310903, "learning_rate": 2.2765612327656127e-06, "loss": 0.5806, "step": 20216 }, { "epoch": 0.59026013839012, "grad_norm": 0.7647615671348733, "learning_rate": 2.2763990267639903e-06, "loss": 0.7588, "step": 20217 }, { "epoch": 0.5902893346179674, "grad_norm": 0.705236101152285, "learning_rate": 2.2762368207623683e-06, "loss": 0.5999, "step": 20218 }, { "epoch": 0.5903185308458148, "grad_norm": 0.7064132510438641, "learning_rate": 2.2760746147607463e-06, "loss": 0.5843, "step": 20219 }, { "epoch": 0.5903477270736621, "grad_norm": 0.6870697922892199, "learning_rate": 2.2759124087591243e-06, "loss": 0.5985, "step": 20220 }, { "epoch": 0.5903769233015095, "grad_norm": 0.791097912727327, "learning_rate": 2.2757502027575023e-06, "loss": 0.6916, "step": 20221 }, { "epoch": 0.5904061195293568, "grad_norm": 0.7187229519756532, "learning_rate": 2.2755879967558803e-06, "loss": 0.6277, "step": 20222 }, { "epoch": 0.5904353157572042, "grad_norm": 0.7260213556055513, "learning_rate": 2.2754257907542583e-06, "loss": 0.6115, "step": 20223 }, { "epoch": 0.5904645119850516, "grad_norm": 0.7140043563341691, "learning_rate": 2.2752635847526363e-06, "loss": 0.6162, "step": 20224 }, { "epoch": 0.5904937082128989, "grad_norm": 0.7261489224102708, "learning_rate": 2.275101378751014e-06, "loss": 0.6621, "step": 20225 }, { "epoch": 0.5905229044407463, "grad_norm": 0.7066842462580933, "learning_rate": 2.274939172749392e-06, "loss": 0.6102, "step": 20226 }, { "epoch": 0.5905521006685936, "grad_norm": 0.6924144509325483, "learning_rate": 2.27477696674777e-06, "loss": 0.6313, "step": 20227 }, { "epoch": 0.590581296896441, "grad_norm": 0.7291365937577057, "learning_rate": 2.274614760746148e-06, "loss": 0.5936, "step": 20228 }, { "epoch": 0.5906104931242884, "grad_norm": 0.7833964106661049, "learning_rate": 2.2744525547445255e-06, "loss": 0.6217, "step": 20229 }, { "epoch": 0.5906396893521357, "grad_norm": 0.7222770075398472, "learning_rate": 2.2742903487429035e-06, "loss": 0.6285, "step": 20230 }, { "epoch": 0.5906688855799831, "grad_norm": 0.757356590713975, "learning_rate": 2.274128142741282e-06, "loss": 0.6971, "step": 20231 }, { "epoch": 0.5906980818078305, "grad_norm": 0.6842172731344129, "learning_rate": 2.2739659367396595e-06, "loss": 0.6033, "step": 20232 }, { "epoch": 0.5907272780356778, "grad_norm": 0.7110636520933649, "learning_rate": 2.2738037307380375e-06, "loss": 0.6629, "step": 20233 }, { "epoch": 0.5907564742635252, "grad_norm": 0.6527431063536143, "learning_rate": 2.2736415247364155e-06, "loss": 0.5563, "step": 20234 }, { "epoch": 0.5907856704913725, "grad_norm": 0.7422102468552201, "learning_rate": 2.2734793187347935e-06, "loss": 0.666, "step": 20235 }, { "epoch": 0.5908148667192199, "grad_norm": 0.7657223203756709, "learning_rate": 2.273317112733171e-06, "loss": 0.7294, "step": 20236 }, { "epoch": 0.5908440629470673, "grad_norm": 0.6499554592277531, "learning_rate": 2.273154906731549e-06, "loss": 0.5382, "step": 20237 }, { "epoch": 0.5908732591749146, "grad_norm": 0.6851193476153324, "learning_rate": 2.272992700729927e-06, "loss": 0.6084, "step": 20238 }, { "epoch": 0.590902455402762, "grad_norm": 0.69408712861941, "learning_rate": 2.272830494728305e-06, "loss": 0.5601, "step": 20239 }, { "epoch": 0.5909316516306093, "grad_norm": 0.6924798386368967, "learning_rate": 2.272668288726683e-06, "loss": 0.6607, "step": 20240 }, { "epoch": 0.5909608478584567, "grad_norm": 0.7071552907922871, "learning_rate": 2.272506082725061e-06, "loss": 0.6435, "step": 20241 }, { "epoch": 0.5909900440863041, "grad_norm": 0.8861919729233199, "learning_rate": 2.272343876723439e-06, "loss": 0.6494, "step": 20242 }, { "epoch": 0.5910192403141514, "grad_norm": 0.6789234268317016, "learning_rate": 2.272181670721817e-06, "loss": 0.5231, "step": 20243 }, { "epoch": 0.5910484365419988, "grad_norm": 0.8774716881277779, "learning_rate": 2.2720194647201947e-06, "loss": 0.6837, "step": 20244 }, { "epoch": 0.5910776327698462, "grad_norm": 0.7581997850763564, "learning_rate": 2.2718572587185727e-06, "loss": 0.6975, "step": 20245 }, { "epoch": 0.5911068289976935, "grad_norm": 0.7026884553467416, "learning_rate": 2.2716950527169508e-06, "loss": 0.6001, "step": 20246 }, { "epoch": 0.5911360252255409, "grad_norm": 0.6565487564413925, "learning_rate": 2.2715328467153288e-06, "loss": 0.5407, "step": 20247 }, { "epoch": 0.5911652214533882, "grad_norm": 0.7761214876609064, "learning_rate": 2.2713706407137063e-06, "loss": 0.7316, "step": 20248 }, { "epoch": 0.5911944176812356, "grad_norm": 0.704688279193922, "learning_rate": 2.2712084347120844e-06, "loss": 0.629, "step": 20249 }, { "epoch": 0.591223613909083, "grad_norm": 0.7428455404722841, "learning_rate": 2.2710462287104628e-06, "loss": 0.635, "step": 20250 }, { "epoch": 0.5912528101369303, "grad_norm": 0.6638107620207697, "learning_rate": 2.2708840227088404e-06, "loss": 0.5767, "step": 20251 }, { "epoch": 0.5912820063647777, "grad_norm": 0.8217875752703766, "learning_rate": 2.2707218167072184e-06, "loss": 0.6964, "step": 20252 }, { "epoch": 0.591311202592625, "grad_norm": 0.7231577230722271, "learning_rate": 2.2705596107055964e-06, "loss": 0.6328, "step": 20253 }, { "epoch": 0.5913403988204724, "grad_norm": 0.6672457775780444, "learning_rate": 2.2703974047039744e-06, "loss": 0.5894, "step": 20254 }, { "epoch": 0.5913695950483198, "grad_norm": 0.7641567139009727, "learning_rate": 2.270235198702352e-06, "loss": 0.6305, "step": 20255 }, { "epoch": 0.5913987912761671, "grad_norm": 0.7441664749060087, "learning_rate": 2.27007299270073e-06, "loss": 0.6593, "step": 20256 }, { "epoch": 0.5914279875040145, "grad_norm": 0.6981095444628743, "learning_rate": 2.269910786699108e-06, "loss": 0.6042, "step": 20257 }, { "epoch": 0.5914571837318618, "grad_norm": 0.6962898623435007, "learning_rate": 2.269748580697486e-06, "loss": 0.5862, "step": 20258 }, { "epoch": 0.5914863799597092, "grad_norm": 0.7620379233553056, "learning_rate": 2.269586374695864e-06, "loss": 0.6707, "step": 20259 }, { "epoch": 0.5915155761875566, "grad_norm": 0.700558090537968, "learning_rate": 2.269424168694242e-06, "loss": 0.6264, "step": 20260 }, { "epoch": 0.5915447724154039, "grad_norm": 0.6683220469905521, "learning_rate": 2.26926196269262e-06, "loss": 0.5443, "step": 20261 }, { "epoch": 0.5915739686432513, "grad_norm": 0.6470320834787212, "learning_rate": 2.2690997566909976e-06, "loss": 0.5544, "step": 20262 }, { "epoch": 0.5916031648710987, "grad_norm": 0.7343393940256498, "learning_rate": 2.2689375506893756e-06, "loss": 0.6196, "step": 20263 }, { "epoch": 0.591632361098946, "grad_norm": 0.7943176959606127, "learning_rate": 2.2687753446877536e-06, "loss": 0.7794, "step": 20264 }, { "epoch": 0.5916615573267934, "grad_norm": 0.6782095306711261, "learning_rate": 2.2686131386861316e-06, "loss": 0.5927, "step": 20265 }, { "epoch": 0.5916907535546407, "grad_norm": 0.7823428472506568, "learning_rate": 2.2684509326845096e-06, "loss": 0.6651, "step": 20266 }, { "epoch": 0.5917199497824881, "grad_norm": 0.6923561859897909, "learning_rate": 2.268288726682887e-06, "loss": 0.5817, "step": 20267 }, { "epoch": 0.5917491460103355, "grad_norm": 0.7771901076048007, "learning_rate": 2.268126520681265e-06, "loss": 0.6775, "step": 20268 }, { "epoch": 0.5917783422381828, "grad_norm": 0.6767553158701954, "learning_rate": 2.2679643146796436e-06, "loss": 0.5749, "step": 20269 }, { "epoch": 0.5918075384660302, "grad_norm": 0.6867410226570074, "learning_rate": 2.2678021086780212e-06, "loss": 0.563, "step": 20270 }, { "epoch": 0.5918367346938775, "grad_norm": 0.7627653843959834, "learning_rate": 2.2676399026763992e-06, "loss": 0.7249, "step": 20271 }, { "epoch": 0.5918659309217249, "grad_norm": 0.7360372727225017, "learning_rate": 2.2674776966747772e-06, "loss": 0.6443, "step": 20272 }, { "epoch": 0.5918951271495723, "grad_norm": 0.7124347777552458, "learning_rate": 2.2673154906731552e-06, "loss": 0.6222, "step": 20273 }, { "epoch": 0.5919243233774196, "grad_norm": 0.7154488930007832, "learning_rate": 2.267153284671533e-06, "loss": 0.6238, "step": 20274 }, { "epoch": 0.591953519605267, "grad_norm": 0.7403574410753792, "learning_rate": 2.266991078669911e-06, "loss": 0.6993, "step": 20275 }, { "epoch": 0.5919827158331143, "grad_norm": 0.675912880889227, "learning_rate": 2.266828872668289e-06, "loss": 0.5809, "step": 20276 }, { "epoch": 0.5920119120609617, "grad_norm": 0.7034632514083099, "learning_rate": 2.266666666666667e-06, "loss": 0.5961, "step": 20277 }, { "epoch": 0.5920411082888091, "grad_norm": 0.6576456390069016, "learning_rate": 2.266504460665045e-06, "loss": 0.517, "step": 20278 }, { "epoch": 0.5920703045166564, "grad_norm": 0.7959473374539613, "learning_rate": 2.266342254663423e-06, "loss": 0.6997, "step": 20279 }, { "epoch": 0.5920995007445038, "grad_norm": 0.6944591822094174, "learning_rate": 2.266180048661801e-06, "loss": 0.5904, "step": 20280 }, { "epoch": 0.5921286969723512, "grad_norm": 0.7486639249276098, "learning_rate": 2.2660178426601785e-06, "loss": 0.6263, "step": 20281 }, { "epoch": 0.5921578932001985, "grad_norm": 0.7391411044914031, "learning_rate": 2.2658556366585565e-06, "loss": 0.6579, "step": 20282 }, { "epoch": 0.5921870894280459, "grad_norm": 0.8079320385092545, "learning_rate": 2.2656934306569345e-06, "loss": 0.7456, "step": 20283 }, { "epoch": 0.5922162856558932, "grad_norm": 0.7721051653955363, "learning_rate": 2.2655312246553125e-06, "loss": 0.7017, "step": 20284 }, { "epoch": 0.5922454818837406, "grad_norm": 0.6555398560697446, "learning_rate": 2.2653690186536905e-06, "loss": 0.5795, "step": 20285 }, { "epoch": 0.592274678111588, "grad_norm": 0.9040484151125048, "learning_rate": 2.265206812652068e-06, "loss": 0.6962, "step": 20286 }, { "epoch": 0.5923038743394353, "grad_norm": 0.7943776016296111, "learning_rate": 2.265044606650446e-06, "loss": 0.6647, "step": 20287 }, { "epoch": 0.5923330705672827, "grad_norm": 0.7957753779262888, "learning_rate": 2.2648824006488245e-06, "loss": 0.7065, "step": 20288 }, { "epoch": 0.59236226679513, "grad_norm": 0.7386488212429834, "learning_rate": 2.264720194647202e-06, "loss": 0.6674, "step": 20289 }, { "epoch": 0.5923914630229774, "grad_norm": 0.7311496000381693, "learning_rate": 2.26455798864558e-06, "loss": 0.6474, "step": 20290 }, { "epoch": 0.5924206592508248, "grad_norm": 0.6940152075214117, "learning_rate": 2.264395782643958e-06, "loss": 0.5969, "step": 20291 }, { "epoch": 0.5924498554786721, "grad_norm": 0.6924726908114057, "learning_rate": 2.264233576642336e-06, "loss": 0.6435, "step": 20292 }, { "epoch": 0.5924790517065195, "grad_norm": 0.6995892424127336, "learning_rate": 2.2640713706407137e-06, "loss": 0.6051, "step": 20293 }, { "epoch": 0.5925082479343668, "grad_norm": 0.8854113337656274, "learning_rate": 2.2639091646390917e-06, "loss": 0.6788, "step": 20294 }, { "epoch": 0.5925374441622142, "grad_norm": 0.7160367817750196, "learning_rate": 2.2637469586374697e-06, "loss": 0.5878, "step": 20295 }, { "epoch": 0.5925666403900616, "grad_norm": 0.7355202504621702, "learning_rate": 2.2635847526358477e-06, "loss": 0.6422, "step": 20296 }, { "epoch": 0.5925958366179089, "grad_norm": 0.7314471403473191, "learning_rate": 2.2634225466342257e-06, "loss": 0.683, "step": 20297 }, { "epoch": 0.5926250328457563, "grad_norm": 0.6751936132001674, "learning_rate": 2.2632603406326037e-06, "loss": 0.5697, "step": 20298 }, { "epoch": 0.5926542290736037, "grad_norm": 0.7059975164397968, "learning_rate": 2.2630981346309817e-06, "loss": 0.58, "step": 20299 }, { "epoch": 0.592683425301451, "grad_norm": 0.6701394066211702, "learning_rate": 2.2629359286293593e-06, "loss": 0.586, "step": 20300 }, { "epoch": 0.5927126215292984, "grad_norm": 0.7006259277620841, "learning_rate": 2.2627737226277373e-06, "loss": 0.5591, "step": 20301 }, { "epoch": 0.5927418177571457, "grad_norm": 0.7186107185874686, "learning_rate": 2.2626115166261153e-06, "loss": 0.6252, "step": 20302 }, { "epoch": 0.5927710139849931, "grad_norm": 0.8141505345049946, "learning_rate": 2.2624493106244933e-06, "loss": 0.7409, "step": 20303 }, { "epoch": 0.5928002102128405, "grad_norm": 0.7315009767675568, "learning_rate": 2.2622871046228713e-06, "loss": 0.6801, "step": 20304 }, { "epoch": 0.5928294064406878, "grad_norm": 0.7050769419859432, "learning_rate": 2.262124898621249e-06, "loss": 0.5826, "step": 20305 }, { "epoch": 0.5928586026685352, "grad_norm": 0.7722246867491223, "learning_rate": 2.261962692619627e-06, "loss": 0.6647, "step": 20306 }, { "epoch": 0.5928877988963825, "grad_norm": 0.7641065904446167, "learning_rate": 2.2618004866180054e-06, "loss": 0.6857, "step": 20307 }, { "epoch": 0.5929169951242299, "grad_norm": 0.7052379285470846, "learning_rate": 2.261638280616383e-06, "loss": 0.6149, "step": 20308 }, { "epoch": 0.5929461913520773, "grad_norm": 0.7149198809364838, "learning_rate": 2.261476074614761e-06, "loss": 0.5978, "step": 20309 }, { "epoch": 0.5929753875799246, "grad_norm": 0.7201923811743904, "learning_rate": 2.261313868613139e-06, "loss": 0.599, "step": 20310 }, { "epoch": 0.593004583807772, "grad_norm": 0.7409319050251468, "learning_rate": 2.261151662611517e-06, "loss": 0.6371, "step": 20311 }, { "epoch": 0.5930337800356194, "grad_norm": 0.7599967291116244, "learning_rate": 2.2609894566098945e-06, "loss": 0.6655, "step": 20312 }, { "epoch": 0.5930629762634667, "grad_norm": 0.7939546051947711, "learning_rate": 2.2608272506082726e-06, "loss": 0.6262, "step": 20313 }, { "epoch": 0.5930921724913141, "grad_norm": 0.7332271212574538, "learning_rate": 2.2606650446066506e-06, "loss": 0.6277, "step": 20314 }, { "epoch": 0.5931213687191614, "grad_norm": 0.6772960099089564, "learning_rate": 2.2605028386050286e-06, "loss": 0.5468, "step": 20315 }, { "epoch": 0.5931505649470088, "grad_norm": 0.7168189165525578, "learning_rate": 2.2603406326034066e-06, "loss": 0.5729, "step": 20316 }, { "epoch": 0.5931797611748562, "grad_norm": 0.7836241132166748, "learning_rate": 2.2601784266017846e-06, "loss": 0.7042, "step": 20317 }, { "epoch": 0.5932089574027035, "grad_norm": 0.7196811923104109, "learning_rate": 2.2600162206001626e-06, "loss": 0.6365, "step": 20318 }, { "epoch": 0.5932381536305509, "grad_norm": 0.8594621528820218, "learning_rate": 2.25985401459854e-06, "loss": 0.7416, "step": 20319 }, { "epoch": 0.5932673498583982, "grad_norm": 0.6944779156214251, "learning_rate": 2.259691808596918e-06, "loss": 0.6154, "step": 20320 }, { "epoch": 0.5932965460862456, "grad_norm": 0.7081461339073025, "learning_rate": 2.259529602595296e-06, "loss": 0.6833, "step": 20321 }, { "epoch": 0.5933257423140931, "grad_norm": 0.7139085222132322, "learning_rate": 2.259367396593674e-06, "loss": 0.6548, "step": 20322 }, { "epoch": 0.5933549385419404, "grad_norm": 0.6958492306997478, "learning_rate": 2.259205190592052e-06, "loss": 0.6334, "step": 20323 }, { "epoch": 0.5933841347697878, "grad_norm": 0.6887814033631011, "learning_rate": 2.2590429845904298e-06, "loss": 0.5858, "step": 20324 }, { "epoch": 0.5934133309976352, "grad_norm": 1.118079832928421, "learning_rate": 2.2588807785888078e-06, "loss": 0.7891, "step": 20325 }, { "epoch": 0.5934425272254825, "grad_norm": 0.7564442122199839, "learning_rate": 2.2587185725871862e-06, "loss": 0.7043, "step": 20326 }, { "epoch": 0.5934717234533299, "grad_norm": 0.6651163810650579, "learning_rate": 2.258556366585564e-06, "loss": 0.5669, "step": 20327 }, { "epoch": 0.5935009196811772, "grad_norm": 0.756199840644724, "learning_rate": 2.258394160583942e-06, "loss": 0.6659, "step": 20328 }, { "epoch": 0.5935301159090246, "grad_norm": 0.7234742251517454, "learning_rate": 2.25823195458232e-06, "loss": 0.6083, "step": 20329 }, { "epoch": 0.593559312136872, "grad_norm": 0.7136175990861052, "learning_rate": 2.258069748580698e-06, "loss": 0.6419, "step": 20330 }, { "epoch": 0.5935885083647193, "grad_norm": 0.7251602824269218, "learning_rate": 2.2579075425790754e-06, "loss": 0.6135, "step": 20331 }, { "epoch": 0.5936177045925667, "grad_norm": 0.6990876772460566, "learning_rate": 2.2577453365774534e-06, "loss": 0.5703, "step": 20332 }, { "epoch": 0.593646900820414, "grad_norm": 0.809395937954271, "learning_rate": 2.2575831305758314e-06, "loss": 0.6817, "step": 20333 }, { "epoch": 0.5936760970482614, "grad_norm": 0.7310713399966043, "learning_rate": 2.2574209245742094e-06, "loss": 0.6592, "step": 20334 }, { "epoch": 0.5937052932761088, "grad_norm": 0.7706915397613266, "learning_rate": 2.2572587185725874e-06, "loss": 0.6642, "step": 20335 }, { "epoch": 0.5937344895039561, "grad_norm": 0.7305709420254047, "learning_rate": 2.2570965125709654e-06, "loss": 0.6547, "step": 20336 }, { "epoch": 0.5937636857318035, "grad_norm": 0.7562145323687385, "learning_rate": 2.2569343065693434e-06, "loss": 0.6564, "step": 20337 }, { "epoch": 0.5937928819596509, "grad_norm": 0.7398081369139126, "learning_rate": 2.256772100567721e-06, "loss": 0.6755, "step": 20338 }, { "epoch": 0.5938220781874982, "grad_norm": 0.7485516543601831, "learning_rate": 2.256609894566099e-06, "loss": 0.7005, "step": 20339 }, { "epoch": 0.5938512744153456, "grad_norm": 0.7282548023872032, "learning_rate": 2.256447688564477e-06, "loss": 0.6316, "step": 20340 }, { "epoch": 0.5938804706431929, "grad_norm": 0.7702356628150778, "learning_rate": 2.256285482562855e-06, "loss": 0.7224, "step": 20341 }, { "epoch": 0.5939096668710403, "grad_norm": 0.7114759452409893, "learning_rate": 2.256123276561233e-06, "loss": 0.5811, "step": 20342 }, { "epoch": 0.5939388630988877, "grad_norm": 0.692908570859339, "learning_rate": 2.2559610705596106e-06, "loss": 0.5966, "step": 20343 }, { "epoch": 0.593968059326735, "grad_norm": 0.7294007825439529, "learning_rate": 2.255798864557989e-06, "loss": 0.63, "step": 20344 }, { "epoch": 0.5939972555545824, "grad_norm": 0.7232134054866289, "learning_rate": 2.255636658556367e-06, "loss": 0.6537, "step": 20345 }, { "epoch": 0.5940264517824297, "grad_norm": 0.6757558585063799, "learning_rate": 2.2554744525547447e-06, "loss": 0.5957, "step": 20346 }, { "epoch": 0.5940556480102771, "grad_norm": 0.7982457431608398, "learning_rate": 2.2553122465531227e-06, "loss": 0.6802, "step": 20347 }, { "epoch": 0.5940848442381245, "grad_norm": 0.6874511383224053, "learning_rate": 2.2551500405515007e-06, "loss": 0.5971, "step": 20348 }, { "epoch": 0.5941140404659718, "grad_norm": 0.6970723312375355, "learning_rate": 2.2549878345498787e-06, "loss": 0.6327, "step": 20349 }, { "epoch": 0.5941432366938192, "grad_norm": 0.7169703374018297, "learning_rate": 2.2548256285482563e-06, "loss": 0.6611, "step": 20350 }, { "epoch": 0.5941724329216665, "grad_norm": 0.7614749224756496, "learning_rate": 2.2546634225466343e-06, "loss": 0.6416, "step": 20351 }, { "epoch": 0.5942016291495139, "grad_norm": 0.7361631147484935, "learning_rate": 2.2545012165450123e-06, "loss": 0.6558, "step": 20352 }, { "epoch": 0.5942308253773613, "grad_norm": 0.7107042935724441, "learning_rate": 2.2543390105433903e-06, "loss": 0.6145, "step": 20353 }, { "epoch": 0.5942600216052086, "grad_norm": 0.7275213453753875, "learning_rate": 2.2541768045417683e-06, "loss": 0.6529, "step": 20354 }, { "epoch": 0.594289217833056, "grad_norm": 0.7314441346604303, "learning_rate": 2.2540145985401463e-06, "loss": 0.6818, "step": 20355 }, { "epoch": 0.5943184140609034, "grad_norm": 0.7736150759796142, "learning_rate": 2.2538523925385243e-06, "loss": 0.7523, "step": 20356 }, { "epoch": 0.5943476102887507, "grad_norm": 0.7112631015138755, "learning_rate": 2.253690186536902e-06, "loss": 0.6634, "step": 20357 }, { "epoch": 0.5943768065165981, "grad_norm": 0.705283806463998, "learning_rate": 2.25352798053528e-06, "loss": 0.6337, "step": 20358 }, { "epoch": 0.5944060027444454, "grad_norm": 0.711917232920193, "learning_rate": 2.253365774533658e-06, "loss": 0.614, "step": 20359 }, { "epoch": 0.5944351989722928, "grad_norm": 0.7071028376689552, "learning_rate": 2.253203568532036e-06, "loss": 0.6494, "step": 20360 }, { "epoch": 0.5944643952001402, "grad_norm": 0.6985582830075198, "learning_rate": 2.253041362530414e-06, "loss": 0.58, "step": 20361 }, { "epoch": 0.5944935914279875, "grad_norm": 0.6848167579336926, "learning_rate": 2.2528791565287915e-06, "loss": 0.5881, "step": 20362 }, { "epoch": 0.5945227876558349, "grad_norm": 0.6892040217633499, "learning_rate": 2.25271695052717e-06, "loss": 0.5876, "step": 20363 }, { "epoch": 0.5945519838836822, "grad_norm": 0.7330633736119787, "learning_rate": 2.252554744525548e-06, "loss": 0.6999, "step": 20364 }, { "epoch": 0.5945811801115296, "grad_norm": 0.7319146650344955, "learning_rate": 2.2523925385239255e-06, "loss": 0.6446, "step": 20365 }, { "epoch": 0.594610376339377, "grad_norm": 0.7242610337572856, "learning_rate": 2.2522303325223035e-06, "loss": 0.6138, "step": 20366 }, { "epoch": 0.5946395725672243, "grad_norm": 0.7677539252625513, "learning_rate": 2.2520681265206815e-06, "loss": 0.6075, "step": 20367 }, { "epoch": 0.5946687687950717, "grad_norm": 0.9128484231647271, "learning_rate": 2.2519059205190595e-06, "loss": 0.7751, "step": 20368 }, { "epoch": 0.594697965022919, "grad_norm": 0.7067357841536298, "learning_rate": 2.251743714517437e-06, "loss": 0.6796, "step": 20369 }, { "epoch": 0.5947271612507664, "grad_norm": 0.749744847940763, "learning_rate": 2.251581508515815e-06, "loss": 0.6829, "step": 20370 }, { "epoch": 0.5947563574786138, "grad_norm": 0.8092893935556439, "learning_rate": 2.251419302514193e-06, "loss": 0.7114, "step": 20371 }, { "epoch": 0.5947855537064611, "grad_norm": 0.7553487172140223, "learning_rate": 2.251257096512571e-06, "loss": 0.6998, "step": 20372 }, { "epoch": 0.5948147499343085, "grad_norm": 0.7003398544178454, "learning_rate": 2.251094890510949e-06, "loss": 0.647, "step": 20373 }, { "epoch": 0.5948439461621559, "grad_norm": 0.7500905174620676, "learning_rate": 2.250932684509327e-06, "loss": 0.6936, "step": 20374 }, { "epoch": 0.5948731423900032, "grad_norm": 0.6725456831228671, "learning_rate": 2.250770478507705e-06, "loss": 0.576, "step": 20375 }, { "epoch": 0.5949023386178506, "grad_norm": 0.742252000239746, "learning_rate": 2.2506082725060827e-06, "loss": 0.6386, "step": 20376 }, { "epoch": 0.5949315348456979, "grad_norm": 0.8652817470792522, "learning_rate": 2.2504460665044608e-06, "loss": 0.5792, "step": 20377 }, { "epoch": 0.5949607310735453, "grad_norm": 0.7424886098245593, "learning_rate": 2.2502838605028388e-06, "loss": 0.6867, "step": 20378 }, { "epoch": 0.5949899273013927, "grad_norm": 0.732573807919331, "learning_rate": 2.2501216545012168e-06, "loss": 0.6583, "step": 20379 }, { "epoch": 0.59501912352924, "grad_norm": 0.773948358837164, "learning_rate": 2.2499594484995948e-06, "loss": 0.6738, "step": 20380 }, { "epoch": 0.5950483197570874, "grad_norm": 0.806097559998952, "learning_rate": 2.2497972424979724e-06, "loss": 0.6823, "step": 20381 }, { "epoch": 0.5950775159849347, "grad_norm": 0.6961724800075998, "learning_rate": 2.2496350364963508e-06, "loss": 0.6186, "step": 20382 }, { "epoch": 0.5951067122127821, "grad_norm": 0.6886924834382102, "learning_rate": 2.249472830494729e-06, "loss": 0.6291, "step": 20383 }, { "epoch": 0.5951359084406295, "grad_norm": 0.7380790225789553, "learning_rate": 2.2493106244931064e-06, "loss": 0.6537, "step": 20384 }, { "epoch": 0.5951651046684768, "grad_norm": 0.6804700973813104, "learning_rate": 2.2491484184914844e-06, "loss": 0.5592, "step": 20385 }, { "epoch": 0.5951943008963242, "grad_norm": 0.7330182284644965, "learning_rate": 2.2489862124898624e-06, "loss": 0.6432, "step": 20386 }, { "epoch": 0.5952234971241716, "grad_norm": 0.8236990365491671, "learning_rate": 2.2488240064882404e-06, "loss": 0.6621, "step": 20387 }, { "epoch": 0.5952526933520189, "grad_norm": 0.7327958911953246, "learning_rate": 2.248661800486618e-06, "loss": 0.6337, "step": 20388 }, { "epoch": 0.5952818895798663, "grad_norm": 0.6910539965590532, "learning_rate": 2.248499594484996e-06, "loss": 0.5535, "step": 20389 }, { "epoch": 0.5953110858077136, "grad_norm": 0.7490593168240359, "learning_rate": 2.248337388483374e-06, "loss": 0.6213, "step": 20390 }, { "epoch": 0.595340282035561, "grad_norm": 0.6890616004401152, "learning_rate": 2.248175182481752e-06, "loss": 0.6128, "step": 20391 }, { "epoch": 0.5953694782634084, "grad_norm": 0.7522365520649762, "learning_rate": 2.24801297648013e-06, "loss": 0.6685, "step": 20392 }, { "epoch": 0.5953986744912557, "grad_norm": 0.8007578151711959, "learning_rate": 2.247850770478508e-06, "loss": 0.7167, "step": 20393 }, { "epoch": 0.5954278707191031, "grad_norm": 0.7467712199411112, "learning_rate": 2.247688564476886e-06, "loss": 0.6834, "step": 20394 }, { "epoch": 0.5954570669469504, "grad_norm": 0.7556177702720298, "learning_rate": 2.2475263584752636e-06, "loss": 0.6775, "step": 20395 }, { "epoch": 0.5954862631747978, "grad_norm": 0.734597746676197, "learning_rate": 2.2473641524736416e-06, "loss": 0.6637, "step": 20396 }, { "epoch": 0.5955154594026452, "grad_norm": 0.6820896941164736, "learning_rate": 2.2472019464720196e-06, "loss": 0.5587, "step": 20397 }, { "epoch": 0.5955446556304925, "grad_norm": 0.728225598814072, "learning_rate": 2.2470397404703976e-06, "loss": 0.6415, "step": 20398 }, { "epoch": 0.5955738518583399, "grad_norm": 0.727790280935696, "learning_rate": 2.2468775344687756e-06, "loss": 0.6356, "step": 20399 }, { "epoch": 0.5956030480861872, "grad_norm": 0.7953669131767493, "learning_rate": 2.2467153284671532e-06, "loss": 0.7845, "step": 20400 }, { "epoch": 0.5956322443140346, "grad_norm": 0.7830843715219009, "learning_rate": 2.2465531224655316e-06, "loss": 0.62, "step": 20401 }, { "epoch": 0.595661440541882, "grad_norm": 0.7842075307550468, "learning_rate": 2.2463909164639096e-06, "loss": 0.7477, "step": 20402 }, { "epoch": 0.5956906367697293, "grad_norm": 0.691848298097129, "learning_rate": 2.2462287104622872e-06, "loss": 0.5559, "step": 20403 }, { "epoch": 0.5957198329975767, "grad_norm": 0.8476786961290779, "learning_rate": 2.2460665044606652e-06, "loss": 0.6963, "step": 20404 }, { "epoch": 0.595749029225424, "grad_norm": 0.7587247060888721, "learning_rate": 2.2459042984590432e-06, "loss": 0.7193, "step": 20405 }, { "epoch": 0.5957782254532714, "grad_norm": 0.7707692799645165, "learning_rate": 2.2457420924574213e-06, "loss": 0.7282, "step": 20406 }, { "epoch": 0.5958074216811188, "grad_norm": 0.7546793355254958, "learning_rate": 2.245579886455799e-06, "loss": 0.675, "step": 20407 }, { "epoch": 0.5958366179089661, "grad_norm": 0.8149162753978159, "learning_rate": 2.245417680454177e-06, "loss": 0.6365, "step": 20408 }, { "epoch": 0.5958658141368135, "grad_norm": 0.7258136231374442, "learning_rate": 2.245255474452555e-06, "loss": 0.595, "step": 20409 }, { "epoch": 0.5958950103646609, "grad_norm": 0.7355014976894503, "learning_rate": 2.245093268450933e-06, "loss": 0.7614, "step": 20410 }, { "epoch": 0.5959242065925082, "grad_norm": 0.7117522280927844, "learning_rate": 2.244931062449311e-06, "loss": 0.6183, "step": 20411 }, { "epoch": 0.5959534028203556, "grad_norm": 0.7566503864985338, "learning_rate": 2.244768856447689e-06, "loss": 0.6301, "step": 20412 }, { "epoch": 0.5959825990482029, "grad_norm": 0.7271459273654921, "learning_rate": 2.244606650446067e-06, "loss": 0.5384, "step": 20413 }, { "epoch": 0.5960117952760503, "grad_norm": 0.7997526294345441, "learning_rate": 2.2444444444444445e-06, "loss": 0.7654, "step": 20414 }, { "epoch": 0.5960409915038977, "grad_norm": 0.7242550892591971, "learning_rate": 2.2442822384428225e-06, "loss": 0.652, "step": 20415 }, { "epoch": 0.596070187731745, "grad_norm": 0.6918217257843018, "learning_rate": 2.2441200324412005e-06, "loss": 0.56, "step": 20416 }, { "epoch": 0.5960993839595924, "grad_norm": 0.7272380109415787, "learning_rate": 2.2439578264395785e-06, "loss": 0.6472, "step": 20417 }, { "epoch": 0.5961285801874397, "grad_norm": 0.729337824333098, "learning_rate": 2.2437956204379565e-06, "loss": 0.5817, "step": 20418 }, { "epoch": 0.5961577764152871, "grad_norm": 0.7445963281841337, "learning_rate": 2.243633414436334e-06, "loss": 0.7182, "step": 20419 }, { "epoch": 0.5961869726431345, "grad_norm": 0.7651281066366727, "learning_rate": 2.2434712084347125e-06, "loss": 0.6947, "step": 20420 }, { "epoch": 0.5962161688709818, "grad_norm": 0.8019244583112534, "learning_rate": 2.2433090024330905e-06, "loss": 0.7055, "step": 20421 }, { "epoch": 0.5962453650988292, "grad_norm": 0.7250362996934087, "learning_rate": 2.243146796431468e-06, "loss": 0.642, "step": 20422 }, { "epoch": 0.5962745613266766, "grad_norm": 0.679203259432932, "learning_rate": 2.242984590429846e-06, "loss": 0.5599, "step": 20423 }, { "epoch": 0.5963037575545239, "grad_norm": 0.7090774437948412, "learning_rate": 2.242822384428224e-06, "loss": 0.6441, "step": 20424 }, { "epoch": 0.5963329537823713, "grad_norm": 0.7539525080083301, "learning_rate": 2.242660178426602e-06, "loss": 0.6611, "step": 20425 }, { "epoch": 0.5963621500102186, "grad_norm": 0.755285220401354, "learning_rate": 2.2424979724249797e-06, "loss": 0.6729, "step": 20426 }, { "epoch": 0.596391346238066, "grad_norm": 0.743745924649896, "learning_rate": 2.2423357664233577e-06, "loss": 0.6388, "step": 20427 }, { "epoch": 0.5964205424659134, "grad_norm": 0.7059854444228959, "learning_rate": 2.2421735604217357e-06, "loss": 0.6617, "step": 20428 }, { "epoch": 0.5964497386937607, "grad_norm": 0.715065635052286, "learning_rate": 2.2420113544201137e-06, "loss": 0.6283, "step": 20429 }, { "epoch": 0.5964789349216081, "grad_norm": 0.7496547083488688, "learning_rate": 2.2418491484184917e-06, "loss": 0.7029, "step": 20430 }, { "epoch": 0.5965081311494554, "grad_norm": 0.6887576271857835, "learning_rate": 2.2416869424168697e-06, "loss": 0.6173, "step": 20431 }, { "epoch": 0.5965373273773028, "grad_norm": 0.729305873009294, "learning_rate": 2.2415247364152477e-06, "loss": 0.6511, "step": 20432 }, { "epoch": 0.5965665236051502, "grad_norm": 0.6752809833043193, "learning_rate": 2.2413625304136253e-06, "loss": 0.584, "step": 20433 }, { "epoch": 0.5965957198329975, "grad_norm": 0.751971481523822, "learning_rate": 2.2412003244120033e-06, "loss": 0.6916, "step": 20434 }, { "epoch": 0.5966249160608449, "grad_norm": 0.753718228054215, "learning_rate": 2.2410381184103813e-06, "loss": 0.6586, "step": 20435 }, { "epoch": 0.5966541122886923, "grad_norm": 0.7718386784286994, "learning_rate": 2.2408759124087593e-06, "loss": 0.7297, "step": 20436 }, { "epoch": 0.5966833085165396, "grad_norm": 0.762291056991037, "learning_rate": 2.2407137064071373e-06, "loss": 0.6757, "step": 20437 }, { "epoch": 0.596712504744387, "grad_norm": 0.7180147422799383, "learning_rate": 2.240551500405515e-06, "loss": 0.633, "step": 20438 }, { "epoch": 0.5967417009722343, "grad_norm": 0.7293583799927336, "learning_rate": 2.2403892944038934e-06, "loss": 0.6667, "step": 20439 }, { "epoch": 0.5967708972000817, "grad_norm": 0.7737799305964828, "learning_rate": 2.2402270884022714e-06, "loss": 0.7038, "step": 20440 }, { "epoch": 0.5968000934279291, "grad_norm": 0.6777846701710372, "learning_rate": 2.240064882400649e-06, "loss": 0.5509, "step": 20441 }, { "epoch": 0.5968292896557764, "grad_norm": 0.7200870197222227, "learning_rate": 2.239902676399027e-06, "loss": 0.6533, "step": 20442 }, { "epoch": 0.5968584858836239, "grad_norm": 0.7195863390442786, "learning_rate": 2.239740470397405e-06, "loss": 0.6726, "step": 20443 }, { "epoch": 0.5968876821114713, "grad_norm": 0.7536848826820434, "learning_rate": 2.239578264395783e-06, "loss": 0.7298, "step": 20444 }, { "epoch": 0.5969168783393186, "grad_norm": 0.7437768767359823, "learning_rate": 2.2394160583941606e-06, "loss": 0.6776, "step": 20445 }, { "epoch": 0.596946074567166, "grad_norm": 0.7681052104380854, "learning_rate": 2.2392538523925386e-06, "loss": 0.7024, "step": 20446 }, { "epoch": 0.5969752707950133, "grad_norm": 0.6717563493684978, "learning_rate": 2.2390916463909166e-06, "loss": 0.6046, "step": 20447 }, { "epoch": 0.5970044670228607, "grad_norm": 0.7171038893890959, "learning_rate": 2.2389294403892946e-06, "loss": 0.6987, "step": 20448 }, { "epoch": 0.5970336632507081, "grad_norm": 0.7147518199032593, "learning_rate": 2.2387672343876726e-06, "loss": 0.5689, "step": 20449 }, { "epoch": 0.5970628594785554, "grad_norm": 0.7223897619819892, "learning_rate": 2.2386050283860506e-06, "loss": 0.6519, "step": 20450 }, { "epoch": 0.5970920557064028, "grad_norm": 0.7539758343886946, "learning_rate": 2.2384428223844286e-06, "loss": 0.7051, "step": 20451 }, { "epoch": 0.5971212519342501, "grad_norm": 0.708269334680102, "learning_rate": 2.238280616382806e-06, "loss": 0.6151, "step": 20452 }, { "epoch": 0.5971504481620975, "grad_norm": 0.6688009066688569, "learning_rate": 2.238118410381184e-06, "loss": 0.5623, "step": 20453 }, { "epoch": 0.5971796443899449, "grad_norm": 0.6203788361282437, "learning_rate": 2.237956204379562e-06, "loss": 0.4969, "step": 20454 }, { "epoch": 0.5972088406177922, "grad_norm": 0.7354771577439441, "learning_rate": 2.23779399837794e-06, "loss": 0.6191, "step": 20455 }, { "epoch": 0.5972380368456396, "grad_norm": 0.7962349677878554, "learning_rate": 2.237631792376318e-06, "loss": 0.7041, "step": 20456 }, { "epoch": 0.597267233073487, "grad_norm": 0.7823497995128793, "learning_rate": 2.237469586374696e-06, "loss": 0.6263, "step": 20457 }, { "epoch": 0.5972964293013343, "grad_norm": 0.7270275936087923, "learning_rate": 2.2373073803730742e-06, "loss": 0.6574, "step": 20458 }, { "epoch": 0.5973256255291817, "grad_norm": 0.7053144455919282, "learning_rate": 2.237145174371452e-06, "loss": 0.6384, "step": 20459 }, { "epoch": 0.597354821757029, "grad_norm": 0.7337221039711139, "learning_rate": 2.23698296836983e-06, "loss": 0.6493, "step": 20460 }, { "epoch": 0.5973840179848764, "grad_norm": 0.7763660457143549, "learning_rate": 2.236820762368208e-06, "loss": 0.7168, "step": 20461 }, { "epoch": 0.5974132142127238, "grad_norm": 0.7297381569410856, "learning_rate": 2.236658556366586e-06, "loss": 0.6675, "step": 20462 }, { "epoch": 0.5974424104405711, "grad_norm": 0.737953103904113, "learning_rate": 2.236496350364964e-06, "loss": 0.6659, "step": 20463 }, { "epoch": 0.5974716066684185, "grad_norm": 0.7384330118951431, "learning_rate": 2.2363341443633414e-06, "loss": 0.6526, "step": 20464 }, { "epoch": 0.5975008028962658, "grad_norm": 0.7167194167576324, "learning_rate": 2.2361719383617194e-06, "loss": 0.6017, "step": 20465 }, { "epoch": 0.5975299991241132, "grad_norm": 0.7249099998746505, "learning_rate": 2.2360097323600974e-06, "loss": 0.629, "step": 20466 }, { "epoch": 0.5975591953519606, "grad_norm": 0.6791076308639572, "learning_rate": 2.2358475263584754e-06, "loss": 0.5733, "step": 20467 }, { "epoch": 0.5975883915798079, "grad_norm": 0.7344554496363306, "learning_rate": 2.2356853203568534e-06, "loss": 0.6353, "step": 20468 }, { "epoch": 0.5976175878076553, "grad_norm": 0.6807901103902857, "learning_rate": 2.2355231143552314e-06, "loss": 0.6158, "step": 20469 }, { "epoch": 0.5976467840355026, "grad_norm": 0.7507617686486736, "learning_rate": 2.2353609083536095e-06, "loss": 0.6826, "step": 20470 }, { "epoch": 0.59767598026335, "grad_norm": 0.7775222171623637, "learning_rate": 2.235198702351987e-06, "loss": 0.6829, "step": 20471 }, { "epoch": 0.5977051764911974, "grad_norm": 0.7551986514763755, "learning_rate": 2.235036496350365e-06, "loss": 0.5991, "step": 20472 }, { "epoch": 0.5977343727190447, "grad_norm": 0.7224657786811675, "learning_rate": 2.234874290348743e-06, "loss": 0.6459, "step": 20473 }, { "epoch": 0.5977635689468921, "grad_norm": 0.7603927069924399, "learning_rate": 2.234712084347121e-06, "loss": 0.6674, "step": 20474 }, { "epoch": 0.5977927651747394, "grad_norm": 0.7438284868029964, "learning_rate": 2.234549878345499e-06, "loss": 0.6805, "step": 20475 }, { "epoch": 0.5978219614025868, "grad_norm": 0.7484046980222585, "learning_rate": 2.2343876723438766e-06, "loss": 0.7082, "step": 20476 }, { "epoch": 0.5978511576304342, "grad_norm": 0.7025553683220521, "learning_rate": 2.234225466342255e-06, "loss": 0.5568, "step": 20477 }, { "epoch": 0.5978803538582815, "grad_norm": 0.9312757661187334, "learning_rate": 2.2340632603406327e-06, "loss": 0.6842, "step": 20478 }, { "epoch": 0.5979095500861289, "grad_norm": 0.7278562156670396, "learning_rate": 2.2339010543390107e-06, "loss": 0.6547, "step": 20479 }, { "epoch": 0.5979387463139763, "grad_norm": 0.6495262995429384, "learning_rate": 2.2337388483373887e-06, "loss": 0.5801, "step": 20480 }, { "epoch": 0.5979679425418236, "grad_norm": 1.417811611125351, "learning_rate": 2.2335766423357667e-06, "loss": 0.6654, "step": 20481 }, { "epoch": 0.597997138769671, "grad_norm": 0.7307074956436759, "learning_rate": 2.2334144363341447e-06, "loss": 0.6283, "step": 20482 }, { "epoch": 0.5980263349975183, "grad_norm": 0.7267749776771699, "learning_rate": 2.2332522303325223e-06, "loss": 0.6581, "step": 20483 }, { "epoch": 0.5980555312253657, "grad_norm": 0.6992059440853207, "learning_rate": 2.2330900243309003e-06, "loss": 0.5892, "step": 20484 }, { "epoch": 0.5980847274532131, "grad_norm": 0.7018666425913606, "learning_rate": 2.2329278183292783e-06, "loss": 0.6113, "step": 20485 }, { "epoch": 0.5981139236810604, "grad_norm": 0.7326090735095918, "learning_rate": 2.2327656123276563e-06, "loss": 0.6852, "step": 20486 }, { "epoch": 0.5981431199089078, "grad_norm": 0.7174592574555486, "learning_rate": 2.2326034063260343e-06, "loss": 0.5672, "step": 20487 }, { "epoch": 0.5981723161367551, "grad_norm": 0.7226157572833056, "learning_rate": 2.2324412003244123e-06, "loss": 0.6496, "step": 20488 }, { "epoch": 0.5982015123646025, "grad_norm": 0.7564570150995672, "learning_rate": 2.2322789943227903e-06, "loss": 0.6744, "step": 20489 }, { "epoch": 0.5982307085924499, "grad_norm": 0.7142754048525364, "learning_rate": 2.232116788321168e-06, "loss": 0.6611, "step": 20490 }, { "epoch": 0.5982599048202972, "grad_norm": 0.7739543728415671, "learning_rate": 2.231954582319546e-06, "loss": 0.7083, "step": 20491 }, { "epoch": 0.5982891010481446, "grad_norm": 1.3308748586523464, "learning_rate": 2.231792376317924e-06, "loss": 0.6592, "step": 20492 }, { "epoch": 0.598318297275992, "grad_norm": 0.686862637071101, "learning_rate": 2.231630170316302e-06, "loss": 0.554, "step": 20493 }, { "epoch": 0.5983474935038393, "grad_norm": 0.6928776107062633, "learning_rate": 2.23146796431468e-06, "loss": 0.5925, "step": 20494 }, { "epoch": 0.5983766897316867, "grad_norm": 0.753413745863432, "learning_rate": 2.231305758313058e-06, "loss": 0.7234, "step": 20495 }, { "epoch": 0.598405885959534, "grad_norm": 0.805478214140161, "learning_rate": 2.231143552311436e-06, "loss": 0.6991, "step": 20496 }, { "epoch": 0.5984350821873814, "grad_norm": 0.7390418112353155, "learning_rate": 2.2309813463098135e-06, "loss": 0.6451, "step": 20497 }, { "epoch": 0.5984642784152288, "grad_norm": 0.7615138619456062, "learning_rate": 2.2308191403081915e-06, "loss": 0.725, "step": 20498 }, { "epoch": 0.5984934746430761, "grad_norm": 0.7250957433306593, "learning_rate": 2.2306569343065695e-06, "loss": 0.6549, "step": 20499 }, { "epoch": 0.5985226708709235, "grad_norm": 0.6846671425171443, "learning_rate": 2.2304947283049475e-06, "loss": 0.5855, "step": 20500 }, { "epoch": 0.5985518670987708, "grad_norm": 0.7603668289217251, "learning_rate": 2.2303325223033255e-06, "loss": 0.7238, "step": 20501 }, { "epoch": 0.5985810633266182, "grad_norm": 0.8064352377585855, "learning_rate": 2.230170316301703e-06, "loss": 0.6694, "step": 20502 }, { "epoch": 0.5986102595544656, "grad_norm": 0.7059534578100781, "learning_rate": 2.230008110300081e-06, "loss": 0.6375, "step": 20503 }, { "epoch": 0.5986394557823129, "grad_norm": 0.695713474976253, "learning_rate": 2.229845904298459e-06, "loss": 0.6294, "step": 20504 }, { "epoch": 0.5986686520101603, "grad_norm": 0.7539358239213598, "learning_rate": 2.229683698296837e-06, "loss": 0.6043, "step": 20505 }, { "epoch": 0.5986978482380076, "grad_norm": 0.7716904977735654, "learning_rate": 2.229521492295215e-06, "loss": 0.672, "step": 20506 }, { "epoch": 0.598727044465855, "grad_norm": 0.7243250995373186, "learning_rate": 2.229359286293593e-06, "loss": 0.6928, "step": 20507 }, { "epoch": 0.5987562406937024, "grad_norm": 0.722708091473564, "learning_rate": 2.229197080291971e-06, "loss": 0.6424, "step": 20508 }, { "epoch": 0.5987854369215497, "grad_norm": 0.7246717503989633, "learning_rate": 2.2290348742903488e-06, "loss": 0.6211, "step": 20509 }, { "epoch": 0.5988146331493971, "grad_norm": 0.7280003445756287, "learning_rate": 2.2288726682887268e-06, "loss": 0.6109, "step": 20510 }, { "epoch": 0.5988438293772445, "grad_norm": 0.7439864779492554, "learning_rate": 2.2287104622871048e-06, "loss": 0.6535, "step": 20511 }, { "epoch": 0.5988730256050918, "grad_norm": 0.7490776973867691, "learning_rate": 2.2285482562854828e-06, "loss": 0.6868, "step": 20512 }, { "epoch": 0.5989022218329392, "grad_norm": 0.8139524926426002, "learning_rate": 2.2283860502838604e-06, "loss": 0.7445, "step": 20513 }, { "epoch": 0.5989314180607865, "grad_norm": 0.6852773437680924, "learning_rate": 2.2282238442822388e-06, "loss": 0.5956, "step": 20514 }, { "epoch": 0.5989606142886339, "grad_norm": 0.7549516259447782, "learning_rate": 2.228061638280617e-06, "loss": 0.7372, "step": 20515 }, { "epoch": 0.5989898105164813, "grad_norm": 0.780192113771851, "learning_rate": 2.2278994322789944e-06, "loss": 0.6927, "step": 20516 }, { "epoch": 0.5990190067443286, "grad_norm": 0.7928067167207961, "learning_rate": 2.2277372262773724e-06, "loss": 0.7324, "step": 20517 }, { "epoch": 0.599048202972176, "grad_norm": 0.7788912274019808, "learning_rate": 2.2275750202757504e-06, "loss": 0.7278, "step": 20518 }, { "epoch": 0.5990773992000233, "grad_norm": 0.7263077002280298, "learning_rate": 2.2274128142741284e-06, "loss": 0.6614, "step": 20519 }, { "epoch": 0.5991065954278707, "grad_norm": 0.7172335042955678, "learning_rate": 2.2272506082725064e-06, "loss": 0.6541, "step": 20520 }, { "epoch": 0.5991357916557181, "grad_norm": 0.7830315247300881, "learning_rate": 2.227088402270884e-06, "loss": 0.619, "step": 20521 }, { "epoch": 0.5991649878835654, "grad_norm": 0.7310817465646317, "learning_rate": 2.226926196269262e-06, "loss": 0.6324, "step": 20522 }, { "epoch": 0.5991941841114128, "grad_norm": 0.6694926125620299, "learning_rate": 2.22676399026764e-06, "loss": 0.5491, "step": 20523 }, { "epoch": 0.5992233803392601, "grad_norm": 0.7031713990849013, "learning_rate": 2.226601784266018e-06, "loss": 0.6556, "step": 20524 }, { "epoch": 0.5992525765671075, "grad_norm": 0.7053542835048074, "learning_rate": 2.226439578264396e-06, "loss": 0.6488, "step": 20525 }, { "epoch": 0.5992817727949549, "grad_norm": 0.7547607976879032, "learning_rate": 2.226277372262774e-06, "loss": 0.6967, "step": 20526 }, { "epoch": 0.5993109690228022, "grad_norm": 0.7438358899489251, "learning_rate": 2.226115166261152e-06, "loss": 0.6946, "step": 20527 }, { "epoch": 0.5993401652506496, "grad_norm": 0.6754196726317937, "learning_rate": 2.2259529602595296e-06, "loss": 0.5635, "step": 20528 }, { "epoch": 0.599369361478497, "grad_norm": 0.726713925613175, "learning_rate": 2.2257907542579076e-06, "loss": 0.6776, "step": 20529 }, { "epoch": 0.5993985577063443, "grad_norm": 0.704667623479809, "learning_rate": 2.2256285482562856e-06, "loss": 0.6223, "step": 20530 }, { "epoch": 0.5994277539341917, "grad_norm": 0.7238560379025372, "learning_rate": 2.2254663422546636e-06, "loss": 0.6099, "step": 20531 }, { "epoch": 0.599456950162039, "grad_norm": 0.7622275623847572, "learning_rate": 2.2253041362530412e-06, "loss": 0.6562, "step": 20532 }, { "epoch": 0.5994861463898864, "grad_norm": 0.7492775510114136, "learning_rate": 2.2251419302514196e-06, "loss": 0.7104, "step": 20533 }, { "epoch": 0.5995153426177338, "grad_norm": 0.6468594572056352, "learning_rate": 2.2249797242497977e-06, "loss": 0.5369, "step": 20534 }, { "epoch": 0.5995445388455811, "grad_norm": 0.7219584183752208, "learning_rate": 2.2248175182481752e-06, "loss": 0.6485, "step": 20535 }, { "epoch": 0.5995737350734285, "grad_norm": 0.7451930293116563, "learning_rate": 2.2246553122465532e-06, "loss": 0.6921, "step": 20536 }, { "epoch": 0.5996029313012758, "grad_norm": 0.7450281035482149, "learning_rate": 2.2244931062449312e-06, "loss": 0.6662, "step": 20537 }, { "epoch": 0.5996321275291232, "grad_norm": 0.6901207508409952, "learning_rate": 2.2243309002433093e-06, "loss": 0.6084, "step": 20538 }, { "epoch": 0.5996613237569706, "grad_norm": 0.7050608556653214, "learning_rate": 2.2241686942416873e-06, "loss": 0.5965, "step": 20539 }, { "epoch": 0.5996905199848179, "grad_norm": 0.7275137136280153, "learning_rate": 2.224006488240065e-06, "loss": 0.6571, "step": 20540 }, { "epoch": 0.5997197162126653, "grad_norm": 0.7364958234866834, "learning_rate": 2.223844282238443e-06, "loss": 0.6511, "step": 20541 }, { "epoch": 0.5997489124405126, "grad_norm": 0.7092166116161016, "learning_rate": 2.223682076236821e-06, "loss": 0.6357, "step": 20542 }, { "epoch": 0.59977810866836, "grad_norm": 0.7631931883553823, "learning_rate": 2.223519870235199e-06, "loss": 0.6785, "step": 20543 }, { "epoch": 0.5998073048962074, "grad_norm": 0.7787328133011697, "learning_rate": 2.223357664233577e-06, "loss": 0.6857, "step": 20544 }, { "epoch": 0.5998365011240547, "grad_norm": 0.701100001244393, "learning_rate": 2.223195458231955e-06, "loss": 0.5757, "step": 20545 }, { "epoch": 0.5998656973519021, "grad_norm": 0.7276720882494983, "learning_rate": 2.223033252230333e-06, "loss": 0.6723, "step": 20546 }, { "epoch": 0.5998948935797495, "grad_norm": 0.6855922777109799, "learning_rate": 2.2228710462287105e-06, "loss": 0.5769, "step": 20547 }, { "epoch": 0.5999240898075968, "grad_norm": 0.7418822687538481, "learning_rate": 2.2227088402270885e-06, "loss": 0.683, "step": 20548 }, { "epoch": 0.5999532860354442, "grad_norm": 0.7558700920005698, "learning_rate": 2.2225466342254665e-06, "loss": 0.698, "step": 20549 }, { "epoch": 0.5999824822632915, "grad_norm": 0.7599547264120158, "learning_rate": 2.2223844282238445e-06, "loss": 0.682, "step": 20550 }, { "epoch": 0.6000116784911389, "grad_norm": 0.6923224801497601, "learning_rate": 2.222222222222222e-06, "loss": 0.5686, "step": 20551 }, { "epoch": 0.6000408747189863, "grad_norm": 0.7401554229758893, "learning_rate": 2.2220600162206005e-06, "loss": 0.6118, "step": 20552 }, { "epoch": 0.6000700709468336, "grad_norm": 0.7140040590668129, "learning_rate": 2.2218978102189785e-06, "loss": 0.6118, "step": 20553 }, { "epoch": 0.600099267174681, "grad_norm": 0.7151474680804829, "learning_rate": 2.221735604217356e-06, "loss": 0.6668, "step": 20554 }, { "epoch": 0.6001284634025283, "grad_norm": 0.7368655580516514, "learning_rate": 2.221573398215734e-06, "loss": 0.6464, "step": 20555 }, { "epoch": 0.6001576596303757, "grad_norm": 0.7396823419893735, "learning_rate": 2.221411192214112e-06, "loss": 0.6376, "step": 20556 }, { "epoch": 0.6001868558582231, "grad_norm": 0.7269154331545474, "learning_rate": 2.22124898621249e-06, "loss": 0.636, "step": 20557 }, { "epoch": 0.6002160520860704, "grad_norm": 0.73427569794034, "learning_rate": 2.221086780210868e-06, "loss": 0.655, "step": 20558 }, { "epoch": 0.6002452483139178, "grad_norm": 0.7068032874219549, "learning_rate": 2.2209245742092457e-06, "loss": 0.6407, "step": 20559 }, { "epoch": 0.6002744445417652, "grad_norm": 0.6510687660086122, "learning_rate": 2.2207623682076237e-06, "loss": 0.5365, "step": 20560 }, { "epoch": 0.6003036407696125, "grad_norm": 0.7446710502722242, "learning_rate": 2.220600162206002e-06, "loss": 0.6466, "step": 20561 }, { "epoch": 0.6003328369974599, "grad_norm": 0.736683806081055, "learning_rate": 2.2204379562043797e-06, "loss": 0.6415, "step": 20562 }, { "epoch": 0.6003620332253073, "grad_norm": 0.7691400054931943, "learning_rate": 2.2202757502027577e-06, "loss": 0.6678, "step": 20563 }, { "epoch": 0.6003912294531547, "grad_norm": 0.8225588346207893, "learning_rate": 2.2201135442011357e-06, "loss": 0.6984, "step": 20564 }, { "epoch": 0.6004204256810021, "grad_norm": 0.6985155365360234, "learning_rate": 2.2199513381995137e-06, "loss": 0.6511, "step": 20565 }, { "epoch": 0.6004496219088494, "grad_norm": 0.7632427144345111, "learning_rate": 2.2197891321978913e-06, "loss": 0.6263, "step": 20566 }, { "epoch": 0.6004788181366968, "grad_norm": 0.7396627966403699, "learning_rate": 2.2196269261962693e-06, "loss": 0.6934, "step": 20567 }, { "epoch": 0.6005080143645442, "grad_norm": 0.7570757125498577, "learning_rate": 2.2194647201946473e-06, "loss": 0.6296, "step": 20568 }, { "epoch": 0.6005372105923915, "grad_norm": 0.7059591755541867, "learning_rate": 2.2193025141930253e-06, "loss": 0.6063, "step": 20569 }, { "epoch": 0.6005664068202389, "grad_norm": 0.7571950931535855, "learning_rate": 2.219140308191403e-06, "loss": 0.6789, "step": 20570 }, { "epoch": 0.6005956030480862, "grad_norm": 0.66342210423362, "learning_rate": 2.2189781021897814e-06, "loss": 0.5585, "step": 20571 }, { "epoch": 0.6006247992759336, "grad_norm": 0.7292383001282647, "learning_rate": 2.2188158961881594e-06, "loss": 0.6362, "step": 20572 }, { "epoch": 0.600653995503781, "grad_norm": 0.7246362026640376, "learning_rate": 2.218653690186537e-06, "loss": 0.6406, "step": 20573 }, { "epoch": 0.6006831917316283, "grad_norm": 0.7272395569942878, "learning_rate": 2.218491484184915e-06, "loss": 0.6363, "step": 20574 }, { "epoch": 0.6007123879594757, "grad_norm": 0.7356951526715957, "learning_rate": 2.218329278183293e-06, "loss": 0.6763, "step": 20575 }, { "epoch": 0.600741584187323, "grad_norm": 0.8655504381139122, "learning_rate": 2.218167072181671e-06, "loss": 0.6508, "step": 20576 }, { "epoch": 0.6007707804151704, "grad_norm": 0.7442243727853498, "learning_rate": 2.218004866180049e-06, "loss": 0.6437, "step": 20577 }, { "epoch": 0.6007999766430178, "grad_norm": 0.7619260437599207, "learning_rate": 2.2178426601784266e-06, "loss": 0.7232, "step": 20578 }, { "epoch": 0.6008291728708651, "grad_norm": 0.7852001158525007, "learning_rate": 2.2176804541768046e-06, "loss": 0.6573, "step": 20579 }, { "epoch": 0.6008583690987125, "grad_norm": 0.7738609779950558, "learning_rate": 2.217518248175183e-06, "loss": 0.7524, "step": 20580 }, { "epoch": 0.6008875653265598, "grad_norm": 0.6489994684757914, "learning_rate": 2.2173560421735606e-06, "loss": 0.5281, "step": 20581 }, { "epoch": 0.6009167615544072, "grad_norm": 0.7291262680331924, "learning_rate": 2.2171938361719386e-06, "loss": 0.6303, "step": 20582 }, { "epoch": 0.6009459577822546, "grad_norm": 0.7121323941988748, "learning_rate": 2.2170316301703166e-06, "loss": 0.6504, "step": 20583 }, { "epoch": 0.6009751540101019, "grad_norm": 0.7564798006525273, "learning_rate": 2.2168694241686946e-06, "loss": 0.7237, "step": 20584 }, { "epoch": 0.6010043502379493, "grad_norm": 0.7302807893526718, "learning_rate": 2.216707218167072e-06, "loss": 0.6568, "step": 20585 }, { "epoch": 0.6010335464657967, "grad_norm": 0.7427216393458479, "learning_rate": 2.21654501216545e-06, "loss": 0.6882, "step": 20586 }, { "epoch": 0.601062742693644, "grad_norm": 0.7189772492216486, "learning_rate": 2.216382806163828e-06, "loss": 0.6635, "step": 20587 }, { "epoch": 0.6010919389214914, "grad_norm": 0.6752290899721346, "learning_rate": 2.216220600162206e-06, "loss": 0.5688, "step": 20588 }, { "epoch": 0.6011211351493387, "grad_norm": 0.7330924225264513, "learning_rate": 2.216058394160584e-06, "loss": 0.6196, "step": 20589 }, { "epoch": 0.6011503313771861, "grad_norm": 0.6845842892534949, "learning_rate": 2.2158961881589622e-06, "loss": 0.5978, "step": 20590 }, { "epoch": 0.6011795276050335, "grad_norm": 0.7811463141304231, "learning_rate": 2.2157339821573402e-06, "loss": 0.6898, "step": 20591 }, { "epoch": 0.6012087238328808, "grad_norm": 0.6929427357465278, "learning_rate": 2.215571776155718e-06, "loss": 0.593, "step": 20592 }, { "epoch": 0.6012379200607282, "grad_norm": 0.7112496819094202, "learning_rate": 2.215409570154096e-06, "loss": 0.6013, "step": 20593 }, { "epoch": 0.6012671162885755, "grad_norm": 0.7119748036702089, "learning_rate": 2.215247364152474e-06, "loss": 0.6047, "step": 20594 }, { "epoch": 0.6012963125164229, "grad_norm": 0.6964234851704195, "learning_rate": 2.215085158150852e-06, "loss": 0.6154, "step": 20595 }, { "epoch": 0.6013255087442703, "grad_norm": 0.7507607729113775, "learning_rate": 2.21492295214923e-06, "loss": 0.6338, "step": 20596 }, { "epoch": 0.6013547049721176, "grad_norm": 0.6833755676703575, "learning_rate": 2.2147607461476074e-06, "loss": 0.5912, "step": 20597 }, { "epoch": 0.601383901199965, "grad_norm": 0.7577937858800172, "learning_rate": 2.2145985401459854e-06, "loss": 0.6513, "step": 20598 }, { "epoch": 0.6014130974278123, "grad_norm": 0.7044143435792575, "learning_rate": 2.214436334144364e-06, "loss": 0.6178, "step": 20599 }, { "epoch": 0.6014422936556597, "grad_norm": 0.7426268084079599, "learning_rate": 2.2142741281427414e-06, "loss": 0.6912, "step": 20600 }, { "epoch": 0.6014714898835071, "grad_norm": 0.7342083229979165, "learning_rate": 2.2141119221411194e-06, "loss": 0.6685, "step": 20601 }, { "epoch": 0.6015006861113544, "grad_norm": 0.7049276259613763, "learning_rate": 2.2139497161394975e-06, "loss": 0.6033, "step": 20602 }, { "epoch": 0.6015298823392018, "grad_norm": 0.7456406059914514, "learning_rate": 2.2137875101378755e-06, "loss": 0.7132, "step": 20603 }, { "epoch": 0.6015590785670492, "grad_norm": 0.6552390449788136, "learning_rate": 2.213625304136253e-06, "loss": 0.5549, "step": 20604 }, { "epoch": 0.6015882747948965, "grad_norm": 0.6706520252534947, "learning_rate": 2.213463098134631e-06, "loss": 0.5246, "step": 20605 }, { "epoch": 0.6016174710227439, "grad_norm": 0.730727609201222, "learning_rate": 2.213300892133009e-06, "loss": 0.592, "step": 20606 }, { "epoch": 0.6016466672505912, "grad_norm": 0.7525952439613592, "learning_rate": 2.213138686131387e-06, "loss": 0.6882, "step": 20607 }, { "epoch": 0.6016758634784386, "grad_norm": 0.709364062981248, "learning_rate": 2.2129764801297647e-06, "loss": 0.6163, "step": 20608 }, { "epoch": 0.601705059706286, "grad_norm": 0.7521064652757818, "learning_rate": 2.212814274128143e-06, "loss": 0.7069, "step": 20609 }, { "epoch": 0.6017342559341333, "grad_norm": 0.7177021721223706, "learning_rate": 2.212652068126521e-06, "loss": 0.6605, "step": 20610 }, { "epoch": 0.6017634521619807, "grad_norm": 0.699973029451892, "learning_rate": 2.2124898621248987e-06, "loss": 0.6387, "step": 20611 }, { "epoch": 0.601792648389828, "grad_norm": 0.7525551426211657, "learning_rate": 2.2123276561232767e-06, "loss": 0.67, "step": 20612 }, { "epoch": 0.6018218446176754, "grad_norm": 0.7592116738993528, "learning_rate": 2.2121654501216547e-06, "loss": 0.7108, "step": 20613 }, { "epoch": 0.6018510408455228, "grad_norm": 0.6884243627912762, "learning_rate": 2.2120032441200327e-06, "loss": 0.5957, "step": 20614 }, { "epoch": 0.6018802370733701, "grad_norm": 0.7134575049894233, "learning_rate": 2.2118410381184107e-06, "loss": 0.6209, "step": 20615 }, { "epoch": 0.6019094333012175, "grad_norm": 0.7392191525066899, "learning_rate": 2.2116788321167883e-06, "loss": 0.6894, "step": 20616 }, { "epoch": 0.6019386295290649, "grad_norm": 0.7423412701694165, "learning_rate": 2.2115166261151663e-06, "loss": 0.6735, "step": 20617 }, { "epoch": 0.6019678257569122, "grad_norm": 0.7428088402912919, "learning_rate": 2.2113544201135447e-06, "loss": 0.6645, "step": 20618 }, { "epoch": 0.6019970219847596, "grad_norm": 0.7697990733637926, "learning_rate": 2.2111922141119223e-06, "loss": 0.6555, "step": 20619 }, { "epoch": 0.6020262182126069, "grad_norm": 0.6787497741964162, "learning_rate": 2.2110300081103003e-06, "loss": 0.5971, "step": 20620 }, { "epoch": 0.6020554144404543, "grad_norm": 0.74332952183246, "learning_rate": 2.2108678021086783e-06, "loss": 0.7146, "step": 20621 }, { "epoch": 0.6020846106683017, "grad_norm": 0.7261050507997924, "learning_rate": 2.2107055961070563e-06, "loss": 0.6385, "step": 20622 }, { "epoch": 0.602113806896149, "grad_norm": 0.7471535748252194, "learning_rate": 2.210543390105434e-06, "loss": 0.6519, "step": 20623 }, { "epoch": 0.6021430031239964, "grad_norm": 0.7289994404906371, "learning_rate": 2.210381184103812e-06, "loss": 0.6517, "step": 20624 }, { "epoch": 0.6021721993518437, "grad_norm": 0.7437729140632149, "learning_rate": 2.21021897810219e-06, "loss": 0.6653, "step": 20625 }, { "epoch": 0.6022013955796911, "grad_norm": 0.7861267392592916, "learning_rate": 2.210056772100568e-06, "loss": 0.6632, "step": 20626 }, { "epoch": 0.6022305918075385, "grad_norm": 0.8057372167045393, "learning_rate": 2.2098945660989455e-06, "loss": 0.6906, "step": 20627 }, { "epoch": 0.6022597880353858, "grad_norm": 0.7693825542464584, "learning_rate": 2.209732360097324e-06, "loss": 0.6758, "step": 20628 }, { "epoch": 0.6022889842632332, "grad_norm": 0.7099901316643852, "learning_rate": 2.209570154095702e-06, "loss": 0.6201, "step": 20629 }, { "epoch": 0.6023181804910805, "grad_norm": 0.7869002559781185, "learning_rate": 2.2094079480940795e-06, "loss": 0.6654, "step": 20630 }, { "epoch": 0.6023473767189279, "grad_norm": 0.7043360029421806, "learning_rate": 2.2092457420924575e-06, "loss": 0.6272, "step": 20631 }, { "epoch": 0.6023765729467753, "grad_norm": 0.7615373332628815, "learning_rate": 2.2090835360908355e-06, "loss": 0.7054, "step": 20632 }, { "epoch": 0.6024057691746226, "grad_norm": 0.9940231415721962, "learning_rate": 2.2089213300892135e-06, "loss": 0.6459, "step": 20633 }, { "epoch": 0.60243496540247, "grad_norm": 0.784824098676723, "learning_rate": 2.2087591240875916e-06, "loss": 0.7092, "step": 20634 }, { "epoch": 0.6024641616303174, "grad_norm": 0.7201128051795933, "learning_rate": 2.208596918085969e-06, "loss": 0.6361, "step": 20635 }, { "epoch": 0.6024933578581647, "grad_norm": 0.746849933765165, "learning_rate": 2.208434712084347e-06, "loss": 0.7159, "step": 20636 }, { "epoch": 0.6025225540860121, "grad_norm": 0.7382582060670175, "learning_rate": 2.208272506082725e-06, "loss": 0.6184, "step": 20637 }, { "epoch": 0.6025517503138594, "grad_norm": 0.6525834207084575, "learning_rate": 2.208110300081103e-06, "loss": 0.5196, "step": 20638 }, { "epoch": 0.6025809465417068, "grad_norm": 0.7437928546681999, "learning_rate": 2.207948094079481e-06, "loss": 0.6647, "step": 20639 }, { "epoch": 0.6026101427695542, "grad_norm": 0.8024966402257137, "learning_rate": 2.207785888077859e-06, "loss": 0.662, "step": 20640 }, { "epoch": 0.6026393389974015, "grad_norm": 0.7063650308991263, "learning_rate": 2.207623682076237e-06, "loss": 0.6371, "step": 20641 }, { "epoch": 0.6026685352252489, "grad_norm": 0.7461873291110114, "learning_rate": 2.2074614760746148e-06, "loss": 0.6378, "step": 20642 }, { "epoch": 0.6026977314530962, "grad_norm": 0.7819970260145289, "learning_rate": 2.2072992700729928e-06, "loss": 0.7344, "step": 20643 }, { "epoch": 0.6027269276809436, "grad_norm": 0.6615935850334042, "learning_rate": 2.2071370640713708e-06, "loss": 0.5075, "step": 20644 }, { "epoch": 0.602756123908791, "grad_norm": 0.6935200927625133, "learning_rate": 2.2069748580697488e-06, "loss": 0.5904, "step": 20645 }, { "epoch": 0.6027853201366383, "grad_norm": 0.7077259628615644, "learning_rate": 2.206812652068127e-06, "loss": 0.6312, "step": 20646 }, { "epoch": 0.6028145163644857, "grad_norm": 0.7029529993243386, "learning_rate": 2.206650446066505e-06, "loss": 0.6491, "step": 20647 }, { "epoch": 0.602843712592333, "grad_norm": 0.694466151105908, "learning_rate": 2.206488240064883e-06, "loss": 0.5975, "step": 20648 }, { "epoch": 0.6028729088201804, "grad_norm": 0.7077841950619076, "learning_rate": 2.2063260340632604e-06, "loss": 0.6443, "step": 20649 }, { "epoch": 0.6029021050480278, "grad_norm": 0.743163469176598, "learning_rate": 2.2061638280616384e-06, "loss": 0.7096, "step": 20650 }, { "epoch": 0.6029313012758751, "grad_norm": 0.7372725827142217, "learning_rate": 2.2060016220600164e-06, "loss": 0.6618, "step": 20651 }, { "epoch": 0.6029604975037225, "grad_norm": 0.7557269374949783, "learning_rate": 2.2058394160583944e-06, "loss": 0.621, "step": 20652 }, { "epoch": 0.6029896937315699, "grad_norm": 0.7038870361672581, "learning_rate": 2.2056772100567724e-06, "loss": 0.6056, "step": 20653 }, { "epoch": 0.6030188899594172, "grad_norm": 0.7654541913843816, "learning_rate": 2.20551500405515e-06, "loss": 0.6909, "step": 20654 }, { "epoch": 0.6030480861872646, "grad_norm": 0.7360877786899935, "learning_rate": 2.205352798053528e-06, "loss": 0.6142, "step": 20655 }, { "epoch": 0.6030772824151119, "grad_norm": 0.7686601947588161, "learning_rate": 2.205190592051906e-06, "loss": 0.7189, "step": 20656 }, { "epoch": 0.6031064786429593, "grad_norm": 0.7934101240373439, "learning_rate": 2.205028386050284e-06, "loss": 0.7548, "step": 20657 }, { "epoch": 0.6031356748708067, "grad_norm": 0.7405069668396517, "learning_rate": 2.204866180048662e-06, "loss": 0.7083, "step": 20658 }, { "epoch": 0.603164871098654, "grad_norm": 0.7836216667609189, "learning_rate": 2.20470397404704e-06, "loss": 0.7029, "step": 20659 }, { "epoch": 0.6031940673265014, "grad_norm": 0.7166588376359389, "learning_rate": 2.204541768045418e-06, "loss": 0.6243, "step": 20660 }, { "epoch": 0.6032232635543487, "grad_norm": 0.7120401920791444, "learning_rate": 2.2043795620437956e-06, "loss": 0.6594, "step": 20661 }, { "epoch": 0.6032524597821961, "grad_norm": 0.7366222492770738, "learning_rate": 2.2042173560421736e-06, "loss": 0.6531, "step": 20662 }, { "epoch": 0.6032816560100435, "grad_norm": 0.6733629477803398, "learning_rate": 2.2040551500405516e-06, "loss": 0.5752, "step": 20663 }, { "epoch": 0.6033108522378908, "grad_norm": 0.7287311558915843, "learning_rate": 2.2038929440389296e-06, "loss": 0.634, "step": 20664 }, { "epoch": 0.6033400484657382, "grad_norm": 0.7162284743884592, "learning_rate": 2.2037307380373076e-06, "loss": 0.673, "step": 20665 }, { "epoch": 0.6033692446935855, "grad_norm": 0.7345046565515806, "learning_rate": 2.2035685320356857e-06, "loss": 0.679, "step": 20666 }, { "epoch": 0.6033984409214329, "grad_norm": 0.715451223902591, "learning_rate": 2.2034063260340637e-06, "loss": 0.644, "step": 20667 }, { "epoch": 0.6034276371492803, "grad_norm": 0.6486940054025997, "learning_rate": 2.2032441200324412e-06, "loss": 0.5376, "step": 20668 }, { "epoch": 0.6034568333771276, "grad_norm": 0.7113914641365006, "learning_rate": 2.2030819140308193e-06, "loss": 0.6133, "step": 20669 }, { "epoch": 0.603486029604975, "grad_norm": 0.7113415793238704, "learning_rate": 2.2029197080291973e-06, "loss": 0.6458, "step": 20670 }, { "epoch": 0.6035152258328224, "grad_norm": 0.756033802756522, "learning_rate": 2.2027575020275753e-06, "loss": 0.6097, "step": 20671 }, { "epoch": 0.6035444220606697, "grad_norm": 0.8044200500489116, "learning_rate": 2.2025952960259533e-06, "loss": 0.7477, "step": 20672 }, { "epoch": 0.6035736182885171, "grad_norm": 0.6926841641385008, "learning_rate": 2.202433090024331e-06, "loss": 0.6157, "step": 20673 }, { "epoch": 0.6036028145163644, "grad_norm": 0.7493076044966057, "learning_rate": 2.202270884022709e-06, "loss": 0.5945, "step": 20674 }, { "epoch": 0.6036320107442118, "grad_norm": 0.7708127724071778, "learning_rate": 2.202108678021087e-06, "loss": 0.6592, "step": 20675 }, { "epoch": 0.6036612069720592, "grad_norm": 0.7429938911370499, "learning_rate": 2.201946472019465e-06, "loss": 0.6873, "step": 20676 }, { "epoch": 0.6036904031999065, "grad_norm": 0.7211094198833243, "learning_rate": 2.201784266017843e-06, "loss": 0.5932, "step": 20677 }, { "epoch": 0.6037195994277539, "grad_norm": 0.7507795225526355, "learning_rate": 2.201622060016221e-06, "loss": 0.6786, "step": 20678 }, { "epoch": 0.6037487956556012, "grad_norm": 0.7069402807568126, "learning_rate": 2.201459854014599e-06, "loss": 0.6328, "step": 20679 }, { "epoch": 0.6037779918834486, "grad_norm": 0.7531747054271025, "learning_rate": 2.2012976480129765e-06, "loss": 0.7123, "step": 20680 }, { "epoch": 0.603807188111296, "grad_norm": 0.7138548785193565, "learning_rate": 2.2011354420113545e-06, "loss": 0.5921, "step": 20681 }, { "epoch": 0.6038363843391433, "grad_norm": 0.7656511535417269, "learning_rate": 2.2009732360097325e-06, "loss": 0.677, "step": 20682 }, { "epoch": 0.6038655805669907, "grad_norm": 0.690048644560609, "learning_rate": 2.2008110300081105e-06, "loss": 0.5973, "step": 20683 }, { "epoch": 0.6038947767948382, "grad_norm": 0.7150249962834044, "learning_rate": 2.2006488240064885e-06, "loss": 0.6344, "step": 20684 }, { "epoch": 0.6039239730226855, "grad_norm": 0.7177643366265898, "learning_rate": 2.2004866180048665e-06, "loss": 0.6613, "step": 20685 }, { "epoch": 0.6039531692505329, "grad_norm": 0.7092284885931666, "learning_rate": 2.2003244120032445e-06, "loss": 0.6176, "step": 20686 }, { "epoch": 0.6039823654783802, "grad_norm": 0.7025244649973761, "learning_rate": 2.200162206001622e-06, "loss": 0.5774, "step": 20687 }, { "epoch": 0.6040115617062276, "grad_norm": 0.6955974203741949, "learning_rate": 2.2e-06, "loss": 0.6006, "step": 20688 }, { "epoch": 0.604040757934075, "grad_norm": 0.7430192344458945, "learning_rate": 2.199837793998378e-06, "loss": 0.639, "step": 20689 }, { "epoch": 0.6040699541619223, "grad_norm": 0.7425512328279913, "learning_rate": 2.199675587996756e-06, "loss": 0.6219, "step": 20690 }, { "epoch": 0.6040991503897697, "grad_norm": 0.7448371154048166, "learning_rate": 2.199513381995134e-06, "loss": 0.6584, "step": 20691 }, { "epoch": 0.604128346617617, "grad_norm": 0.7223311445024636, "learning_rate": 2.1993511759935117e-06, "loss": 0.6216, "step": 20692 }, { "epoch": 0.6041575428454644, "grad_norm": 0.7165816021039151, "learning_rate": 2.1991889699918897e-06, "loss": 0.6231, "step": 20693 }, { "epoch": 0.6041867390733118, "grad_norm": 0.6889809998562955, "learning_rate": 2.1990267639902677e-06, "loss": 0.5987, "step": 20694 }, { "epoch": 0.6042159353011591, "grad_norm": 0.7209407788953391, "learning_rate": 2.1988645579886457e-06, "loss": 0.6593, "step": 20695 }, { "epoch": 0.6042451315290065, "grad_norm": 0.7288684650516235, "learning_rate": 2.1987023519870237e-06, "loss": 0.7073, "step": 20696 }, { "epoch": 0.6042743277568539, "grad_norm": 0.704244129138691, "learning_rate": 2.1985401459854017e-06, "loss": 0.6468, "step": 20697 }, { "epoch": 0.6043035239847012, "grad_norm": 0.7137153368895753, "learning_rate": 2.1983779399837798e-06, "loss": 0.5904, "step": 20698 }, { "epoch": 0.6043327202125486, "grad_norm": 0.7355235227543098, "learning_rate": 2.1982157339821573e-06, "loss": 0.67, "step": 20699 }, { "epoch": 0.6043619164403959, "grad_norm": 0.6968197068812869, "learning_rate": 2.1980535279805353e-06, "loss": 0.6023, "step": 20700 }, { "epoch": 0.6043911126682433, "grad_norm": 0.7123727079461254, "learning_rate": 2.1978913219789134e-06, "loss": 0.6454, "step": 20701 }, { "epoch": 0.6044203088960907, "grad_norm": 0.7408069852270197, "learning_rate": 2.1977291159772914e-06, "loss": 0.6989, "step": 20702 }, { "epoch": 0.604449505123938, "grad_norm": 0.6926432453101207, "learning_rate": 2.1975669099756694e-06, "loss": 0.5387, "step": 20703 }, { "epoch": 0.6044787013517854, "grad_norm": 0.7345148738093296, "learning_rate": 2.1974047039740474e-06, "loss": 0.6359, "step": 20704 }, { "epoch": 0.6045078975796327, "grad_norm": 0.6809811572220381, "learning_rate": 2.1972424979724254e-06, "loss": 0.6262, "step": 20705 }, { "epoch": 0.6045370938074801, "grad_norm": 0.7433622716340946, "learning_rate": 2.197080291970803e-06, "loss": 0.6548, "step": 20706 }, { "epoch": 0.6045662900353275, "grad_norm": 0.7424065460062915, "learning_rate": 2.196918085969181e-06, "loss": 0.6409, "step": 20707 }, { "epoch": 0.6045954862631748, "grad_norm": 0.6614473775370721, "learning_rate": 2.196755879967559e-06, "loss": 0.5699, "step": 20708 }, { "epoch": 0.6046246824910222, "grad_norm": 0.859952340003637, "learning_rate": 2.196593673965937e-06, "loss": 0.8434, "step": 20709 }, { "epoch": 0.6046538787188696, "grad_norm": 0.735546682842283, "learning_rate": 2.1964314679643146e-06, "loss": 0.6346, "step": 20710 }, { "epoch": 0.6046830749467169, "grad_norm": 0.712788505804028, "learning_rate": 2.1962692619626926e-06, "loss": 0.6324, "step": 20711 }, { "epoch": 0.6047122711745643, "grad_norm": 0.7412198550771761, "learning_rate": 2.196107055961071e-06, "loss": 0.6821, "step": 20712 }, { "epoch": 0.6047414674024116, "grad_norm": 0.6988346181128192, "learning_rate": 2.1959448499594486e-06, "loss": 0.5979, "step": 20713 }, { "epoch": 0.604770663630259, "grad_norm": 0.7384305408894413, "learning_rate": 2.1957826439578266e-06, "loss": 0.6355, "step": 20714 }, { "epoch": 0.6047998598581064, "grad_norm": 0.6910395306669492, "learning_rate": 2.1956204379562046e-06, "loss": 0.5906, "step": 20715 }, { "epoch": 0.6048290560859537, "grad_norm": 0.9072811161668533, "learning_rate": 2.1954582319545826e-06, "loss": 0.6928, "step": 20716 }, { "epoch": 0.6048582523138011, "grad_norm": 0.7524313267154733, "learning_rate": 2.1952960259529606e-06, "loss": 0.6802, "step": 20717 }, { "epoch": 0.6048874485416484, "grad_norm": 0.7721646611122054, "learning_rate": 2.195133819951338e-06, "loss": 0.6999, "step": 20718 }, { "epoch": 0.6049166447694958, "grad_norm": 0.7522812811490251, "learning_rate": 2.194971613949716e-06, "loss": 0.6438, "step": 20719 }, { "epoch": 0.6049458409973432, "grad_norm": 0.7450176393379562, "learning_rate": 2.1948094079480942e-06, "loss": 0.6068, "step": 20720 }, { "epoch": 0.6049750372251905, "grad_norm": 0.7943918966115012, "learning_rate": 2.1946472019464722e-06, "loss": 0.6071, "step": 20721 }, { "epoch": 0.6050042334530379, "grad_norm": 0.7622638021740552, "learning_rate": 2.1944849959448502e-06, "loss": 0.679, "step": 20722 }, { "epoch": 0.6050334296808852, "grad_norm": 0.7022589619416051, "learning_rate": 2.1943227899432282e-06, "loss": 0.6128, "step": 20723 }, { "epoch": 0.6050626259087326, "grad_norm": 0.7498773018177923, "learning_rate": 2.1941605839416062e-06, "loss": 0.676, "step": 20724 }, { "epoch": 0.60509182213658, "grad_norm": 0.8043974581731715, "learning_rate": 2.193998377939984e-06, "loss": 0.7206, "step": 20725 }, { "epoch": 0.6051210183644273, "grad_norm": 0.6681012877635176, "learning_rate": 2.193836171938362e-06, "loss": 0.5358, "step": 20726 }, { "epoch": 0.6051502145922747, "grad_norm": 0.715653090050876, "learning_rate": 2.19367396593674e-06, "loss": 0.6475, "step": 20727 }, { "epoch": 0.605179410820122, "grad_norm": 0.7165783839577223, "learning_rate": 2.193511759935118e-06, "loss": 0.6371, "step": 20728 }, { "epoch": 0.6052086070479694, "grad_norm": 0.75913838059941, "learning_rate": 2.1933495539334954e-06, "loss": 0.6116, "step": 20729 }, { "epoch": 0.6052378032758168, "grad_norm": 0.7073772832867659, "learning_rate": 2.1931873479318734e-06, "loss": 0.6346, "step": 20730 }, { "epoch": 0.6052669995036641, "grad_norm": 0.7472018617726585, "learning_rate": 2.193025141930252e-06, "loss": 0.6373, "step": 20731 }, { "epoch": 0.6052961957315115, "grad_norm": 0.6940944213333609, "learning_rate": 2.1928629359286294e-06, "loss": 0.6183, "step": 20732 }, { "epoch": 0.6053253919593589, "grad_norm": 0.723828277895924, "learning_rate": 2.1927007299270075e-06, "loss": 0.621, "step": 20733 }, { "epoch": 0.6053545881872062, "grad_norm": 0.7447442797087178, "learning_rate": 2.1925385239253855e-06, "loss": 0.6839, "step": 20734 }, { "epoch": 0.6053837844150536, "grad_norm": 0.7198668619584733, "learning_rate": 2.1923763179237635e-06, "loss": 0.6286, "step": 20735 }, { "epoch": 0.6054129806429009, "grad_norm": 0.7034131581927032, "learning_rate": 2.1922141119221415e-06, "loss": 0.6161, "step": 20736 }, { "epoch": 0.6054421768707483, "grad_norm": 0.7061864872108349, "learning_rate": 2.192051905920519e-06, "loss": 0.632, "step": 20737 }, { "epoch": 0.6054713730985957, "grad_norm": 0.6884535789452713, "learning_rate": 2.191889699918897e-06, "loss": 0.6078, "step": 20738 }, { "epoch": 0.605500569326443, "grad_norm": 0.7300005630509749, "learning_rate": 2.191727493917275e-06, "loss": 0.6302, "step": 20739 }, { "epoch": 0.6055297655542904, "grad_norm": 0.7445924186884529, "learning_rate": 2.191565287915653e-06, "loss": 0.6535, "step": 20740 }, { "epoch": 0.6055589617821378, "grad_norm": 0.7714934729941849, "learning_rate": 2.191403081914031e-06, "loss": 0.6772, "step": 20741 }, { "epoch": 0.6055881580099851, "grad_norm": 0.7658953459564547, "learning_rate": 2.191240875912409e-06, "loss": 0.6492, "step": 20742 }, { "epoch": 0.6056173542378325, "grad_norm": 0.785025295932139, "learning_rate": 2.191078669910787e-06, "loss": 0.7037, "step": 20743 }, { "epoch": 0.6056465504656798, "grad_norm": 0.7480643506038341, "learning_rate": 2.1909164639091647e-06, "loss": 0.6008, "step": 20744 }, { "epoch": 0.6056757466935272, "grad_norm": 0.6989807683925958, "learning_rate": 2.1907542579075427e-06, "loss": 0.588, "step": 20745 }, { "epoch": 0.6057049429213746, "grad_norm": 0.6875876974800622, "learning_rate": 2.1905920519059207e-06, "loss": 0.6258, "step": 20746 }, { "epoch": 0.6057341391492219, "grad_norm": 0.8468178701060886, "learning_rate": 2.1904298459042987e-06, "loss": 0.7277, "step": 20747 }, { "epoch": 0.6057633353770693, "grad_norm": 0.73241328195468, "learning_rate": 2.1902676399026763e-06, "loss": 0.6399, "step": 20748 }, { "epoch": 0.6057925316049166, "grad_norm": 0.7491900098353979, "learning_rate": 2.1901054339010543e-06, "loss": 0.7366, "step": 20749 }, { "epoch": 0.605821727832764, "grad_norm": 0.7518392733833146, "learning_rate": 2.1899432278994327e-06, "loss": 0.6852, "step": 20750 }, { "epoch": 0.6058509240606114, "grad_norm": 0.734352655339098, "learning_rate": 2.1897810218978103e-06, "loss": 0.6249, "step": 20751 }, { "epoch": 0.6058801202884587, "grad_norm": 0.7083202791711305, "learning_rate": 2.1896188158961883e-06, "loss": 0.6323, "step": 20752 }, { "epoch": 0.6059093165163061, "grad_norm": 0.8071654480189516, "learning_rate": 2.1894566098945663e-06, "loss": 0.6452, "step": 20753 }, { "epoch": 0.6059385127441534, "grad_norm": 0.7146528926401038, "learning_rate": 2.1892944038929443e-06, "loss": 0.6354, "step": 20754 }, { "epoch": 0.6059677089720008, "grad_norm": 0.7451810763192541, "learning_rate": 2.1891321978913223e-06, "loss": 0.6793, "step": 20755 }, { "epoch": 0.6059969051998482, "grad_norm": 0.7321439655630225, "learning_rate": 2.1889699918897e-06, "loss": 0.6549, "step": 20756 }, { "epoch": 0.6060261014276955, "grad_norm": 0.6419021532338336, "learning_rate": 2.188807785888078e-06, "loss": 0.5025, "step": 20757 }, { "epoch": 0.6060552976555429, "grad_norm": 0.7319775064593635, "learning_rate": 2.188645579886456e-06, "loss": 0.656, "step": 20758 }, { "epoch": 0.6060844938833903, "grad_norm": 0.6885515454954206, "learning_rate": 2.188483373884834e-06, "loss": 0.6439, "step": 20759 }, { "epoch": 0.6061136901112376, "grad_norm": 0.7332624672136976, "learning_rate": 2.188321167883212e-06, "loss": 0.6423, "step": 20760 }, { "epoch": 0.606142886339085, "grad_norm": 0.7325172041671315, "learning_rate": 2.18815896188159e-06, "loss": 0.6273, "step": 20761 }, { "epoch": 0.6061720825669323, "grad_norm": 0.7408488066945823, "learning_rate": 2.187996755879968e-06, "loss": 0.623, "step": 20762 }, { "epoch": 0.6062012787947797, "grad_norm": 0.7505766111286885, "learning_rate": 2.1878345498783455e-06, "loss": 0.706, "step": 20763 }, { "epoch": 0.6062304750226271, "grad_norm": 0.7769075536586232, "learning_rate": 2.1876723438767235e-06, "loss": 0.5779, "step": 20764 }, { "epoch": 0.6062596712504744, "grad_norm": 0.7191042257101488, "learning_rate": 2.1875101378751016e-06, "loss": 0.6076, "step": 20765 }, { "epoch": 0.6062888674783218, "grad_norm": 0.742034618683683, "learning_rate": 2.1873479318734796e-06, "loss": 0.7084, "step": 20766 }, { "epoch": 0.6063180637061691, "grad_norm": 0.7581684991852712, "learning_rate": 2.187185725871857e-06, "loss": 0.7186, "step": 20767 }, { "epoch": 0.6063472599340165, "grad_norm": 0.6989073588265275, "learning_rate": 2.187023519870235e-06, "loss": 0.5985, "step": 20768 }, { "epoch": 0.6063764561618639, "grad_norm": 0.6994227302819724, "learning_rate": 2.1868613138686136e-06, "loss": 0.6116, "step": 20769 }, { "epoch": 0.6064056523897112, "grad_norm": 0.7290849115348581, "learning_rate": 2.186699107866991e-06, "loss": 0.6788, "step": 20770 }, { "epoch": 0.6064348486175586, "grad_norm": 0.7313577259403696, "learning_rate": 2.186536901865369e-06, "loss": 0.6639, "step": 20771 }, { "epoch": 0.606464044845406, "grad_norm": 0.7362262726424521, "learning_rate": 2.186374695863747e-06, "loss": 0.657, "step": 20772 }, { "epoch": 0.6064932410732533, "grad_norm": 0.7071246229797548, "learning_rate": 2.186212489862125e-06, "loss": 0.6247, "step": 20773 }, { "epoch": 0.6065224373011007, "grad_norm": 0.7584370374460889, "learning_rate": 2.186050283860503e-06, "loss": 0.6362, "step": 20774 }, { "epoch": 0.606551633528948, "grad_norm": 0.7496651643875858, "learning_rate": 2.1858880778588808e-06, "loss": 0.6628, "step": 20775 }, { "epoch": 0.6065808297567954, "grad_norm": 0.6716077718702104, "learning_rate": 2.1857258718572588e-06, "loss": 0.5815, "step": 20776 }, { "epoch": 0.6066100259846428, "grad_norm": 0.7727767208347263, "learning_rate": 2.1855636658556368e-06, "loss": 0.6359, "step": 20777 }, { "epoch": 0.6066392222124901, "grad_norm": 0.7033522541055337, "learning_rate": 2.185401459854015e-06, "loss": 0.619, "step": 20778 }, { "epoch": 0.6066684184403375, "grad_norm": 0.7104635566314361, "learning_rate": 2.185239253852393e-06, "loss": 0.6098, "step": 20779 }, { "epoch": 0.6066976146681848, "grad_norm": 0.7950122976940199, "learning_rate": 2.185077047850771e-06, "loss": 0.7207, "step": 20780 }, { "epoch": 0.6067268108960322, "grad_norm": 0.6952629230115599, "learning_rate": 2.184914841849149e-06, "loss": 0.6205, "step": 20781 }, { "epoch": 0.6067560071238796, "grad_norm": 0.6886192281934672, "learning_rate": 2.1847526358475264e-06, "loss": 0.6356, "step": 20782 }, { "epoch": 0.6067852033517269, "grad_norm": 0.7277313168983652, "learning_rate": 2.1845904298459044e-06, "loss": 0.634, "step": 20783 }, { "epoch": 0.6068143995795743, "grad_norm": 0.7178294857006171, "learning_rate": 2.1844282238442824e-06, "loss": 0.57, "step": 20784 }, { "epoch": 0.6068435958074216, "grad_norm": 0.6665829359226706, "learning_rate": 2.1842660178426604e-06, "loss": 0.5274, "step": 20785 }, { "epoch": 0.606872792035269, "grad_norm": 0.7763189143809267, "learning_rate": 2.184103811841038e-06, "loss": 0.6966, "step": 20786 }, { "epoch": 0.6069019882631164, "grad_norm": 0.7328586487821773, "learning_rate": 2.183941605839416e-06, "loss": 0.6436, "step": 20787 }, { "epoch": 0.6069311844909637, "grad_norm": 0.6956476570126763, "learning_rate": 2.1837793998377944e-06, "loss": 0.6541, "step": 20788 }, { "epoch": 0.6069603807188111, "grad_norm": 0.7489773548983172, "learning_rate": 2.183617193836172e-06, "loss": 0.6318, "step": 20789 }, { "epoch": 0.6069895769466584, "grad_norm": 0.7383216007089245, "learning_rate": 2.18345498783455e-06, "loss": 0.7043, "step": 20790 }, { "epoch": 0.6070187731745058, "grad_norm": 0.7087298013054205, "learning_rate": 2.183292781832928e-06, "loss": 0.6589, "step": 20791 }, { "epoch": 0.6070479694023532, "grad_norm": 0.8000927507866944, "learning_rate": 2.183130575831306e-06, "loss": 0.6559, "step": 20792 }, { "epoch": 0.6070771656302005, "grad_norm": 0.7497792079435733, "learning_rate": 2.182968369829684e-06, "loss": 0.6803, "step": 20793 }, { "epoch": 0.6071063618580479, "grad_norm": 0.7132467737126571, "learning_rate": 2.1828061638280616e-06, "loss": 0.6885, "step": 20794 }, { "epoch": 0.6071355580858953, "grad_norm": 0.707734111577774, "learning_rate": 2.1826439578264396e-06, "loss": 0.5824, "step": 20795 }, { "epoch": 0.6071647543137426, "grad_norm": 0.7550464450216876, "learning_rate": 2.1824817518248176e-06, "loss": 0.73, "step": 20796 }, { "epoch": 0.60719395054159, "grad_norm": 0.7187746957954191, "learning_rate": 2.1823195458231957e-06, "loss": 0.5836, "step": 20797 }, { "epoch": 0.6072231467694373, "grad_norm": 0.6359309951708203, "learning_rate": 2.1821573398215737e-06, "loss": 0.528, "step": 20798 }, { "epoch": 0.6072523429972847, "grad_norm": 0.6993868148746853, "learning_rate": 2.1819951338199517e-06, "loss": 0.5837, "step": 20799 }, { "epoch": 0.6072815392251321, "grad_norm": 0.7542514079966297, "learning_rate": 2.1818329278183297e-06, "loss": 0.6395, "step": 20800 }, { "epoch": 0.6073107354529794, "grad_norm": 0.6954361196635321, "learning_rate": 2.1816707218167073e-06, "loss": 0.6154, "step": 20801 }, { "epoch": 0.6073399316808268, "grad_norm": 0.7142999970626784, "learning_rate": 2.1815085158150853e-06, "loss": 0.615, "step": 20802 }, { "epoch": 0.6073691279086741, "grad_norm": 0.7239443727844658, "learning_rate": 2.1813463098134633e-06, "loss": 0.6346, "step": 20803 }, { "epoch": 0.6073983241365216, "grad_norm": 0.716756764876402, "learning_rate": 2.1811841038118413e-06, "loss": 0.6439, "step": 20804 }, { "epoch": 0.607427520364369, "grad_norm": 0.7209803178289997, "learning_rate": 2.181021897810219e-06, "loss": 0.6532, "step": 20805 }, { "epoch": 0.6074567165922163, "grad_norm": 0.7032891701864197, "learning_rate": 2.180859691808597e-06, "loss": 0.5962, "step": 20806 }, { "epoch": 0.6074859128200637, "grad_norm": 0.7532488729238731, "learning_rate": 2.1806974858069753e-06, "loss": 0.6848, "step": 20807 }, { "epoch": 0.6075151090479111, "grad_norm": 0.7501786389922399, "learning_rate": 2.180535279805353e-06, "loss": 0.6475, "step": 20808 }, { "epoch": 0.6075443052757584, "grad_norm": 0.7430507460541929, "learning_rate": 2.180373073803731e-06, "loss": 0.7181, "step": 20809 }, { "epoch": 0.6075735015036058, "grad_norm": 0.7686748983385011, "learning_rate": 2.180210867802109e-06, "loss": 0.6796, "step": 20810 }, { "epoch": 0.6076026977314531, "grad_norm": 0.6701969845952841, "learning_rate": 2.180048661800487e-06, "loss": 0.6266, "step": 20811 }, { "epoch": 0.6076318939593005, "grad_norm": 0.7478855277153406, "learning_rate": 2.179886455798865e-06, "loss": 0.6667, "step": 20812 }, { "epoch": 0.6076610901871479, "grad_norm": 0.7939039690348159, "learning_rate": 2.1797242497972425e-06, "loss": 0.7388, "step": 20813 }, { "epoch": 0.6076902864149952, "grad_norm": 0.7442586792387271, "learning_rate": 2.1795620437956205e-06, "loss": 0.6662, "step": 20814 }, { "epoch": 0.6077194826428426, "grad_norm": 0.7198145102232109, "learning_rate": 2.1793998377939985e-06, "loss": 0.6854, "step": 20815 }, { "epoch": 0.60774867887069, "grad_norm": 0.7278053347253253, "learning_rate": 2.1792376317923765e-06, "loss": 0.6607, "step": 20816 }, { "epoch": 0.6077778750985373, "grad_norm": 0.7122055648802492, "learning_rate": 2.1790754257907545e-06, "loss": 0.6118, "step": 20817 }, { "epoch": 0.6078070713263847, "grad_norm": 0.6618334427570288, "learning_rate": 2.1789132197891325e-06, "loss": 0.5286, "step": 20818 }, { "epoch": 0.607836267554232, "grad_norm": 0.7176549426408434, "learning_rate": 2.1787510137875105e-06, "loss": 0.6451, "step": 20819 }, { "epoch": 0.6078654637820794, "grad_norm": 0.6786576670135421, "learning_rate": 2.178588807785888e-06, "loss": 0.6033, "step": 20820 }, { "epoch": 0.6078946600099268, "grad_norm": 0.669726271281048, "learning_rate": 2.178426601784266e-06, "loss": 0.5731, "step": 20821 }, { "epoch": 0.6079238562377741, "grad_norm": 0.7393075232320507, "learning_rate": 2.178264395782644e-06, "loss": 0.6298, "step": 20822 }, { "epoch": 0.6079530524656215, "grad_norm": 0.721135801983558, "learning_rate": 2.178102189781022e-06, "loss": 0.6492, "step": 20823 }, { "epoch": 0.6079822486934688, "grad_norm": 0.6758930382044037, "learning_rate": 2.1779399837793997e-06, "loss": 0.5978, "step": 20824 }, { "epoch": 0.6080114449213162, "grad_norm": 0.7514842960507568, "learning_rate": 2.1777777777777777e-06, "loss": 0.69, "step": 20825 }, { "epoch": 0.6080406411491636, "grad_norm": 0.918931300286834, "learning_rate": 2.177615571776156e-06, "loss": 0.6687, "step": 20826 }, { "epoch": 0.6080698373770109, "grad_norm": 0.7409454059432445, "learning_rate": 2.1774533657745337e-06, "loss": 0.6089, "step": 20827 }, { "epoch": 0.6080990336048583, "grad_norm": 0.8083945901475795, "learning_rate": 2.1772911597729117e-06, "loss": 0.7123, "step": 20828 }, { "epoch": 0.6081282298327056, "grad_norm": 0.7185525707742261, "learning_rate": 2.1771289537712898e-06, "loss": 0.6455, "step": 20829 }, { "epoch": 0.608157426060553, "grad_norm": 0.6843956220204092, "learning_rate": 2.1769667477696678e-06, "loss": 0.6436, "step": 20830 }, { "epoch": 0.6081866222884004, "grad_norm": 0.7453687074645439, "learning_rate": 2.1768045417680458e-06, "loss": 0.715, "step": 20831 }, { "epoch": 0.6082158185162477, "grad_norm": 0.7543443109722913, "learning_rate": 2.1766423357664234e-06, "loss": 0.6955, "step": 20832 }, { "epoch": 0.6082450147440951, "grad_norm": 0.7752972126305783, "learning_rate": 2.1764801297648014e-06, "loss": 0.7048, "step": 20833 }, { "epoch": 0.6082742109719425, "grad_norm": 0.7600659272274236, "learning_rate": 2.1763179237631794e-06, "loss": 0.7075, "step": 20834 }, { "epoch": 0.6083034071997898, "grad_norm": 0.7586389953099951, "learning_rate": 2.1761557177615574e-06, "loss": 0.525, "step": 20835 }, { "epoch": 0.6083326034276372, "grad_norm": 0.7822009415376179, "learning_rate": 2.1759935117599354e-06, "loss": 0.7425, "step": 20836 }, { "epoch": 0.6083617996554845, "grad_norm": 0.75144453767852, "learning_rate": 2.1758313057583134e-06, "loss": 0.6792, "step": 20837 }, { "epoch": 0.6083909958833319, "grad_norm": 0.7555918410714151, "learning_rate": 2.1756690997566914e-06, "loss": 0.7538, "step": 20838 }, { "epoch": 0.6084201921111793, "grad_norm": 0.750346163059029, "learning_rate": 2.175506893755069e-06, "loss": 0.6893, "step": 20839 }, { "epoch": 0.6084493883390266, "grad_norm": 0.7062301318286266, "learning_rate": 2.175344687753447e-06, "loss": 0.6083, "step": 20840 }, { "epoch": 0.608478584566874, "grad_norm": 0.7587720766490078, "learning_rate": 2.175182481751825e-06, "loss": 0.6959, "step": 20841 }, { "epoch": 0.6085077807947213, "grad_norm": 0.7153837512029535, "learning_rate": 2.175020275750203e-06, "loss": 0.5828, "step": 20842 }, { "epoch": 0.6085369770225687, "grad_norm": 0.7147753936702695, "learning_rate": 2.1748580697485806e-06, "loss": 0.598, "step": 20843 }, { "epoch": 0.6085661732504161, "grad_norm": 0.7247566530600653, "learning_rate": 2.1746958637469586e-06, "loss": 0.6643, "step": 20844 }, { "epoch": 0.6085953694782634, "grad_norm": 0.7897136551840326, "learning_rate": 2.174533657745337e-06, "loss": 0.7885, "step": 20845 }, { "epoch": 0.6086245657061108, "grad_norm": 0.7074170053568308, "learning_rate": 2.1743714517437146e-06, "loss": 0.6548, "step": 20846 }, { "epoch": 0.6086537619339581, "grad_norm": 0.7053109435726899, "learning_rate": 2.1742092457420926e-06, "loss": 0.5852, "step": 20847 }, { "epoch": 0.6086829581618055, "grad_norm": 0.7854168132456423, "learning_rate": 2.1740470397404706e-06, "loss": 0.7533, "step": 20848 }, { "epoch": 0.6087121543896529, "grad_norm": 0.8031699342594979, "learning_rate": 2.1738848337388486e-06, "loss": 0.715, "step": 20849 }, { "epoch": 0.6087413506175002, "grad_norm": 0.7077855183969164, "learning_rate": 2.1737226277372266e-06, "loss": 0.6645, "step": 20850 }, { "epoch": 0.6087705468453476, "grad_norm": 0.7014351866197855, "learning_rate": 2.173560421735604e-06, "loss": 0.6107, "step": 20851 }, { "epoch": 0.608799743073195, "grad_norm": 0.706823635003983, "learning_rate": 2.1733982157339822e-06, "loss": 0.6187, "step": 20852 }, { "epoch": 0.6088289393010423, "grad_norm": 0.728987054892766, "learning_rate": 2.1732360097323602e-06, "loss": 0.6149, "step": 20853 }, { "epoch": 0.6088581355288897, "grad_norm": 0.7231711123225744, "learning_rate": 2.1730738037307382e-06, "loss": 0.6392, "step": 20854 }, { "epoch": 0.608887331756737, "grad_norm": 0.7037791497879414, "learning_rate": 2.1729115977291162e-06, "loss": 0.5826, "step": 20855 }, { "epoch": 0.6089165279845844, "grad_norm": 0.7046047451142773, "learning_rate": 2.1727493917274942e-06, "loss": 0.6119, "step": 20856 }, { "epoch": 0.6089457242124318, "grad_norm": 0.7323277589158346, "learning_rate": 2.1725871857258722e-06, "loss": 0.657, "step": 20857 }, { "epoch": 0.6089749204402791, "grad_norm": 0.7791451924815576, "learning_rate": 2.17242497972425e-06, "loss": 0.7154, "step": 20858 }, { "epoch": 0.6090041166681265, "grad_norm": 0.7680974537899387, "learning_rate": 2.172262773722628e-06, "loss": 0.7113, "step": 20859 }, { "epoch": 0.6090333128959738, "grad_norm": 0.6464555966724469, "learning_rate": 2.172100567721006e-06, "loss": 0.5099, "step": 20860 }, { "epoch": 0.6090625091238212, "grad_norm": 0.6964301144363672, "learning_rate": 2.171938361719384e-06, "loss": 0.5825, "step": 20861 }, { "epoch": 0.6090917053516686, "grad_norm": 0.738735076149393, "learning_rate": 2.1717761557177614e-06, "loss": 0.6916, "step": 20862 }, { "epoch": 0.6091209015795159, "grad_norm": 0.7539587619726433, "learning_rate": 2.17161394971614e-06, "loss": 0.663, "step": 20863 }, { "epoch": 0.6091500978073633, "grad_norm": 0.7388724859704492, "learning_rate": 2.171451743714518e-06, "loss": 0.6408, "step": 20864 }, { "epoch": 0.6091792940352107, "grad_norm": 0.7658845132189946, "learning_rate": 2.1712895377128955e-06, "loss": 0.7034, "step": 20865 }, { "epoch": 0.609208490263058, "grad_norm": 0.6938084616493021, "learning_rate": 2.1711273317112735e-06, "loss": 0.6085, "step": 20866 }, { "epoch": 0.6092376864909054, "grad_norm": 0.6380036234339151, "learning_rate": 2.1709651257096515e-06, "loss": 0.5348, "step": 20867 }, { "epoch": 0.6092668827187527, "grad_norm": 0.8122198107512562, "learning_rate": 2.1708029197080295e-06, "loss": 0.6904, "step": 20868 }, { "epoch": 0.6092960789466001, "grad_norm": 0.6979132644816752, "learning_rate": 2.1706407137064075e-06, "loss": 0.6323, "step": 20869 }, { "epoch": 0.6093252751744475, "grad_norm": 0.7183985798039974, "learning_rate": 2.170478507704785e-06, "loss": 0.644, "step": 20870 }, { "epoch": 0.6093544714022948, "grad_norm": 0.7135727868306813, "learning_rate": 2.170316301703163e-06, "loss": 0.6063, "step": 20871 }, { "epoch": 0.6093836676301422, "grad_norm": 0.7234539643493815, "learning_rate": 2.170154095701541e-06, "loss": 0.6134, "step": 20872 }, { "epoch": 0.6094128638579895, "grad_norm": 0.7440577756668314, "learning_rate": 2.169991889699919e-06, "loss": 0.6761, "step": 20873 }, { "epoch": 0.6094420600858369, "grad_norm": 0.7013776939287287, "learning_rate": 2.169829683698297e-06, "loss": 0.6151, "step": 20874 }, { "epoch": 0.6094712563136843, "grad_norm": 0.6667822447824955, "learning_rate": 2.169667477696675e-06, "loss": 0.6034, "step": 20875 }, { "epoch": 0.6095004525415316, "grad_norm": 0.7509567080559382, "learning_rate": 2.169505271695053e-06, "loss": 0.7081, "step": 20876 }, { "epoch": 0.609529648769379, "grad_norm": 0.6952789958522768, "learning_rate": 2.1693430656934307e-06, "loss": 0.6184, "step": 20877 }, { "epoch": 0.6095588449972263, "grad_norm": 0.723819746590668, "learning_rate": 2.1691808596918087e-06, "loss": 0.6243, "step": 20878 }, { "epoch": 0.6095880412250737, "grad_norm": 0.7075338725892226, "learning_rate": 2.1690186536901867e-06, "loss": 0.6136, "step": 20879 }, { "epoch": 0.6096172374529211, "grad_norm": 0.740387461961862, "learning_rate": 2.1688564476885647e-06, "loss": 0.5917, "step": 20880 }, { "epoch": 0.6096464336807684, "grad_norm": 0.7460765859071851, "learning_rate": 2.1686942416869423e-06, "loss": 0.6518, "step": 20881 }, { "epoch": 0.6096756299086158, "grad_norm": 0.7551545266481695, "learning_rate": 2.1685320356853207e-06, "loss": 0.7169, "step": 20882 }, { "epoch": 0.6097048261364632, "grad_norm": 0.7424029563223437, "learning_rate": 2.1683698296836987e-06, "loss": 0.6243, "step": 20883 }, { "epoch": 0.6097340223643105, "grad_norm": 0.704350788331494, "learning_rate": 2.1682076236820763e-06, "loss": 0.5907, "step": 20884 }, { "epoch": 0.6097632185921579, "grad_norm": 0.7190197967926468, "learning_rate": 2.1680454176804543e-06, "loss": 0.5677, "step": 20885 }, { "epoch": 0.6097924148200052, "grad_norm": 0.7060865199010967, "learning_rate": 2.1678832116788323e-06, "loss": 0.5998, "step": 20886 }, { "epoch": 0.6098216110478526, "grad_norm": 0.8021562882934395, "learning_rate": 2.1677210056772103e-06, "loss": 0.7071, "step": 20887 }, { "epoch": 0.6098508072757, "grad_norm": 0.8078949455382405, "learning_rate": 2.1675587996755883e-06, "loss": 0.7161, "step": 20888 }, { "epoch": 0.6098800035035473, "grad_norm": 0.7770329044080313, "learning_rate": 2.167396593673966e-06, "loss": 0.7228, "step": 20889 }, { "epoch": 0.6099091997313947, "grad_norm": 0.7478427063434246, "learning_rate": 2.167234387672344e-06, "loss": 0.6772, "step": 20890 }, { "epoch": 0.609938395959242, "grad_norm": 0.7051365384915965, "learning_rate": 2.167072181670722e-06, "loss": 0.6225, "step": 20891 }, { "epoch": 0.6099675921870894, "grad_norm": 0.689958410819739, "learning_rate": 2.1669099756691e-06, "loss": 0.5735, "step": 20892 }, { "epoch": 0.6099967884149368, "grad_norm": 0.739728287543035, "learning_rate": 2.166747769667478e-06, "loss": 0.6761, "step": 20893 }, { "epoch": 0.6100259846427841, "grad_norm": 0.733086503411605, "learning_rate": 2.166585563665856e-06, "loss": 0.6486, "step": 20894 }, { "epoch": 0.6100551808706315, "grad_norm": 0.7763089840891906, "learning_rate": 2.166423357664234e-06, "loss": 0.7344, "step": 20895 }, { "epoch": 0.6100843770984788, "grad_norm": 0.7295960359625605, "learning_rate": 2.1662611516626116e-06, "loss": 0.5882, "step": 20896 }, { "epoch": 0.6101135733263262, "grad_norm": 0.7022294889653213, "learning_rate": 2.1660989456609896e-06, "loss": 0.6317, "step": 20897 }, { "epoch": 0.6101427695541736, "grad_norm": 0.7278188359908447, "learning_rate": 2.1659367396593676e-06, "loss": 0.6217, "step": 20898 }, { "epoch": 0.6101719657820209, "grad_norm": 0.7137713813455068, "learning_rate": 2.1657745336577456e-06, "loss": 0.6329, "step": 20899 }, { "epoch": 0.6102011620098683, "grad_norm": 0.6898680665893868, "learning_rate": 2.165612327656123e-06, "loss": 0.6388, "step": 20900 }, { "epoch": 0.6102303582377157, "grad_norm": 0.7294496686879925, "learning_rate": 2.1654501216545016e-06, "loss": 0.6735, "step": 20901 }, { "epoch": 0.610259554465563, "grad_norm": 0.7820543267737734, "learning_rate": 2.1652879156528796e-06, "loss": 0.6915, "step": 20902 }, { "epoch": 0.6102887506934104, "grad_norm": 0.7581183515899431, "learning_rate": 2.165125709651257e-06, "loss": 0.7162, "step": 20903 }, { "epoch": 0.6103179469212577, "grad_norm": 0.7182347448398448, "learning_rate": 2.164963503649635e-06, "loss": 0.6231, "step": 20904 }, { "epoch": 0.6103471431491051, "grad_norm": 0.7033820605422424, "learning_rate": 2.164801297648013e-06, "loss": 0.6568, "step": 20905 }, { "epoch": 0.6103763393769525, "grad_norm": 0.7151574580116032, "learning_rate": 2.164639091646391e-06, "loss": 0.6017, "step": 20906 }, { "epoch": 0.6104055356047998, "grad_norm": 0.7422870464700778, "learning_rate": 2.1644768856447688e-06, "loss": 0.6621, "step": 20907 }, { "epoch": 0.6104347318326472, "grad_norm": 0.7244789333871571, "learning_rate": 2.1643146796431468e-06, "loss": 0.6619, "step": 20908 }, { "epoch": 0.6104639280604945, "grad_norm": 0.7482961149835475, "learning_rate": 2.164152473641525e-06, "loss": 0.7023, "step": 20909 }, { "epoch": 0.6104931242883419, "grad_norm": 0.7900031546587839, "learning_rate": 2.163990267639903e-06, "loss": 0.7239, "step": 20910 }, { "epoch": 0.6105223205161893, "grad_norm": 0.7620142673446958, "learning_rate": 2.163828061638281e-06, "loss": 0.6744, "step": 20911 }, { "epoch": 0.6105515167440366, "grad_norm": 0.7819088600661198, "learning_rate": 2.163665855636659e-06, "loss": 0.7078, "step": 20912 }, { "epoch": 0.610580712971884, "grad_norm": 0.7163793903541629, "learning_rate": 2.163503649635037e-06, "loss": 0.6349, "step": 20913 }, { "epoch": 0.6106099091997313, "grad_norm": 0.7421994641766658, "learning_rate": 2.163341443633415e-06, "loss": 0.656, "step": 20914 }, { "epoch": 0.6106391054275787, "grad_norm": 0.754514732289459, "learning_rate": 2.1631792376317924e-06, "loss": 0.6645, "step": 20915 }, { "epoch": 0.6106683016554261, "grad_norm": 0.7710144167451374, "learning_rate": 2.1630170316301704e-06, "loss": 0.6841, "step": 20916 }, { "epoch": 0.6106974978832734, "grad_norm": 0.6979779185177546, "learning_rate": 2.1628548256285484e-06, "loss": 0.6158, "step": 20917 }, { "epoch": 0.6107266941111208, "grad_norm": 0.8109256934263446, "learning_rate": 2.1626926196269264e-06, "loss": 0.78, "step": 20918 }, { "epoch": 0.6107558903389682, "grad_norm": 0.6978881826648873, "learning_rate": 2.162530413625304e-06, "loss": 0.6026, "step": 20919 }, { "epoch": 0.6107850865668155, "grad_norm": 0.7029304686307284, "learning_rate": 2.1623682076236824e-06, "loss": 0.6089, "step": 20920 }, { "epoch": 0.6108142827946629, "grad_norm": 0.7380370526335591, "learning_rate": 2.1622060016220604e-06, "loss": 0.668, "step": 20921 }, { "epoch": 0.6108434790225102, "grad_norm": 0.7935699869174937, "learning_rate": 2.162043795620438e-06, "loss": 0.7718, "step": 20922 }, { "epoch": 0.6108726752503576, "grad_norm": 0.7422618829035864, "learning_rate": 2.161881589618816e-06, "loss": 0.6725, "step": 20923 }, { "epoch": 0.610901871478205, "grad_norm": 0.7400911702544424, "learning_rate": 2.161719383617194e-06, "loss": 0.6803, "step": 20924 }, { "epoch": 0.6109310677060524, "grad_norm": 0.7315545031599352, "learning_rate": 2.161557177615572e-06, "loss": 0.6635, "step": 20925 }, { "epoch": 0.6109602639338998, "grad_norm": 0.6982723090339886, "learning_rate": 2.1613949716139496e-06, "loss": 0.5895, "step": 20926 }, { "epoch": 0.6109894601617472, "grad_norm": 0.6814315540665347, "learning_rate": 2.1612327656123276e-06, "loss": 0.5949, "step": 20927 }, { "epoch": 0.6110186563895945, "grad_norm": 0.7588830837382516, "learning_rate": 2.1610705596107057e-06, "loss": 0.6919, "step": 20928 }, { "epoch": 0.6110478526174419, "grad_norm": 0.7458935352897184, "learning_rate": 2.1609083536090837e-06, "loss": 0.6571, "step": 20929 }, { "epoch": 0.6110770488452892, "grad_norm": 0.7272300077140796, "learning_rate": 2.1607461476074617e-06, "loss": 0.6589, "step": 20930 }, { "epoch": 0.6111062450731366, "grad_norm": 0.7188166048498601, "learning_rate": 2.1605839416058397e-06, "loss": 0.6182, "step": 20931 }, { "epoch": 0.611135441300984, "grad_norm": 0.63091469801906, "learning_rate": 2.1604217356042177e-06, "loss": 0.4854, "step": 20932 }, { "epoch": 0.6111646375288313, "grad_norm": 0.7512633132434368, "learning_rate": 2.1602595296025957e-06, "loss": 0.6529, "step": 20933 }, { "epoch": 0.6111938337566787, "grad_norm": 0.7434456126858219, "learning_rate": 2.1600973236009733e-06, "loss": 0.6736, "step": 20934 }, { "epoch": 0.611223029984526, "grad_norm": 0.6972797601933022, "learning_rate": 2.1599351175993513e-06, "loss": 0.6368, "step": 20935 }, { "epoch": 0.6112522262123734, "grad_norm": 0.6950967683111783, "learning_rate": 2.1597729115977293e-06, "loss": 0.5774, "step": 20936 }, { "epoch": 0.6112814224402208, "grad_norm": 0.7467279338011784, "learning_rate": 2.1596107055961073e-06, "loss": 0.5829, "step": 20937 }, { "epoch": 0.6113106186680681, "grad_norm": 0.7498564446544755, "learning_rate": 2.159448499594485e-06, "loss": 0.6911, "step": 20938 }, { "epoch": 0.6113398148959155, "grad_norm": 0.759875833527504, "learning_rate": 2.1592862935928633e-06, "loss": 0.6275, "step": 20939 }, { "epoch": 0.6113690111237629, "grad_norm": 0.7166185142432279, "learning_rate": 2.1591240875912413e-06, "loss": 0.6387, "step": 20940 }, { "epoch": 0.6113982073516102, "grad_norm": 0.7536899980544242, "learning_rate": 2.158961881589619e-06, "loss": 0.634, "step": 20941 }, { "epoch": 0.6114274035794576, "grad_norm": 0.6815178121077059, "learning_rate": 2.158799675587997e-06, "loss": 0.5627, "step": 20942 }, { "epoch": 0.6114565998073049, "grad_norm": 0.6508333458294989, "learning_rate": 2.158637469586375e-06, "loss": 0.5137, "step": 20943 }, { "epoch": 0.6114857960351523, "grad_norm": 0.73230055291423, "learning_rate": 2.158475263584753e-06, "loss": 0.626, "step": 20944 }, { "epoch": 0.6115149922629997, "grad_norm": 0.6712415973157005, "learning_rate": 2.1583130575831305e-06, "loss": 0.5999, "step": 20945 }, { "epoch": 0.611544188490847, "grad_norm": 0.7032901954874037, "learning_rate": 2.1581508515815085e-06, "loss": 0.594, "step": 20946 }, { "epoch": 0.6115733847186944, "grad_norm": 0.6807430488199882, "learning_rate": 2.1579886455798865e-06, "loss": 0.5847, "step": 20947 }, { "epoch": 0.6116025809465417, "grad_norm": 0.7080451945722075, "learning_rate": 2.1578264395782645e-06, "loss": 0.6472, "step": 20948 }, { "epoch": 0.6116317771743891, "grad_norm": 0.749048319576075, "learning_rate": 2.1576642335766425e-06, "loss": 0.7048, "step": 20949 }, { "epoch": 0.6116609734022365, "grad_norm": 0.7454251856516089, "learning_rate": 2.1575020275750205e-06, "loss": 0.7195, "step": 20950 }, { "epoch": 0.6116901696300838, "grad_norm": 0.7146529409222829, "learning_rate": 2.1573398215733985e-06, "loss": 0.6056, "step": 20951 }, { "epoch": 0.6117193658579312, "grad_norm": 0.7150911798891318, "learning_rate": 2.1571776155717765e-06, "loss": 0.5888, "step": 20952 }, { "epoch": 0.6117485620857785, "grad_norm": 0.7683049412508595, "learning_rate": 2.157015409570154e-06, "loss": 0.682, "step": 20953 }, { "epoch": 0.6117777583136259, "grad_norm": 0.7300540617305026, "learning_rate": 2.156853203568532e-06, "loss": 0.6457, "step": 20954 }, { "epoch": 0.6118069545414733, "grad_norm": 0.7882604566445035, "learning_rate": 2.15669099756691e-06, "loss": 0.6778, "step": 20955 }, { "epoch": 0.6118361507693206, "grad_norm": 0.725407137728465, "learning_rate": 2.156528791565288e-06, "loss": 0.6966, "step": 20956 }, { "epoch": 0.611865346997168, "grad_norm": 0.7662813658591271, "learning_rate": 2.1563665855636657e-06, "loss": 0.6825, "step": 20957 }, { "epoch": 0.6118945432250154, "grad_norm": 0.6985060290004143, "learning_rate": 2.156204379562044e-06, "loss": 0.5541, "step": 20958 }, { "epoch": 0.6119237394528627, "grad_norm": 0.8425378901298091, "learning_rate": 2.156042173560422e-06, "loss": 0.6382, "step": 20959 }, { "epoch": 0.6119529356807101, "grad_norm": 0.7031657993597468, "learning_rate": 2.1558799675587998e-06, "loss": 0.6099, "step": 20960 }, { "epoch": 0.6119821319085574, "grad_norm": 0.6979446833124098, "learning_rate": 2.1557177615571778e-06, "loss": 0.6094, "step": 20961 }, { "epoch": 0.6120113281364048, "grad_norm": 0.772754537451266, "learning_rate": 2.1555555555555558e-06, "loss": 0.6461, "step": 20962 }, { "epoch": 0.6120405243642522, "grad_norm": 0.6941297006465058, "learning_rate": 2.1553933495539338e-06, "loss": 0.6133, "step": 20963 }, { "epoch": 0.6120697205920995, "grad_norm": 0.7148510576240934, "learning_rate": 2.1552311435523114e-06, "loss": 0.6241, "step": 20964 }, { "epoch": 0.6120989168199469, "grad_norm": 0.6994671699565906, "learning_rate": 2.1550689375506894e-06, "loss": 0.6, "step": 20965 }, { "epoch": 0.6121281130477942, "grad_norm": 0.6888005638923632, "learning_rate": 2.1549067315490674e-06, "loss": 0.6168, "step": 20966 }, { "epoch": 0.6121573092756416, "grad_norm": 0.7387648369877062, "learning_rate": 2.1547445255474454e-06, "loss": 0.6772, "step": 20967 }, { "epoch": 0.612186505503489, "grad_norm": 0.7404318821082442, "learning_rate": 2.1545823195458234e-06, "loss": 0.6358, "step": 20968 }, { "epoch": 0.6122157017313363, "grad_norm": 0.7418737301733311, "learning_rate": 2.1544201135442014e-06, "loss": 0.6719, "step": 20969 }, { "epoch": 0.6122448979591837, "grad_norm": 0.7006934805258211, "learning_rate": 2.1542579075425794e-06, "loss": 0.6179, "step": 20970 }, { "epoch": 0.612274094187031, "grad_norm": 0.7589156221153375, "learning_rate": 2.1540957015409574e-06, "loss": 0.7454, "step": 20971 }, { "epoch": 0.6123032904148784, "grad_norm": 0.7806420768174397, "learning_rate": 2.153933495539335e-06, "loss": 0.6281, "step": 20972 }, { "epoch": 0.6123324866427258, "grad_norm": 0.7286670552439982, "learning_rate": 2.153771289537713e-06, "loss": 0.6566, "step": 20973 }, { "epoch": 0.6123616828705731, "grad_norm": 0.7454061152221816, "learning_rate": 2.153609083536091e-06, "loss": 0.6354, "step": 20974 }, { "epoch": 0.6123908790984205, "grad_norm": 0.7005821329156677, "learning_rate": 2.153446877534469e-06, "loss": 0.6633, "step": 20975 }, { "epoch": 0.6124200753262679, "grad_norm": 0.7452074239395975, "learning_rate": 2.1532846715328466e-06, "loss": 0.7166, "step": 20976 }, { "epoch": 0.6124492715541152, "grad_norm": 0.6997682183699289, "learning_rate": 2.153122465531225e-06, "loss": 0.6437, "step": 20977 }, { "epoch": 0.6124784677819626, "grad_norm": 0.7810562953598795, "learning_rate": 2.152960259529603e-06, "loss": 0.7137, "step": 20978 }, { "epoch": 0.6125076640098099, "grad_norm": 0.7283864002201065, "learning_rate": 2.1527980535279806e-06, "loss": 0.5968, "step": 20979 }, { "epoch": 0.6125368602376573, "grad_norm": 0.6718768606676072, "learning_rate": 2.1526358475263586e-06, "loss": 0.6185, "step": 20980 }, { "epoch": 0.6125660564655047, "grad_norm": 0.6893292335571312, "learning_rate": 2.1524736415247366e-06, "loss": 0.5738, "step": 20981 }, { "epoch": 0.612595252693352, "grad_norm": 0.8039581831741043, "learning_rate": 2.1523114355231146e-06, "loss": 0.748, "step": 20982 }, { "epoch": 0.6126244489211994, "grad_norm": 0.7306958809639815, "learning_rate": 2.1521492295214922e-06, "loss": 0.6828, "step": 20983 }, { "epoch": 0.6126536451490467, "grad_norm": 0.7212456414724563, "learning_rate": 2.1519870235198702e-06, "loss": 0.6431, "step": 20984 }, { "epoch": 0.6126828413768941, "grad_norm": 0.8384594461641514, "learning_rate": 2.1518248175182482e-06, "loss": 0.7436, "step": 20985 }, { "epoch": 0.6127120376047415, "grad_norm": 0.7479463218284214, "learning_rate": 2.1516626115166262e-06, "loss": 0.693, "step": 20986 }, { "epoch": 0.6127412338325888, "grad_norm": 0.7386423814370106, "learning_rate": 2.1515004055150042e-06, "loss": 0.6761, "step": 20987 }, { "epoch": 0.6127704300604362, "grad_norm": 0.7150234748677095, "learning_rate": 2.1513381995133822e-06, "loss": 0.6181, "step": 20988 }, { "epoch": 0.6127996262882835, "grad_norm": 0.7188963398769016, "learning_rate": 2.1511759935117603e-06, "loss": 0.6462, "step": 20989 }, { "epoch": 0.6128288225161309, "grad_norm": 0.830545523219043, "learning_rate": 2.1510137875101383e-06, "loss": 0.6158, "step": 20990 }, { "epoch": 0.6128580187439783, "grad_norm": 0.7188818904479937, "learning_rate": 2.150851581508516e-06, "loss": 0.5614, "step": 20991 }, { "epoch": 0.6128872149718256, "grad_norm": 0.8489313567712464, "learning_rate": 2.150689375506894e-06, "loss": 0.778, "step": 20992 }, { "epoch": 0.612916411199673, "grad_norm": 0.8010970469775934, "learning_rate": 2.150527169505272e-06, "loss": 0.6906, "step": 20993 }, { "epoch": 0.6129456074275204, "grad_norm": 0.6975546716959384, "learning_rate": 2.15036496350365e-06, "loss": 0.5957, "step": 20994 }, { "epoch": 0.6129748036553677, "grad_norm": 0.762320586925389, "learning_rate": 2.1502027575020274e-06, "loss": 0.6122, "step": 20995 }, { "epoch": 0.6130039998832151, "grad_norm": 0.7304208172623939, "learning_rate": 2.150040551500406e-06, "loss": 0.6359, "step": 20996 }, { "epoch": 0.6130331961110624, "grad_norm": 0.7281243568751637, "learning_rate": 2.149878345498784e-06, "loss": 0.6073, "step": 20997 }, { "epoch": 0.6130623923389098, "grad_norm": 0.7151564258078866, "learning_rate": 2.1497161394971615e-06, "loss": 0.6387, "step": 20998 }, { "epoch": 0.6130915885667572, "grad_norm": 0.7215530248989696, "learning_rate": 2.1495539334955395e-06, "loss": 0.6694, "step": 20999 }, { "epoch": 0.6131207847946045, "grad_norm": 0.7328628368235667, "learning_rate": 2.1493917274939175e-06, "loss": 0.6328, "step": 21000 }, { "epoch": 0.6131499810224519, "grad_norm": 0.7246354672309625, "learning_rate": 2.1492295214922955e-06, "loss": 0.6005, "step": 21001 }, { "epoch": 0.6131791772502992, "grad_norm": 0.7424843448628214, "learning_rate": 2.149067315490673e-06, "loss": 0.7029, "step": 21002 }, { "epoch": 0.6132083734781466, "grad_norm": 0.7268507931149638, "learning_rate": 2.148905109489051e-06, "loss": 0.6729, "step": 21003 }, { "epoch": 0.613237569705994, "grad_norm": 0.7935884043036019, "learning_rate": 2.148742903487429e-06, "loss": 0.6531, "step": 21004 }, { "epoch": 0.6132667659338413, "grad_norm": 0.6734999115133745, "learning_rate": 2.148580697485807e-06, "loss": 0.5607, "step": 21005 }, { "epoch": 0.6132959621616887, "grad_norm": 0.7377288217375149, "learning_rate": 2.148418491484185e-06, "loss": 0.6962, "step": 21006 }, { "epoch": 0.613325158389536, "grad_norm": 0.6653317584766469, "learning_rate": 2.148256285482563e-06, "loss": 0.5504, "step": 21007 }, { "epoch": 0.6133543546173834, "grad_norm": 0.714064219155499, "learning_rate": 2.148094079480941e-06, "loss": 0.6389, "step": 21008 }, { "epoch": 0.6133835508452308, "grad_norm": 0.8916444550952356, "learning_rate": 2.147931873479319e-06, "loss": 0.6591, "step": 21009 }, { "epoch": 0.6134127470730781, "grad_norm": 0.7174494820669236, "learning_rate": 2.1477696674776967e-06, "loss": 0.6331, "step": 21010 }, { "epoch": 0.6134419433009255, "grad_norm": 0.7342779435180621, "learning_rate": 2.1476074614760747e-06, "loss": 0.6163, "step": 21011 }, { "epoch": 0.6134711395287729, "grad_norm": 0.7588253228080754, "learning_rate": 2.1474452554744527e-06, "loss": 0.6468, "step": 21012 }, { "epoch": 0.6135003357566202, "grad_norm": 0.7822189018050386, "learning_rate": 2.1472830494728307e-06, "loss": 0.7231, "step": 21013 }, { "epoch": 0.6135295319844676, "grad_norm": 0.7221019058669554, "learning_rate": 2.1471208434712087e-06, "loss": 0.6291, "step": 21014 }, { "epoch": 0.6135587282123149, "grad_norm": 0.7135440761659325, "learning_rate": 2.1469586374695867e-06, "loss": 0.6441, "step": 21015 }, { "epoch": 0.6135879244401623, "grad_norm": 0.7132037873570475, "learning_rate": 2.1467964314679647e-06, "loss": 0.6175, "step": 21016 }, { "epoch": 0.6136171206680097, "grad_norm": 0.732363200524768, "learning_rate": 2.1466342254663423e-06, "loss": 0.6459, "step": 21017 }, { "epoch": 0.613646316895857, "grad_norm": 0.7129288665236313, "learning_rate": 2.1464720194647203e-06, "loss": 0.6437, "step": 21018 }, { "epoch": 0.6136755131237044, "grad_norm": 0.687349410808462, "learning_rate": 2.1463098134630983e-06, "loss": 0.5814, "step": 21019 }, { "epoch": 0.6137047093515517, "grad_norm": 0.7318984059436175, "learning_rate": 2.1461476074614763e-06, "loss": 0.6968, "step": 21020 }, { "epoch": 0.6137339055793991, "grad_norm": 0.728419599763965, "learning_rate": 2.145985401459854e-06, "loss": 0.6661, "step": 21021 }, { "epoch": 0.6137631018072465, "grad_norm": 0.7860540976564123, "learning_rate": 2.145823195458232e-06, "loss": 0.6937, "step": 21022 }, { "epoch": 0.6137922980350938, "grad_norm": 0.694636497404804, "learning_rate": 2.14566098945661e-06, "loss": 0.6296, "step": 21023 }, { "epoch": 0.6138214942629412, "grad_norm": 0.7459639561085482, "learning_rate": 2.145498783454988e-06, "loss": 0.6971, "step": 21024 }, { "epoch": 0.6138506904907886, "grad_norm": 0.7794862502542383, "learning_rate": 2.145336577453366e-06, "loss": 0.6147, "step": 21025 }, { "epoch": 0.6138798867186359, "grad_norm": 0.7384941251099912, "learning_rate": 2.145174371451744e-06, "loss": 0.6882, "step": 21026 }, { "epoch": 0.6139090829464833, "grad_norm": 0.7212455215718587, "learning_rate": 2.145012165450122e-06, "loss": 0.6738, "step": 21027 }, { "epoch": 0.6139382791743306, "grad_norm": 0.7500120152092009, "learning_rate": 2.1448499594485e-06, "loss": 0.6455, "step": 21028 }, { "epoch": 0.613967475402178, "grad_norm": 0.7348692567999046, "learning_rate": 2.1446877534468776e-06, "loss": 0.6432, "step": 21029 }, { "epoch": 0.6139966716300254, "grad_norm": 0.6884955705219727, "learning_rate": 2.1445255474452556e-06, "loss": 0.6422, "step": 21030 }, { "epoch": 0.6140258678578727, "grad_norm": 0.7697341540600624, "learning_rate": 2.1443633414436336e-06, "loss": 0.7403, "step": 21031 }, { "epoch": 0.6140550640857201, "grad_norm": 0.6746556797182834, "learning_rate": 2.1442011354420116e-06, "loss": 0.641, "step": 21032 }, { "epoch": 0.6140842603135674, "grad_norm": 0.7084349223596759, "learning_rate": 2.1440389294403896e-06, "loss": 0.6587, "step": 21033 }, { "epoch": 0.6141134565414148, "grad_norm": 0.7126279385481858, "learning_rate": 2.1438767234387676e-06, "loss": 0.58, "step": 21034 }, { "epoch": 0.6141426527692622, "grad_norm": 0.78499363514975, "learning_rate": 2.1437145174371456e-06, "loss": 0.7203, "step": 21035 }, { "epoch": 0.6141718489971095, "grad_norm": 0.7344483270911883, "learning_rate": 2.143552311435523e-06, "loss": 0.701, "step": 21036 }, { "epoch": 0.6142010452249569, "grad_norm": 0.7794096330636826, "learning_rate": 2.143390105433901e-06, "loss": 0.6863, "step": 21037 }, { "epoch": 0.6142302414528042, "grad_norm": 0.7068767567157853, "learning_rate": 2.143227899432279e-06, "loss": 0.6242, "step": 21038 }, { "epoch": 0.6142594376806516, "grad_norm": 0.6980718132196684, "learning_rate": 2.143065693430657e-06, "loss": 0.5855, "step": 21039 }, { "epoch": 0.614288633908499, "grad_norm": 0.7284861363548109, "learning_rate": 2.142903487429035e-06, "loss": 0.6388, "step": 21040 }, { "epoch": 0.6143178301363463, "grad_norm": 0.7290213872141257, "learning_rate": 2.142741281427413e-06, "loss": 0.6064, "step": 21041 }, { "epoch": 0.6143470263641937, "grad_norm": 0.691904863953017, "learning_rate": 2.142579075425791e-06, "loss": 0.6119, "step": 21042 }, { "epoch": 0.614376222592041, "grad_norm": 0.7162435733359799, "learning_rate": 2.142416869424169e-06, "loss": 0.6392, "step": 21043 }, { "epoch": 0.6144054188198884, "grad_norm": 0.7621650704558157, "learning_rate": 2.142254663422547e-06, "loss": 0.6855, "step": 21044 }, { "epoch": 0.6144346150477358, "grad_norm": 0.6981205327505471, "learning_rate": 2.142092457420925e-06, "loss": 0.6291, "step": 21045 }, { "epoch": 0.6144638112755832, "grad_norm": 0.7443665273175495, "learning_rate": 2.141930251419303e-06, "loss": 0.6517, "step": 21046 }, { "epoch": 0.6144930075034306, "grad_norm": 0.7399563020915776, "learning_rate": 2.141768045417681e-06, "loss": 0.6744, "step": 21047 }, { "epoch": 0.614522203731278, "grad_norm": 0.7269306621822016, "learning_rate": 2.1416058394160584e-06, "loss": 0.5701, "step": 21048 }, { "epoch": 0.6145513999591253, "grad_norm": 0.7437716640115637, "learning_rate": 2.1414436334144364e-06, "loss": 0.5489, "step": 21049 }, { "epoch": 0.6145805961869727, "grad_norm": 0.7470293342521561, "learning_rate": 2.1412814274128144e-06, "loss": 0.7311, "step": 21050 }, { "epoch": 0.61460979241482, "grad_norm": 0.9303771243414451, "learning_rate": 2.1411192214111924e-06, "loss": 0.6985, "step": 21051 }, { "epoch": 0.6146389886426674, "grad_norm": 0.6735054024548452, "learning_rate": 2.1409570154095704e-06, "loss": 0.5846, "step": 21052 }, { "epoch": 0.6146681848705148, "grad_norm": 0.7848506877485122, "learning_rate": 2.1407948094079485e-06, "loss": 0.6475, "step": 21053 }, { "epoch": 0.6146973810983621, "grad_norm": 0.7164589224935394, "learning_rate": 2.1406326034063265e-06, "loss": 0.6745, "step": 21054 }, { "epoch": 0.6147265773262095, "grad_norm": 0.7316287533986566, "learning_rate": 2.140470397404704e-06, "loss": 0.6278, "step": 21055 }, { "epoch": 0.6147557735540569, "grad_norm": 0.7587326418476759, "learning_rate": 2.140308191403082e-06, "loss": 0.6561, "step": 21056 }, { "epoch": 0.6147849697819042, "grad_norm": 0.6860696134729913, "learning_rate": 2.14014598540146e-06, "loss": 0.5962, "step": 21057 }, { "epoch": 0.6148141660097516, "grad_norm": 0.7775509879255285, "learning_rate": 2.139983779399838e-06, "loss": 0.6558, "step": 21058 }, { "epoch": 0.614843362237599, "grad_norm": 0.6793752536828935, "learning_rate": 2.1398215733982156e-06, "loss": 0.5896, "step": 21059 }, { "epoch": 0.6148725584654463, "grad_norm": 0.8173561456272567, "learning_rate": 2.1396593673965937e-06, "loss": 0.7443, "step": 21060 }, { "epoch": 0.6149017546932937, "grad_norm": 0.6916409763418376, "learning_rate": 2.1394971613949717e-06, "loss": 0.594, "step": 21061 }, { "epoch": 0.614930950921141, "grad_norm": 0.7339996517376961, "learning_rate": 2.1393349553933497e-06, "loss": 0.7101, "step": 21062 }, { "epoch": 0.6149601471489884, "grad_norm": 0.6754321666764694, "learning_rate": 2.1391727493917277e-06, "loss": 0.629, "step": 21063 }, { "epoch": 0.6149893433768358, "grad_norm": 0.7839498609786162, "learning_rate": 2.1390105433901057e-06, "loss": 0.6736, "step": 21064 }, { "epoch": 0.6150185396046831, "grad_norm": 0.7405837053852364, "learning_rate": 2.1388483373884837e-06, "loss": 0.6384, "step": 21065 }, { "epoch": 0.6150477358325305, "grad_norm": 0.6943054920505499, "learning_rate": 2.1386861313868617e-06, "loss": 0.6202, "step": 21066 }, { "epoch": 0.6150769320603778, "grad_norm": 0.676902902986476, "learning_rate": 2.1385239253852393e-06, "loss": 0.5349, "step": 21067 }, { "epoch": 0.6151061282882252, "grad_norm": 0.6995477358580118, "learning_rate": 2.1383617193836173e-06, "loss": 0.6258, "step": 21068 }, { "epoch": 0.6151353245160726, "grad_norm": 0.696284131473539, "learning_rate": 2.1381995133819953e-06, "loss": 0.6278, "step": 21069 }, { "epoch": 0.6151645207439199, "grad_norm": 0.7380848242954019, "learning_rate": 2.1380373073803733e-06, "loss": 0.5954, "step": 21070 }, { "epoch": 0.6151937169717673, "grad_norm": 0.7173494828850285, "learning_rate": 2.1378751013787513e-06, "loss": 0.6129, "step": 21071 }, { "epoch": 0.6152229131996146, "grad_norm": 0.8008232875246242, "learning_rate": 2.1377128953771293e-06, "loss": 0.6926, "step": 21072 }, { "epoch": 0.615252109427462, "grad_norm": 0.7080048000504305, "learning_rate": 2.1375506893755073e-06, "loss": 0.6655, "step": 21073 }, { "epoch": 0.6152813056553094, "grad_norm": 0.7651465542598112, "learning_rate": 2.137388483373885e-06, "loss": 0.6879, "step": 21074 }, { "epoch": 0.6153105018831567, "grad_norm": 0.7314423357319506, "learning_rate": 2.137226277372263e-06, "loss": 0.6744, "step": 21075 }, { "epoch": 0.6153396981110041, "grad_norm": 0.7083536689971779, "learning_rate": 2.137064071370641e-06, "loss": 0.6047, "step": 21076 }, { "epoch": 0.6153688943388514, "grad_norm": 0.7384319160520129, "learning_rate": 2.136901865369019e-06, "loss": 0.6636, "step": 21077 }, { "epoch": 0.6153980905666988, "grad_norm": 0.6867434123069155, "learning_rate": 2.1367396593673965e-06, "loss": 0.6239, "step": 21078 }, { "epoch": 0.6154272867945462, "grad_norm": 0.7570903468671993, "learning_rate": 2.1365774533657745e-06, "loss": 0.6411, "step": 21079 }, { "epoch": 0.6154564830223935, "grad_norm": 0.7042355342523553, "learning_rate": 2.1364152473641525e-06, "loss": 0.6299, "step": 21080 }, { "epoch": 0.6154856792502409, "grad_norm": 0.7802959451104287, "learning_rate": 2.1362530413625305e-06, "loss": 0.7434, "step": 21081 }, { "epoch": 0.6155148754780883, "grad_norm": 0.7767497636199971, "learning_rate": 2.1360908353609085e-06, "loss": 0.6845, "step": 21082 }, { "epoch": 0.6155440717059356, "grad_norm": 0.7433831461142919, "learning_rate": 2.1359286293592865e-06, "loss": 0.6928, "step": 21083 }, { "epoch": 0.615573267933783, "grad_norm": 0.7481053003851211, "learning_rate": 2.1357664233576645e-06, "loss": 0.7044, "step": 21084 }, { "epoch": 0.6156024641616303, "grad_norm": 0.6828573879972362, "learning_rate": 2.1356042173560426e-06, "loss": 0.6218, "step": 21085 }, { "epoch": 0.6156316603894777, "grad_norm": 0.7758530360814643, "learning_rate": 2.13544201135442e-06, "loss": 0.7628, "step": 21086 }, { "epoch": 0.6156608566173251, "grad_norm": 0.7378647922422983, "learning_rate": 2.135279805352798e-06, "loss": 0.6379, "step": 21087 }, { "epoch": 0.6156900528451724, "grad_norm": 0.7329926330157477, "learning_rate": 2.135117599351176e-06, "loss": 0.6518, "step": 21088 }, { "epoch": 0.6157192490730198, "grad_norm": 0.7334183796474268, "learning_rate": 2.134955393349554e-06, "loss": 0.5946, "step": 21089 }, { "epoch": 0.6157484453008671, "grad_norm": 0.6927756387669396, "learning_rate": 2.134793187347932e-06, "loss": 0.5952, "step": 21090 }, { "epoch": 0.6157776415287145, "grad_norm": 0.7255590007436526, "learning_rate": 2.13463098134631e-06, "loss": 0.6104, "step": 21091 }, { "epoch": 0.6158068377565619, "grad_norm": 0.8255301427650529, "learning_rate": 2.134468775344688e-06, "loss": 0.6035, "step": 21092 }, { "epoch": 0.6158360339844092, "grad_norm": 0.7453987532112234, "learning_rate": 2.1343065693430658e-06, "loss": 0.6743, "step": 21093 }, { "epoch": 0.6158652302122566, "grad_norm": 0.7428264591544749, "learning_rate": 2.1341443633414438e-06, "loss": 0.595, "step": 21094 }, { "epoch": 0.615894426440104, "grad_norm": 0.8037349500633966, "learning_rate": 2.1339821573398218e-06, "loss": 0.723, "step": 21095 }, { "epoch": 0.6159236226679513, "grad_norm": 0.7344893627446719, "learning_rate": 2.1338199513381998e-06, "loss": 0.6002, "step": 21096 }, { "epoch": 0.6159528188957987, "grad_norm": 0.7538248236109398, "learning_rate": 2.1336577453365774e-06, "loss": 0.6649, "step": 21097 }, { "epoch": 0.615982015123646, "grad_norm": 0.7701899347358026, "learning_rate": 2.1334955393349554e-06, "loss": 0.7385, "step": 21098 }, { "epoch": 0.6160112113514934, "grad_norm": 0.7367690485304099, "learning_rate": 2.133333333333334e-06, "loss": 0.6217, "step": 21099 }, { "epoch": 0.6160404075793408, "grad_norm": 0.6820842420077566, "learning_rate": 2.1331711273317114e-06, "loss": 0.5305, "step": 21100 }, { "epoch": 0.6160696038071881, "grad_norm": 0.7347879386397153, "learning_rate": 2.1330089213300894e-06, "loss": 0.7216, "step": 21101 }, { "epoch": 0.6160988000350355, "grad_norm": 0.7423539170256807, "learning_rate": 2.1328467153284674e-06, "loss": 0.6704, "step": 21102 }, { "epoch": 0.6161279962628828, "grad_norm": 0.8286561559765878, "learning_rate": 2.1326845093268454e-06, "loss": 0.6903, "step": 21103 }, { "epoch": 0.6161571924907302, "grad_norm": 0.6470969512141521, "learning_rate": 2.132522303325223e-06, "loss": 0.5364, "step": 21104 }, { "epoch": 0.6161863887185776, "grad_norm": 0.7544297283335193, "learning_rate": 2.132360097323601e-06, "loss": 0.7236, "step": 21105 }, { "epoch": 0.6162155849464249, "grad_norm": 0.831391799169374, "learning_rate": 2.132197891321979e-06, "loss": 0.6818, "step": 21106 }, { "epoch": 0.6162447811742723, "grad_norm": 0.7121013682806384, "learning_rate": 2.132035685320357e-06, "loss": 0.6112, "step": 21107 }, { "epoch": 0.6162739774021196, "grad_norm": 0.7200166646409202, "learning_rate": 2.131873479318735e-06, "loss": 0.6176, "step": 21108 }, { "epoch": 0.616303173629967, "grad_norm": 0.7155403880452901, "learning_rate": 2.131711273317113e-06, "loss": 0.6427, "step": 21109 }, { "epoch": 0.6163323698578144, "grad_norm": 0.7574648114740566, "learning_rate": 2.131549067315491e-06, "loss": 0.7011, "step": 21110 }, { "epoch": 0.6163615660856617, "grad_norm": 0.6974872497775328, "learning_rate": 2.131386861313869e-06, "loss": 0.6171, "step": 21111 }, { "epoch": 0.6163907623135091, "grad_norm": 0.7847542112866026, "learning_rate": 2.1312246553122466e-06, "loss": 0.6286, "step": 21112 }, { "epoch": 0.6164199585413564, "grad_norm": 0.7160194225081509, "learning_rate": 2.1310624493106246e-06, "loss": 0.638, "step": 21113 }, { "epoch": 0.6164491547692038, "grad_norm": 0.7277954842556117, "learning_rate": 2.1309002433090026e-06, "loss": 0.6956, "step": 21114 }, { "epoch": 0.6164783509970512, "grad_norm": 0.7306590242752597, "learning_rate": 2.1307380373073806e-06, "loss": 0.6604, "step": 21115 }, { "epoch": 0.6165075472248985, "grad_norm": 0.7073319094741244, "learning_rate": 2.1305758313057582e-06, "loss": 0.6603, "step": 21116 }, { "epoch": 0.6165367434527459, "grad_norm": 0.7361875213078815, "learning_rate": 2.1304136253041362e-06, "loss": 0.6992, "step": 21117 }, { "epoch": 0.6165659396805933, "grad_norm": 0.7133722944429097, "learning_rate": 2.1302514193025147e-06, "loss": 0.5907, "step": 21118 }, { "epoch": 0.6165951359084406, "grad_norm": 0.7290788406757264, "learning_rate": 2.1300892133008922e-06, "loss": 0.6134, "step": 21119 }, { "epoch": 0.616624332136288, "grad_norm": 0.7354552980615313, "learning_rate": 2.1299270072992703e-06, "loss": 0.638, "step": 21120 }, { "epoch": 0.6166535283641353, "grad_norm": 0.691893399052926, "learning_rate": 2.1297648012976483e-06, "loss": 0.5751, "step": 21121 }, { "epoch": 0.6166827245919827, "grad_norm": 0.7126543332850902, "learning_rate": 2.1296025952960263e-06, "loss": 0.5996, "step": 21122 }, { "epoch": 0.6167119208198301, "grad_norm": 0.7021253458764108, "learning_rate": 2.129440389294404e-06, "loss": 0.6396, "step": 21123 }, { "epoch": 0.6167411170476774, "grad_norm": 0.7106586433025248, "learning_rate": 2.129278183292782e-06, "loss": 0.6088, "step": 21124 }, { "epoch": 0.6167703132755248, "grad_norm": 0.713391414502066, "learning_rate": 2.12911597729116e-06, "loss": 0.5992, "step": 21125 }, { "epoch": 0.6167995095033721, "grad_norm": 0.741520596789055, "learning_rate": 2.128953771289538e-06, "loss": 0.6632, "step": 21126 }, { "epoch": 0.6168287057312195, "grad_norm": 0.7607450216359054, "learning_rate": 2.128791565287916e-06, "loss": 0.6723, "step": 21127 }, { "epoch": 0.6168579019590669, "grad_norm": 0.7701899174472981, "learning_rate": 2.128629359286294e-06, "loss": 0.6511, "step": 21128 }, { "epoch": 0.6168870981869142, "grad_norm": 0.6899126850977706, "learning_rate": 2.128467153284672e-06, "loss": 0.6008, "step": 21129 }, { "epoch": 0.6169162944147616, "grad_norm": 0.6958569127397739, "learning_rate": 2.12830494728305e-06, "loss": 0.5912, "step": 21130 }, { "epoch": 0.616945490642609, "grad_norm": 0.7690238389359781, "learning_rate": 2.1281427412814275e-06, "loss": 0.6499, "step": 21131 }, { "epoch": 0.6169746868704563, "grad_norm": 0.8313181818647634, "learning_rate": 2.1279805352798055e-06, "loss": 0.6726, "step": 21132 }, { "epoch": 0.6170038830983037, "grad_norm": 0.6812702466611149, "learning_rate": 2.1278183292781835e-06, "loss": 0.5692, "step": 21133 }, { "epoch": 0.617033079326151, "grad_norm": 0.761686843222984, "learning_rate": 2.1276561232765615e-06, "loss": 0.7049, "step": 21134 }, { "epoch": 0.6170622755539984, "grad_norm": 0.7203121098650529, "learning_rate": 2.127493917274939e-06, "loss": 0.5914, "step": 21135 }, { "epoch": 0.6170914717818458, "grad_norm": 0.6959586772540824, "learning_rate": 2.127331711273317e-06, "loss": 0.6008, "step": 21136 }, { "epoch": 0.6171206680096931, "grad_norm": 0.7366776577156525, "learning_rate": 2.1271695052716955e-06, "loss": 0.6864, "step": 21137 }, { "epoch": 0.6171498642375405, "grad_norm": 0.7808296031764573, "learning_rate": 2.127007299270073e-06, "loss": 0.643, "step": 21138 }, { "epoch": 0.6171790604653878, "grad_norm": 0.86864325609751, "learning_rate": 2.126845093268451e-06, "loss": 0.6754, "step": 21139 }, { "epoch": 0.6172082566932352, "grad_norm": 0.7160362504103882, "learning_rate": 2.126682887266829e-06, "loss": 0.6347, "step": 21140 }, { "epoch": 0.6172374529210826, "grad_norm": 0.7600012466646566, "learning_rate": 2.126520681265207e-06, "loss": 0.6697, "step": 21141 }, { "epoch": 0.6172666491489299, "grad_norm": 0.7140500232029209, "learning_rate": 2.1263584752635847e-06, "loss": 0.6391, "step": 21142 }, { "epoch": 0.6172958453767773, "grad_norm": 0.7553339046785019, "learning_rate": 2.1261962692619627e-06, "loss": 0.7189, "step": 21143 }, { "epoch": 0.6173250416046246, "grad_norm": 0.6996687560846693, "learning_rate": 2.1260340632603407e-06, "loss": 0.5904, "step": 21144 }, { "epoch": 0.617354237832472, "grad_norm": 0.654339404148318, "learning_rate": 2.1258718572587187e-06, "loss": 0.5749, "step": 21145 }, { "epoch": 0.6173834340603194, "grad_norm": 0.728127444871498, "learning_rate": 2.1257096512570967e-06, "loss": 0.6924, "step": 21146 }, { "epoch": 0.6174126302881667, "grad_norm": 0.7084382516288852, "learning_rate": 2.1255474452554747e-06, "loss": 0.6802, "step": 21147 }, { "epoch": 0.6174418265160141, "grad_norm": 0.7062271669005965, "learning_rate": 2.1253852392538527e-06, "loss": 0.6253, "step": 21148 }, { "epoch": 0.6174710227438615, "grad_norm": 0.7118199996224669, "learning_rate": 2.1252230332522308e-06, "loss": 0.6503, "step": 21149 }, { "epoch": 0.6175002189717088, "grad_norm": 0.7441132980691442, "learning_rate": 2.1250608272506083e-06, "loss": 0.6592, "step": 21150 }, { "epoch": 0.6175294151995562, "grad_norm": 0.7638250067604038, "learning_rate": 2.1248986212489863e-06, "loss": 0.7037, "step": 21151 }, { "epoch": 0.6175586114274035, "grad_norm": 0.7046787003155619, "learning_rate": 2.1247364152473644e-06, "loss": 0.6266, "step": 21152 }, { "epoch": 0.6175878076552509, "grad_norm": 0.7253866443351745, "learning_rate": 2.1245742092457424e-06, "loss": 0.6719, "step": 21153 }, { "epoch": 0.6176170038830983, "grad_norm": 0.7059022361596271, "learning_rate": 2.12441200324412e-06, "loss": 0.5994, "step": 21154 }, { "epoch": 0.6176462001109456, "grad_norm": 0.7255236935336994, "learning_rate": 2.124249797242498e-06, "loss": 0.6636, "step": 21155 }, { "epoch": 0.617675396338793, "grad_norm": 0.6962166352906344, "learning_rate": 2.1240875912408764e-06, "loss": 0.5875, "step": 21156 }, { "epoch": 0.6177045925666403, "grad_norm": 0.7147322461344725, "learning_rate": 2.123925385239254e-06, "loss": 0.6089, "step": 21157 }, { "epoch": 0.6177337887944877, "grad_norm": 0.7285602233248127, "learning_rate": 2.123763179237632e-06, "loss": 0.5914, "step": 21158 }, { "epoch": 0.6177629850223351, "grad_norm": 0.7027998864210743, "learning_rate": 2.12360097323601e-06, "loss": 0.6363, "step": 21159 }, { "epoch": 0.6177921812501824, "grad_norm": 0.7114344674195976, "learning_rate": 2.123438767234388e-06, "loss": 0.6511, "step": 21160 }, { "epoch": 0.6178213774780298, "grad_norm": 0.6552878259400344, "learning_rate": 2.1232765612327656e-06, "loss": 0.5522, "step": 21161 }, { "epoch": 0.6178505737058771, "grad_norm": 0.7666653426416641, "learning_rate": 2.1231143552311436e-06, "loss": 0.698, "step": 21162 }, { "epoch": 0.6178797699337245, "grad_norm": 0.7629436512829855, "learning_rate": 2.1229521492295216e-06, "loss": 0.5911, "step": 21163 }, { "epoch": 0.6179089661615719, "grad_norm": 0.6896995610053804, "learning_rate": 2.1227899432278996e-06, "loss": 0.6108, "step": 21164 }, { "epoch": 0.6179381623894192, "grad_norm": 0.7056148656875663, "learning_rate": 2.1226277372262776e-06, "loss": 0.5692, "step": 21165 }, { "epoch": 0.6179673586172667, "grad_norm": 0.7722469929494187, "learning_rate": 2.1224655312246556e-06, "loss": 0.7211, "step": 21166 }, { "epoch": 0.6179965548451141, "grad_norm": 0.7248030148250288, "learning_rate": 2.1223033252230336e-06, "loss": 0.6223, "step": 21167 }, { "epoch": 0.6180257510729614, "grad_norm": 0.757093393873009, "learning_rate": 2.1221411192214116e-06, "loss": 0.7032, "step": 21168 }, { "epoch": 0.6180549473008088, "grad_norm": 0.7071723266504044, "learning_rate": 2.121978913219789e-06, "loss": 0.6335, "step": 21169 }, { "epoch": 0.6180841435286561, "grad_norm": 0.9844960512376404, "learning_rate": 2.121816707218167e-06, "loss": 0.6331, "step": 21170 }, { "epoch": 0.6181133397565035, "grad_norm": 0.7924324552956066, "learning_rate": 2.121654501216545e-06, "loss": 0.6715, "step": 21171 }, { "epoch": 0.6181425359843509, "grad_norm": 0.7325688329708557, "learning_rate": 2.1214922952149232e-06, "loss": 0.5976, "step": 21172 }, { "epoch": 0.6181717322121982, "grad_norm": 0.7694782258970277, "learning_rate": 2.121330089213301e-06, "loss": 0.6649, "step": 21173 }, { "epoch": 0.6182009284400456, "grad_norm": 0.6773328456039382, "learning_rate": 2.121167883211679e-06, "loss": 0.555, "step": 21174 }, { "epoch": 0.618230124667893, "grad_norm": 0.7441824735891379, "learning_rate": 2.1210056772100572e-06, "loss": 0.6328, "step": 21175 }, { "epoch": 0.6182593208957403, "grad_norm": 0.6892948547377569, "learning_rate": 2.120843471208435e-06, "loss": 0.5583, "step": 21176 }, { "epoch": 0.6182885171235877, "grad_norm": 0.7202092313081989, "learning_rate": 2.120681265206813e-06, "loss": 0.6387, "step": 21177 }, { "epoch": 0.618317713351435, "grad_norm": 0.7261155747900297, "learning_rate": 2.120519059205191e-06, "loss": 0.6266, "step": 21178 }, { "epoch": 0.6183469095792824, "grad_norm": 0.6831236286308505, "learning_rate": 2.120356853203569e-06, "loss": 0.567, "step": 21179 }, { "epoch": 0.6183761058071298, "grad_norm": 0.7554696697861674, "learning_rate": 2.1201946472019464e-06, "loss": 0.6871, "step": 21180 }, { "epoch": 0.6184053020349771, "grad_norm": 0.7091912319193324, "learning_rate": 2.1200324412003244e-06, "loss": 0.6485, "step": 21181 }, { "epoch": 0.6184344982628245, "grad_norm": 0.7058513204346172, "learning_rate": 2.1198702351987024e-06, "loss": 0.5839, "step": 21182 }, { "epoch": 0.6184636944906718, "grad_norm": 0.698867105714117, "learning_rate": 2.1197080291970804e-06, "loss": 0.6072, "step": 21183 }, { "epoch": 0.6184928907185192, "grad_norm": 0.7437363479094621, "learning_rate": 2.1195458231954585e-06, "loss": 0.6491, "step": 21184 }, { "epoch": 0.6185220869463666, "grad_norm": 0.6892596626719742, "learning_rate": 2.1193836171938365e-06, "loss": 0.5865, "step": 21185 }, { "epoch": 0.6185512831742139, "grad_norm": 0.780978082970531, "learning_rate": 2.1192214111922145e-06, "loss": 0.7444, "step": 21186 }, { "epoch": 0.6185804794020613, "grad_norm": 0.7572918665423586, "learning_rate": 2.1190592051905925e-06, "loss": 0.5954, "step": 21187 }, { "epoch": 0.6186096756299087, "grad_norm": 0.7291313099249069, "learning_rate": 2.11889699918897e-06, "loss": 0.7096, "step": 21188 }, { "epoch": 0.618638871857756, "grad_norm": 0.7297852254407373, "learning_rate": 2.118734793187348e-06, "loss": 0.5936, "step": 21189 }, { "epoch": 0.6186680680856034, "grad_norm": 0.7448749620076878, "learning_rate": 2.118572587185726e-06, "loss": 0.598, "step": 21190 }, { "epoch": 0.6186972643134507, "grad_norm": 0.6848393211404429, "learning_rate": 2.118410381184104e-06, "loss": 0.6088, "step": 21191 }, { "epoch": 0.6187264605412981, "grad_norm": 0.7173030190760008, "learning_rate": 2.1182481751824817e-06, "loss": 0.6204, "step": 21192 }, { "epoch": 0.6187556567691455, "grad_norm": 0.7320709643422645, "learning_rate": 2.1180859691808597e-06, "loss": 0.6377, "step": 21193 }, { "epoch": 0.6187848529969928, "grad_norm": 0.7577722566726172, "learning_rate": 2.117923763179238e-06, "loss": 0.6308, "step": 21194 }, { "epoch": 0.6188140492248402, "grad_norm": 0.7321524143123563, "learning_rate": 2.1177615571776157e-06, "loss": 0.6913, "step": 21195 }, { "epoch": 0.6188432454526875, "grad_norm": 0.7385975772089409, "learning_rate": 2.1175993511759937e-06, "loss": 0.7087, "step": 21196 }, { "epoch": 0.6188724416805349, "grad_norm": 0.7494674304675731, "learning_rate": 2.1174371451743717e-06, "loss": 0.6684, "step": 21197 }, { "epoch": 0.6189016379083823, "grad_norm": 0.6817785694604799, "learning_rate": 2.1172749391727497e-06, "loss": 0.5722, "step": 21198 }, { "epoch": 0.6189308341362296, "grad_norm": 0.696065375167093, "learning_rate": 2.1171127331711273e-06, "loss": 0.6216, "step": 21199 }, { "epoch": 0.618960030364077, "grad_norm": 0.7554315877563798, "learning_rate": 2.1169505271695053e-06, "loss": 0.6907, "step": 21200 }, { "epoch": 0.6189892265919243, "grad_norm": 0.780095817074146, "learning_rate": 2.1167883211678833e-06, "loss": 0.7081, "step": 21201 }, { "epoch": 0.6190184228197717, "grad_norm": 0.776045316813489, "learning_rate": 2.1166261151662613e-06, "loss": 0.5986, "step": 21202 }, { "epoch": 0.6190476190476191, "grad_norm": 0.7073701037592462, "learning_rate": 2.1164639091646393e-06, "loss": 0.6369, "step": 21203 }, { "epoch": 0.6190768152754664, "grad_norm": 0.7675800372401609, "learning_rate": 2.1163017031630173e-06, "loss": 0.7518, "step": 21204 }, { "epoch": 0.6191060115033138, "grad_norm": 0.6757549948138848, "learning_rate": 2.1161394971613953e-06, "loss": 0.5383, "step": 21205 }, { "epoch": 0.6191352077311612, "grad_norm": 0.6845146374805718, "learning_rate": 2.1159772911597733e-06, "loss": 0.6059, "step": 21206 }, { "epoch": 0.6191644039590085, "grad_norm": 0.7424735594380157, "learning_rate": 2.115815085158151e-06, "loss": 0.6278, "step": 21207 }, { "epoch": 0.6191936001868559, "grad_norm": 0.7437746719543177, "learning_rate": 2.115652879156529e-06, "loss": 0.6834, "step": 21208 }, { "epoch": 0.6192227964147032, "grad_norm": 0.6976470010374316, "learning_rate": 2.115490673154907e-06, "loss": 0.6079, "step": 21209 }, { "epoch": 0.6192519926425506, "grad_norm": 0.6983128856825596, "learning_rate": 2.115328467153285e-06, "loss": 0.602, "step": 21210 }, { "epoch": 0.619281188870398, "grad_norm": 0.7180796064105861, "learning_rate": 2.1151662611516625e-06, "loss": 0.6169, "step": 21211 }, { "epoch": 0.6193103850982453, "grad_norm": 0.6881256539257505, "learning_rate": 2.1150040551500405e-06, "loss": 0.5704, "step": 21212 }, { "epoch": 0.6193395813260927, "grad_norm": 0.7297135396920041, "learning_rate": 2.114841849148419e-06, "loss": 0.6491, "step": 21213 }, { "epoch": 0.61936877755394, "grad_norm": 0.7458703208798391, "learning_rate": 2.1146796431467965e-06, "loss": 0.6279, "step": 21214 }, { "epoch": 0.6193979737817874, "grad_norm": 0.6709489222581579, "learning_rate": 2.1145174371451745e-06, "loss": 0.587, "step": 21215 }, { "epoch": 0.6194271700096348, "grad_norm": 0.6945230940566948, "learning_rate": 2.1143552311435526e-06, "loss": 0.6148, "step": 21216 }, { "epoch": 0.6194563662374821, "grad_norm": 0.7504271431013944, "learning_rate": 2.1141930251419306e-06, "loss": 0.6578, "step": 21217 }, { "epoch": 0.6194855624653295, "grad_norm": 0.7101385310657061, "learning_rate": 2.114030819140308e-06, "loss": 0.6659, "step": 21218 }, { "epoch": 0.6195147586931768, "grad_norm": 0.7627330530396391, "learning_rate": 2.113868613138686e-06, "loss": 0.6531, "step": 21219 }, { "epoch": 0.6195439549210242, "grad_norm": 0.7288774422753571, "learning_rate": 2.113706407137064e-06, "loss": 0.6438, "step": 21220 }, { "epoch": 0.6195731511488716, "grad_norm": 0.7030651273866247, "learning_rate": 2.113544201135442e-06, "loss": 0.5895, "step": 21221 }, { "epoch": 0.6196023473767189, "grad_norm": 0.7859965090273592, "learning_rate": 2.11338199513382e-06, "loss": 0.6623, "step": 21222 }, { "epoch": 0.6196315436045663, "grad_norm": 0.7556742059520404, "learning_rate": 2.113219789132198e-06, "loss": 0.7026, "step": 21223 }, { "epoch": 0.6196607398324137, "grad_norm": 0.746561259031075, "learning_rate": 2.113057583130576e-06, "loss": 0.615, "step": 21224 }, { "epoch": 0.619689936060261, "grad_norm": 0.7035487043732709, "learning_rate": 2.112895377128954e-06, "loss": 0.6287, "step": 21225 }, { "epoch": 0.6197191322881084, "grad_norm": 0.7521680449076459, "learning_rate": 2.1127331711273318e-06, "loss": 0.6335, "step": 21226 }, { "epoch": 0.6197483285159557, "grad_norm": 0.8577450950715515, "learning_rate": 2.1125709651257098e-06, "loss": 0.6568, "step": 21227 }, { "epoch": 0.6197775247438031, "grad_norm": 0.6819129708939146, "learning_rate": 2.1124087591240878e-06, "loss": 0.6364, "step": 21228 }, { "epoch": 0.6198067209716505, "grad_norm": 0.7603491307327934, "learning_rate": 2.112246553122466e-06, "loss": 0.6682, "step": 21229 }, { "epoch": 0.6198359171994978, "grad_norm": 0.6988864447095875, "learning_rate": 2.1120843471208434e-06, "loss": 0.6127, "step": 21230 }, { "epoch": 0.6198651134273452, "grad_norm": 0.7087493384697758, "learning_rate": 2.1119221411192214e-06, "loss": 0.6433, "step": 21231 }, { "epoch": 0.6198943096551925, "grad_norm": 0.7506950837664956, "learning_rate": 2.1117599351176e-06, "loss": 0.6607, "step": 21232 }, { "epoch": 0.6199235058830399, "grad_norm": 0.6893619122057856, "learning_rate": 2.1115977291159774e-06, "loss": 0.5752, "step": 21233 }, { "epoch": 0.6199527021108873, "grad_norm": 0.7084470290017663, "learning_rate": 2.1114355231143554e-06, "loss": 0.6105, "step": 21234 }, { "epoch": 0.6199818983387346, "grad_norm": 0.7192461376143713, "learning_rate": 2.1112733171127334e-06, "loss": 0.6386, "step": 21235 }, { "epoch": 0.620011094566582, "grad_norm": 0.7272808664085451, "learning_rate": 2.1111111111111114e-06, "loss": 0.6225, "step": 21236 }, { "epoch": 0.6200402907944293, "grad_norm": 0.6816530302325756, "learning_rate": 2.110948905109489e-06, "loss": 0.5886, "step": 21237 }, { "epoch": 0.6200694870222767, "grad_norm": 0.7088042996519287, "learning_rate": 2.110786699107867e-06, "loss": 0.6707, "step": 21238 }, { "epoch": 0.6200986832501241, "grad_norm": 0.685469301266493, "learning_rate": 2.110624493106245e-06, "loss": 0.6249, "step": 21239 }, { "epoch": 0.6201278794779714, "grad_norm": 0.7246984535029477, "learning_rate": 2.110462287104623e-06, "loss": 0.6531, "step": 21240 }, { "epoch": 0.6201570757058188, "grad_norm": 0.7054504529582973, "learning_rate": 2.110300081103001e-06, "loss": 0.6222, "step": 21241 }, { "epoch": 0.6201862719336662, "grad_norm": 0.6921310331486252, "learning_rate": 2.110137875101379e-06, "loss": 0.6289, "step": 21242 }, { "epoch": 0.6202154681615135, "grad_norm": 0.7110679009803003, "learning_rate": 2.109975669099757e-06, "loss": 0.6378, "step": 21243 }, { "epoch": 0.6202446643893609, "grad_norm": 0.6931118226902725, "learning_rate": 2.109813463098135e-06, "loss": 0.5919, "step": 21244 }, { "epoch": 0.6202738606172082, "grad_norm": 0.7849828544019092, "learning_rate": 2.1096512570965126e-06, "loss": 0.7614, "step": 21245 }, { "epoch": 0.6203030568450556, "grad_norm": 0.6969683575228894, "learning_rate": 2.1094890510948906e-06, "loss": 0.5671, "step": 21246 }, { "epoch": 0.620332253072903, "grad_norm": 0.6936593684583301, "learning_rate": 2.1093268450932686e-06, "loss": 0.6164, "step": 21247 }, { "epoch": 0.6203614493007503, "grad_norm": 0.8577859209968324, "learning_rate": 2.1091646390916467e-06, "loss": 0.7443, "step": 21248 }, { "epoch": 0.6203906455285977, "grad_norm": 0.7120646353455292, "learning_rate": 2.1090024330900242e-06, "loss": 0.6112, "step": 21249 }, { "epoch": 0.620419841756445, "grad_norm": 0.7365563486890353, "learning_rate": 2.1088402270884027e-06, "loss": 0.6452, "step": 21250 }, { "epoch": 0.6204490379842924, "grad_norm": 0.7058528066393654, "learning_rate": 2.1086780210867807e-06, "loss": 0.6138, "step": 21251 }, { "epoch": 0.6204782342121398, "grad_norm": 0.660824094125988, "learning_rate": 2.1085158150851583e-06, "loss": 0.568, "step": 21252 }, { "epoch": 0.6205074304399871, "grad_norm": 0.7022996970607334, "learning_rate": 2.1083536090835363e-06, "loss": 0.6108, "step": 21253 }, { "epoch": 0.6205366266678345, "grad_norm": 0.7333344124234127, "learning_rate": 2.1081914030819143e-06, "loss": 0.6084, "step": 21254 }, { "epoch": 0.6205658228956819, "grad_norm": 0.7663516866874392, "learning_rate": 2.1080291970802923e-06, "loss": 0.679, "step": 21255 }, { "epoch": 0.6205950191235292, "grad_norm": 0.7136736480234679, "learning_rate": 2.10786699107867e-06, "loss": 0.6262, "step": 21256 }, { "epoch": 0.6206242153513766, "grad_norm": 0.771180283995002, "learning_rate": 2.107704785077048e-06, "loss": 0.6623, "step": 21257 }, { "epoch": 0.6206534115792239, "grad_norm": 0.6968154839595428, "learning_rate": 2.107542579075426e-06, "loss": 0.6236, "step": 21258 }, { "epoch": 0.6206826078070713, "grad_norm": 0.7263044296296114, "learning_rate": 2.107380373073804e-06, "loss": 0.6473, "step": 21259 }, { "epoch": 0.6207118040349187, "grad_norm": 0.7381324613059185, "learning_rate": 2.107218167072182e-06, "loss": 0.6902, "step": 21260 }, { "epoch": 0.620741000262766, "grad_norm": 0.7046066519715094, "learning_rate": 2.10705596107056e-06, "loss": 0.584, "step": 21261 }, { "epoch": 0.6207701964906134, "grad_norm": 0.735720167189874, "learning_rate": 2.106893755068938e-06, "loss": 0.6178, "step": 21262 }, { "epoch": 0.6207993927184607, "grad_norm": 0.7294582379947737, "learning_rate": 2.106731549067316e-06, "loss": 0.649, "step": 21263 }, { "epoch": 0.6208285889463081, "grad_norm": 0.8060303506391846, "learning_rate": 2.1065693430656935e-06, "loss": 0.7838, "step": 21264 }, { "epoch": 0.6208577851741555, "grad_norm": 0.7206454543567712, "learning_rate": 2.1064071370640715e-06, "loss": 0.625, "step": 21265 }, { "epoch": 0.6208869814020028, "grad_norm": 0.7354647039522886, "learning_rate": 2.1062449310624495e-06, "loss": 0.6839, "step": 21266 }, { "epoch": 0.6209161776298502, "grad_norm": 0.7814617850393204, "learning_rate": 2.1060827250608275e-06, "loss": 0.7674, "step": 21267 }, { "epoch": 0.6209453738576975, "grad_norm": 0.716100140040211, "learning_rate": 2.105920519059205e-06, "loss": 0.6461, "step": 21268 }, { "epoch": 0.6209745700855449, "grad_norm": 0.706591906792955, "learning_rate": 2.1057583130575835e-06, "loss": 0.617, "step": 21269 }, { "epoch": 0.6210037663133923, "grad_norm": 0.7714601065608351, "learning_rate": 2.1055961070559615e-06, "loss": 0.6943, "step": 21270 }, { "epoch": 0.6210329625412396, "grad_norm": 0.7059707304140637, "learning_rate": 2.105433901054339e-06, "loss": 0.6234, "step": 21271 }, { "epoch": 0.621062158769087, "grad_norm": 0.7525368776642708, "learning_rate": 2.105271695052717e-06, "loss": 0.6633, "step": 21272 }, { "epoch": 0.6210913549969344, "grad_norm": 0.6999124547584435, "learning_rate": 2.105109489051095e-06, "loss": 0.5822, "step": 21273 }, { "epoch": 0.6211205512247817, "grad_norm": 0.6817761990513314, "learning_rate": 2.104947283049473e-06, "loss": 0.594, "step": 21274 }, { "epoch": 0.6211497474526291, "grad_norm": 0.7434142693963196, "learning_rate": 2.1047850770478507e-06, "loss": 0.6561, "step": 21275 }, { "epoch": 0.6211789436804764, "grad_norm": 0.7279340568804274, "learning_rate": 2.1046228710462287e-06, "loss": 0.5823, "step": 21276 }, { "epoch": 0.6212081399083238, "grad_norm": 0.6933219716759587, "learning_rate": 2.1044606650446067e-06, "loss": 0.6375, "step": 21277 }, { "epoch": 0.6212373361361712, "grad_norm": 0.7600729685075969, "learning_rate": 2.1042984590429847e-06, "loss": 0.6875, "step": 21278 }, { "epoch": 0.6212665323640185, "grad_norm": 0.828815206846462, "learning_rate": 2.1041362530413627e-06, "loss": 0.6895, "step": 21279 }, { "epoch": 0.6212957285918659, "grad_norm": 0.7910544062199371, "learning_rate": 2.1039740470397408e-06, "loss": 0.7995, "step": 21280 }, { "epoch": 0.6213249248197132, "grad_norm": 0.8202263955644632, "learning_rate": 2.1038118410381188e-06, "loss": 0.7201, "step": 21281 }, { "epoch": 0.6213541210475606, "grad_norm": 0.7050855600398656, "learning_rate": 2.1036496350364963e-06, "loss": 0.6079, "step": 21282 }, { "epoch": 0.621383317275408, "grad_norm": 0.7096593994332523, "learning_rate": 2.1034874290348743e-06, "loss": 0.5767, "step": 21283 }, { "epoch": 0.6214125135032553, "grad_norm": 0.6806884035469544, "learning_rate": 2.1033252230332524e-06, "loss": 0.5932, "step": 21284 }, { "epoch": 0.6214417097311027, "grad_norm": 0.6942955097221478, "learning_rate": 2.1031630170316304e-06, "loss": 0.5882, "step": 21285 }, { "epoch": 0.62147090595895, "grad_norm": 0.6877128924114095, "learning_rate": 2.1030008110300084e-06, "loss": 0.5565, "step": 21286 }, { "epoch": 0.6215001021867975, "grad_norm": 0.7570601283318315, "learning_rate": 2.102838605028386e-06, "loss": 0.6343, "step": 21287 }, { "epoch": 0.6215292984146449, "grad_norm": 0.9737394526381209, "learning_rate": 2.1026763990267644e-06, "loss": 0.6, "step": 21288 }, { "epoch": 0.6215584946424922, "grad_norm": 0.7303029113144527, "learning_rate": 2.1025141930251424e-06, "loss": 0.678, "step": 21289 }, { "epoch": 0.6215876908703396, "grad_norm": 0.6722387242172191, "learning_rate": 2.10235198702352e-06, "loss": 0.5298, "step": 21290 }, { "epoch": 0.621616887098187, "grad_norm": 0.7132962495808396, "learning_rate": 2.102189781021898e-06, "loss": 0.6207, "step": 21291 }, { "epoch": 0.6216460833260343, "grad_norm": 0.750385370665472, "learning_rate": 2.102027575020276e-06, "loss": 0.6578, "step": 21292 }, { "epoch": 0.6216752795538817, "grad_norm": 0.6823682988515661, "learning_rate": 2.101865369018654e-06, "loss": 0.6164, "step": 21293 }, { "epoch": 0.621704475781729, "grad_norm": 0.7499896300993427, "learning_rate": 2.1017031630170316e-06, "loss": 0.6492, "step": 21294 }, { "epoch": 0.6217336720095764, "grad_norm": 0.7188672399294614, "learning_rate": 2.1015409570154096e-06, "loss": 0.6496, "step": 21295 }, { "epoch": 0.6217628682374238, "grad_norm": 0.6914626938284398, "learning_rate": 2.1013787510137876e-06, "loss": 0.5534, "step": 21296 }, { "epoch": 0.6217920644652711, "grad_norm": 0.7669076569415962, "learning_rate": 2.1012165450121656e-06, "loss": 0.6736, "step": 21297 }, { "epoch": 0.6218212606931185, "grad_norm": 0.716927608643198, "learning_rate": 2.1010543390105436e-06, "loss": 0.5872, "step": 21298 }, { "epoch": 0.6218504569209659, "grad_norm": 0.813697192035812, "learning_rate": 2.1008921330089216e-06, "loss": 0.8121, "step": 21299 }, { "epoch": 0.6218796531488132, "grad_norm": 0.705338324318985, "learning_rate": 2.1007299270072996e-06, "loss": 0.5752, "step": 21300 }, { "epoch": 0.6219088493766606, "grad_norm": 0.6643744624245413, "learning_rate": 2.100567721005677e-06, "loss": 0.5617, "step": 21301 }, { "epoch": 0.6219380456045079, "grad_norm": 0.7378712309880323, "learning_rate": 2.100405515004055e-06, "loss": 0.668, "step": 21302 }, { "epoch": 0.6219672418323553, "grad_norm": 0.6670925696542801, "learning_rate": 2.1002433090024332e-06, "loss": 0.5884, "step": 21303 }, { "epoch": 0.6219964380602027, "grad_norm": 0.7482957090910286, "learning_rate": 2.1000811030008112e-06, "loss": 0.6131, "step": 21304 }, { "epoch": 0.62202563428805, "grad_norm": 0.718668156375597, "learning_rate": 2.0999188969991892e-06, "loss": 0.6084, "step": 21305 }, { "epoch": 0.6220548305158974, "grad_norm": 0.7042252133725679, "learning_rate": 2.099756690997567e-06, "loss": 0.6157, "step": 21306 }, { "epoch": 0.6220840267437447, "grad_norm": 0.6867561409008461, "learning_rate": 2.0995944849959452e-06, "loss": 0.5901, "step": 21307 }, { "epoch": 0.6221132229715921, "grad_norm": 0.7138667944646274, "learning_rate": 2.0994322789943232e-06, "loss": 0.6344, "step": 21308 }, { "epoch": 0.6221424191994395, "grad_norm": 0.7429687758901002, "learning_rate": 2.099270072992701e-06, "loss": 0.5973, "step": 21309 }, { "epoch": 0.6221716154272868, "grad_norm": 0.8254466109476449, "learning_rate": 2.099107866991079e-06, "loss": 0.7983, "step": 21310 }, { "epoch": 0.6222008116551342, "grad_norm": 0.7696547231230818, "learning_rate": 2.098945660989457e-06, "loss": 0.6857, "step": 21311 }, { "epoch": 0.6222300078829816, "grad_norm": 0.7439589400718781, "learning_rate": 2.098783454987835e-06, "loss": 0.6923, "step": 21312 }, { "epoch": 0.6222592041108289, "grad_norm": 0.721625697514504, "learning_rate": 2.0986212489862124e-06, "loss": 0.6322, "step": 21313 }, { "epoch": 0.6222884003386763, "grad_norm": 0.722765190889366, "learning_rate": 2.0984590429845904e-06, "loss": 0.6056, "step": 21314 }, { "epoch": 0.6223175965665236, "grad_norm": 0.7645864149944027, "learning_rate": 2.0982968369829684e-06, "loss": 0.6411, "step": 21315 }, { "epoch": 0.622346792794371, "grad_norm": 0.7951744968218388, "learning_rate": 2.0981346309813465e-06, "loss": 0.7056, "step": 21316 }, { "epoch": 0.6223759890222184, "grad_norm": 0.6949248592209636, "learning_rate": 2.0979724249797245e-06, "loss": 0.5834, "step": 21317 }, { "epoch": 0.6224051852500657, "grad_norm": 0.7392257228055857, "learning_rate": 2.0978102189781025e-06, "loss": 0.6689, "step": 21318 }, { "epoch": 0.6224343814779131, "grad_norm": 0.7068254064819467, "learning_rate": 2.0976480129764805e-06, "loss": 0.6309, "step": 21319 }, { "epoch": 0.6224635777057604, "grad_norm": 0.7508851157497998, "learning_rate": 2.097485806974858e-06, "loss": 0.6499, "step": 21320 }, { "epoch": 0.6224927739336078, "grad_norm": 0.6665827928452571, "learning_rate": 2.097323600973236e-06, "loss": 0.5436, "step": 21321 }, { "epoch": 0.6225219701614552, "grad_norm": 0.7032638358196067, "learning_rate": 2.097161394971614e-06, "loss": 0.5857, "step": 21322 }, { "epoch": 0.6225511663893025, "grad_norm": 0.7598561855097303, "learning_rate": 2.096999188969992e-06, "loss": 0.6451, "step": 21323 }, { "epoch": 0.6225803626171499, "grad_norm": 0.6809517496092392, "learning_rate": 2.09683698296837e-06, "loss": 0.5853, "step": 21324 }, { "epoch": 0.6226095588449972, "grad_norm": 0.757141325332312, "learning_rate": 2.0966747769667477e-06, "loss": 0.7346, "step": 21325 }, { "epoch": 0.6226387550728446, "grad_norm": 0.7255165719296744, "learning_rate": 2.096512570965126e-06, "loss": 0.6077, "step": 21326 }, { "epoch": 0.622667951300692, "grad_norm": 0.7109795033740002, "learning_rate": 2.096350364963504e-06, "loss": 0.6417, "step": 21327 }, { "epoch": 0.6226971475285393, "grad_norm": 0.7645596936618292, "learning_rate": 2.0961881589618817e-06, "loss": 0.6864, "step": 21328 }, { "epoch": 0.6227263437563867, "grad_norm": 0.6840358243725791, "learning_rate": 2.0960259529602597e-06, "loss": 0.6013, "step": 21329 }, { "epoch": 0.622755539984234, "grad_norm": 0.7394725977728297, "learning_rate": 2.0958637469586377e-06, "loss": 0.6999, "step": 21330 }, { "epoch": 0.6227847362120814, "grad_norm": 0.7173313747912753, "learning_rate": 2.0957015409570157e-06, "loss": 0.6156, "step": 21331 }, { "epoch": 0.6228139324399288, "grad_norm": 0.7279851022291086, "learning_rate": 2.0955393349553933e-06, "loss": 0.659, "step": 21332 }, { "epoch": 0.6228431286677761, "grad_norm": 0.7566627338365763, "learning_rate": 2.0953771289537713e-06, "loss": 0.6792, "step": 21333 }, { "epoch": 0.6228723248956235, "grad_norm": 0.7145272719514671, "learning_rate": 2.0952149229521493e-06, "loss": 0.6488, "step": 21334 }, { "epoch": 0.6229015211234709, "grad_norm": 0.7700338076643886, "learning_rate": 2.0950527169505273e-06, "loss": 0.719, "step": 21335 }, { "epoch": 0.6229307173513182, "grad_norm": 0.7111953220650574, "learning_rate": 2.0948905109489053e-06, "loss": 0.6366, "step": 21336 }, { "epoch": 0.6229599135791656, "grad_norm": 0.6770996170171902, "learning_rate": 2.0947283049472833e-06, "loss": 0.6007, "step": 21337 }, { "epoch": 0.6229891098070129, "grad_norm": 0.7245728776529012, "learning_rate": 2.0945660989456613e-06, "loss": 0.6253, "step": 21338 }, { "epoch": 0.6230183060348603, "grad_norm": 0.7568785035403723, "learning_rate": 2.094403892944039e-06, "loss": 0.6423, "step": 21339 }, { "epoch": 0.6230475022627077, "grad_norm": 0.7171918595082104, "learning_rate": 2.094241686942417e-06, "loss": 0.6032, "step": 21340 }, { "epoch": 0.623076698490555, "grad_norm": 0.7511078398769568, "learning_rate": 2.094079480940795e-06, "loss": 0.6833, "step": 21341 }, { "epoch": 0.6231058947184024, "grad_norm": 0.7006444208181657, "learning_rate": 2.093917274939173e-06, "loss": 0.6314, "step": 21342 }, { "epoch": 0.6231350909462497, "grad_norm": 0.8203099119534744, "learning_rate": 2.093755068937551e-06, "loss": 0.7526, "step": 21343 }, { "epoch": 0.6231642871740971, "grad_norm": 0.7147911901584852, "learning_rate": 2.0935928629359285e-06, "loss": 0.634, "step": 21344 }, { "epoch": 0.6231934834019445, "grad_norm": 0.716388262167611, "learning_rate": 2.093430656934307e-06, "loss": 0.6093, "step": 21345 }, { "epoch": 0.6232226796297918, "grad_norm": 0.7159867277487906, "learning_rate": 2.093268450932685e-06, "loss": 0.5947, "step": 21346 }, { "epoch": 0.6232518758576392, "grad_norm": 0.7277734965200505, "learning_rate": 2.0931062449310625e-06, "loss": 0.6462, "step": 21347 }, { "epoch": 0.6232810720854866, "grad_norm": 0.6875792464761046, "learning_rate": 2.0929440389294406e-06, "loss": 0.5572, "step": 21348 }, { "epoch": 0.6233102683133339, "grad_norm": 0.7163060395544109, "learning_rate": 2.0927818329278186e-06, "loss": 0.6405, "step": 21349 }, { "epoch": 0.6233394645411813, "grad_norm": 0.7498000256904436, "learning_rate": 2.0926196269261966e-06, "loss": 0.6637, "step": 21350 }, { "epoch": 0.6233686607690286, "grad_norm": 0.6743555749667552, "learning_rate": 2.092457420924574e-06, "loss": 0.6097, "step": 21351 }, { "epoch": 0.623397856996876, "grad_norm": 0.7128687537694103, "learning_rate": 2.092295214922952e-06, "loss": 0.633, "step": 21352 }, { "epoch": 0.6234270532247234, "grad_norm": 0.767354433999892, "learning_rate": 2.09213300892133e-06, "loss": 0.615, "step": 21353 }, { "epoch": 0.6234562494525707, "grad_norm": 0.7318851548845905, "learning_rate": 2.091970802919708e-06, "loss": 0.642, "step": 21354 }, { "epoch": 0.6234854456804181, "grad_norm": 0.8975644010922512, "learning_rate": 2.091808596918086e-06, "loss": 0.5607, "step": 21355 }, { "epoch": 0.6235146419082654, "grad_norm": 0.6954737119648821, "learning_rate": 2.091646390916464e-06, "loss": 0.5744, "step": 21356 }, { "epoch": 0.6235438381361128, "grad_norm": 0.6764927948306318, "learning_rate": 2.091484184914842e-06, "loss": 0.5941, "step": 21357 }, { "epoch": 0.6235730343639602, "grad_norm": 0.9040054849933084, "learning_rate": 2.0913219789132198e-06, "loss": 0.7417, "step": 21358 }, { "epoch": 0.6236022305918075, "grad_norm": 0.7951706335758696, "learning_rate": 2.0911597729115978e-06, "loss": 0.68, "step": 21359 }, { "epoch": 0.6236314268196549, "grad_norm": 0.7070241230867302, "learning_rate": 2.090997566909976e-06, "loss": 0.6263, "step": 21360 }, { "epoch": 0.6236606230475022, "grad_norm": 0.679532232704695, "learning_rate": 2.090835360908354e-06, "loss": 0.5725, "step": 21361 }, { "epoch": 0.6236898192753496, "grad_norm": 1.147507273895186, "learning_rate": 2.090673154906732e-06, "loss": 0.6514, "step": 21362 }, { "epoch": 0.623719015503197, "grad_norm": 0.7086857899809169, "learning_rate": 2.0905109489051094e-06, "loss": 0.6678, "step": 21363 }, { "epoch": 0.6237482117310443, "grad_norm": 0.710428418741995, "learning_rate": 2.090348742903488e-06, "loss": 0.6254, "step": 21364 }, { "epoch": 0.6237774079588917, "grad_norm": 0.73971438180975, "learning_rate": 2.090186536901866e-06, "loss": 0.6696, "step": 21365 }, { "epoch": 0.623806604186739, "grad_norm": 0.7642969132239763, "learning_rate": 2.0900243309002434e-06, "loss": 0.6188, "step": 21366 }, { "epoch": 0.6238358004145864, "grad_norm": 0.7574381364778475, "learning_rate": 2.0898621248986214e-06, "loss": 0.6777, "step": 21367 }, { "epoch": 0.6238649966424338, "grad_norm": 0.7628287445645825, "learning_rate": 2.0896999188969994e-06, "loss": 0.6997, "step": 21368 }, { "epoch": 0.6238941928702811, "grad_norm": 0.7749770921792525, "learning_rate": 2.0895377128953774e-06, "loss": 0.6845, "step": 21369 }, { "epoch": 0.6239233890981285, "grad_norm": 0.7833107462989916, "learning_rate": 2.089375506893755e-06, "loss": 0.6744, "step": 21370 }, { "epoch": 0.6239525853259759, "grad_norm": 0.7455370430644176, "learning_rate": 2.089213300892133e-06, "loss": 0.5888, "step": 21371 }, { "epoch": 0.6239817815538232, "grad_norm": 0.7984105669469459, "learning_rate": 2.089051094890511e-06, "loss": 0.5564, "step": 21372 }, { "epoch": 0.6240109777816706, "grad_norm": 0.6653468679995798, "learning_rate": 2.088888888888889e-06, "loss": 0.5502, "step": 21373 }, { "epoch": 0.624040174009518, "grad_norm": 0.7019070192734539, "learning_rate": 2.088726682887267e-06, "loss": 0.6399, "step": 21374 }, { "epoch": 0.6240693702373653, "grad_norm": 0.7341005853108552, "learning_rate": 2.088564476885645e-06, "loss": 0.683, "step": 21375 }, { "epoch": 0.6240985664652127, "grad_norm": 0.8054711566207441, "learning_rate": 2.088402270884023e-06, "loss": 0.7284, "step": 21376 }, { "epoch": 0.62412776269306, "grad_norm": 0.723304373455412, "learning_rate": 2.0882400648824006e-06, "loss": 0.6182, "step": 21377 }, { "epoch": 0.6241569589209074, "grad_norm": 0.7575443998881227, "learning_rate": 2.0880778588807786e-06, "loss": 0.697, "step": 21378 }, { "epoch": 0.6241861551487548, "grad_norm": 0.7260142123378734, "learning_rate": 2.0879156528791566e-06, "loss": 0.6536, "step": 21379 }, { "epoch": 0.6242153513766021, "grad_norm": 0.7071157560910242, "learning_rate": 2.0877534468775347e-06, "loss": 0.6088, "step": 21380 }, { "epoch": 0.6242445476044495, "grad_norm": 0.6821732431711877, "learning_rate": 2.0875912408759127e-06, "loss": 0.5378, "step": 21381 }, { "epoch": 0.6242737438322968, "grad_norm": 0.7182679334920532, "learning_rate": 2.0874290348742902e-06, "loss": 0.6268, "step": 21382 }, { "epoch": 0.6243029400601442, "grad_norm": 0.7158727597317474, "learning_rate": 2.0872668288726687e-06, "loss": 0.6462, "step": 21383 }, { "epoch": 0.6243321362879916, "grad_norm": 0.8249538662046604, "learning_rate": 2.0871046228710467e-06, "loss": 0.6984, "step": 21384 }, { "epoch": 0.6243613325158389, "grad_norm": 0.7752087800126138, "learning_rate": 2.0869424168694243e-06, "loss": 0.6826, "step": 21385 }, { "epoch": 0.6243905287436863, "grad_norm": 0.783252626589099, "learning_rate": 2.0867802108678023e-06, "loss": 0.7091, "step": 21386 }, { "epoch": 0.6244197249715336, "grad_norm": 0.7410154184387349, "learning_rate": 2.0866180048661803e-06, "loss": 0.653, "step": 21387 }, { "epoch": 0.624448921199381, "grad_norm": 0.746559394862862, "learning_rate": 2.0864557988645583e-06, "loss": 0.6404, "step": 21388 }, { "epoch": 0.6244781174272284, "grad_norm": 0.8048935721274102, "learning_rate": 2.086293592862936e-06, "loss": 0.7444, "step": 21389 }, { "epoch": 0.6245073136550757, "grad_norm": 0.7168951491074995, "learning_rate": 2.086131386861314e-06, "loss": 0.6359, "step": 21390 }, { "epoch": 0.6245365098829231, "grad_norm": 0.7441037468214178, "learning_rate": 2.085969180859692e-06, "loss": 0.7118, "step": 21391 }, { "epoch": 0.6245657061107704, "grad_norm": 0.7926861275339051, "learning_rate": 2.08580697485807e-06, "loss": 0.77, "step": 21392 }, { "epoch": 0.6245949023386178, "grad_norm": 0.7420712311328147, "learning_rate": 2.085644768856448e-06, "loss": 0.6851, "step": 21393 }, { "epoch": 0.6246240985664652, "grad_norm": 0.704586087387803, "learning_rate": 2.085482562854826e-06, "loss": 0.6569, "step": 21394 }, { "epoch": 0.6246532947943125, "grad_norm": 0.735181176491879, "learning_rate": 2.085320356853204e-06, "loss": 0.652, "step": 21395 }, { "epoch": 0.6246824910221599, "grad_norm": 0.7231610491912462, "learning_rate": 2.0851581508515815e-06, "loss": 0.594, "step": 21396 }, { "epoch": 0.6247116872500073, "grad_norm": 0.6772038577716204, "learning_rate": 2.0849959448499595e-06, "loss": 0.6124, "step": 21397 }, { "epoch": 0.6247408834778546, "grad_norm": 0.7350765335937313, "learning_rate": 2.0848337388483375e-06, "loss": 0.6706, "step": 21398 }, { "epoch": 0.624770079705702, "grad_norm": 0.7342152555302659, "learning_rate": 2.0846715328467155e-06, "loss": 0.595, "step": 21399 }, { "epoch": 0.6247992759335493, "grad_norm": 0.6907242764414533, "learning_rate": 2.0845093268450935e-06, "loss": 0.611, "step": 21400 }, { "epoch": 0.6248284721613967, "grad_norm": 0.6360502774016196, "learning_rate": 2.0843471208434715e-06, "loss": 0.516, "step": 21401 }, { "epoch": 0.6248576683892441, "grad_norm": 0.7112692306302647, "learning_rate": 2.0841849148418495e-06, "loss": 0.6338, "step": 21402 }, { "epoch": 0.6248868646170914, "grad_norm": 0.7438636330682691, "learning_rate": 2.0840227088402275e-06, "loss": 0.7054, "step": 21403 }, { "epoch": 0.6249160608449388, "grad_norm": 0.720672438124592, "learning_rate": 2.083860502838605e-06, "loss": 0.6254, "step": 21404 }, { "epoch": 0.6249452570727861, "grad_norm": 0.69505340811333, "learning_rate": 2.083698296836983e-06, "loss": 0.6037, "step": 21405 }, { "epoch": 0.6249744533006335, "grad_norm": 0.7805192412891826, "learning_rate": 2.083536090835361e-06, "loss": 0.6579, "step": 21406 }, { "epoch": 0.625003649528481, "grad_norm": 0.7182496466017013, "learning_rate": 2.083373884833739e-06, "loss": 0.6242, "step": 21407 }, { "epoch": 0.6250328457563283, "grad_norm": 0.7138400441698453, "learning_rate": 2.0832116788321167e-06, "loss": 0.5927, "step": 21408 }, { "epoch": 0.6250620419841757, "grad_norm": 0.6551820715945854, "learning_rate": 2.0830494728304947e-06, "loss": 0.5508, "step": 21409 }, { "epoch": 0.6250912382120231, "grad_norm": 0.6787970565535196, "learning_rate": 2.0828872668288727e-06, "loss": 0.577, "step": 21410 }, { "epoch": 0.6251204344398704, "grad_norm": 0.7768425583788885, "learning_rate": 2.0827250608272507e-06, "loss": 0.6943, "step": 21411 }, { "epoch": 0.6251496306677178, "grad_norm": 0.7028492989181215, "learning_rate": 2.0825628548256288e-06, "loss": 0.5959, "step": 21412 }, { "epoch": 0.6251788268955651, "grad_norm": 0.6799244850522184, "learning_rate": 2.0824006488240068e-06, "loss": 0.5943, "step": 21413 }, { "epoch": 0.6252080231234125, "grad_norm": 0.662805367371321, "learning_rate": 2.0822384428223848e-06, "loss": 0.5318, "step": 21414 }, { "epoch": 0.6252372193512599, "grad_norm": 0.6921776847593848, "learning_rate": 2.0820762368207624e-06, "loss": 0.6011, "step": 21415 }, { "epoch": 0.6252664155791072, "grad_norm": 0.7323847661750479, "learning_rate": 2.0819140308191404e-06, "loss": 0.684, "step": 21416 }, { "epoch": 0.6252956118069546, "grad_norm": 0.7101280237994431, "learning_rate": 2.0817518248175184e-06, "loss": 0.6495, "step": 21417 }, { "epoch": 0.625324808034802, "grad_norm": 0.7352898682329175, "learning_rate": 2.0815896188158964e-06, "loss": 0.6631, "step": 21418 }, { "epoch": 0.6253540042626493, "grad_norm": 0.7900145787254226, "learning_rate": 2.0814274128142744e-06, "loss": 0.716, "step": 21419 }, { "epoch": 0.6253832004904967, "grad_norm": 0.720211256098676, "learning_rate": 2.0812652068126524e-06, "loss": 0.6127, "step": 21420 }, { "epoch": 0.625412396718344, "grad_norm": 0.7857927638089593, "learning_rate": 2.0811030008110304e-06, "loss": 0.6815, "step": 21421 }, { "epoch": 0.6254415929461914, "grad_norm": 0.7038568316959715, "learning_rate": 2.0809407948094084e-06, "loss": 0.6248, "step": 21422 }, { "epoch": 0.6254707891740388, "grad_norm": 0.6911501344420334, "learning_rate": 2.080778588807786e-06, "loss": 0.6255, "step": 21423 }, { "epoch": 0.6254999854018861, "grad_norm": 0.6993983519838757, "learning_rate": 2.080616382806164e-06, "loss": 0.5928, "step": 21424 }, { "epoch": 0.6255291816297335, "grad_norm": 0.7108036999108848, "learning_rate": 2.080454176804542e-06, "loss": 0.6251, "step": 21425 }, { "epoch": 0.6255583778575808, "grad_norm": 0.7453630079375524, "learning_rate": 2.08029197080292e-06, "loss": 0.6283, "step": 21426 }, { "epoch": 0.6255875740854282, "grad_norm": 0.7763531163629809, "learning_rate": 2.0801297648012976e-06, "loss": 0.6166, "step": 21427 }, { "epoch": 0.6256167703132756, "grad_norm": 0.7384551575132513, "learning_rate": 2.0799675587996756e-06, "loss": 0.6301, "step": 21428 }, { "epoch": 0.6256459665411229, "grad_norm": 0.7167376340579263, "learning_rate": 2.0798053527980536e-06, "loss": 0.5383, "step": 21429 }, { "epoch": 0.6256751627689703, "grad_norm": 0.6948385590884948, "learning_rate": 2.0796431467964316e-06, "loss": 0.6276, "step": 21430 }, { "epoch": 0.6257043589968176, "grad_norm": 0.8483079384590028, "learning_rate": 2.0794809407948096e-06, "loss": 0.6977, "step": 21431 }, { "epoch": 0.625733555224665, "grad_norm": 0.6702129479010035, "learning_rate": 2.0793187347931876e-06, "loss": 0.5545, "step": 21432 }, { "epoch": 0.6257627514525124, "grad_norm": 0.7095197389320214, "learning_rate": 2.0791565287915656e-06, "loss": 0.6308, "step": 21433 }, { "epoch": 0.6257919476803597, "grad_norm": 0.7048262961319156, "learning_rate": 2.078994322789943e-06, "loss": 0.5896, "step": 21434 }, { "epoch": 0.6258211439082071, "grad_norm": 0.6865688294458926, "learning_rate": 2.0788321167883212e-06, "loss": 0.6477, "step": 21435 }, { "epoch": 0.6258503401360545, "grad_norm": 0.7477526245892778, "learning_rate": 2.0786699107866992e-06, "loss": 0.7055, "step": 21436 }, { "epoch": 0.6258795363639018, "grad_norm": 0.7283906311130198, "learning_rate": 2.0785077047850772e-06, "loss": 0.6528, "step": 21437 }, { "epoch": 0.6259087325917492, "grad_norm": 0.7206416905382679, "learning_rate": 2.0783454987834552e-06, "loss": 0.635, "step": 21438 }, { "epoch": 0.6259379288195965, "grad_norm": 0.6767097919384399, "learning_rate": 2.0781832927818332e-06, "loss": 0.5869, "step": 21439 }, { "epoch": 0.6259671250474439, "grad_norm": 0.7529168598886314, "learning_rate": 2.0780210867802113e-06, "loss": 0.7034, "step": 21440 }, { "epoch": 0.6259963212752913, "grad_norm": 0.752034456828333, "learning_rate": 2.0778588807785893e-06, "loss": 0.6301, "step": 21441 }, { "epoch": 0.6260255175031386, "grad_norm": 0.7339939517485762, "learning_rate": 2.077696674776967e-06, "loss": 0.7047, "step": 21442 }, { "epoch": 0.626054713730986, "grad_norm": 0.7633519474543788, "learning_rate": 2.077534468775345e-06, "loss": 0.6476, "step": 21443 }, { "epoch": 0.6260839099588333, "grad_norm": 0.7527809742330994, "learning_rate": 2.077372262773723e-06, "loss": 0.7177, "step": 21444 }, { "epoch": 0.6261131061866807, "grad_norm": 0.7163965548181308, "learning_rate": 2.077210056772101e-06, "loss": 0.6298, "step": 21445 }, { "epoch": 0.6261423024145281, "grad_norm": 0.7236408890513543, "learning_rate": 2.0770478507704784e-06, "loss": 0.659, "step": 21446 }, { "epoch": 0.6261714986423754, "grad_norm": 0.734343200535577, "learning_rate": 2.0768856447688565e-06, "loss": 0.6512, "step": 21447 }, { "epoch": 0.6262006948702228, "grad_norm": 0.7810850418056726, "learning_rate": 2.0767234387672345e-06, "loss": 0.6812, "step": 21448 }, { "epoch": 0.6262298910980701, "grad_norm": 0.734985070220656, "learning_rate": 2.0765612327656125e-06, "loss": 0.7155, "step": 21449 }, { "epoch": 0.6262590873259175, "grad_norm": 0.6974351287138322, "learning_rate": 2.0763990267639905e-06, "loss": 0.5854, "step": 21450 }, { "epoch": 0.6262882835537649, "grad_norm": 0.7292113088921023, "learning_rate": 2.0762368207623685e-06, "loss": 0.6819, "step": 21451 }, { "epoch": 0.6263174797816122, "grad_norm": 0.7104575941787553, "learning_rate": 2.0760746147607465e-06, "loss": 0.5962, "step": 21452 }, { "epoch": 0.6263466760094596, "grad_norm": 0.6654048096083776, "learning_rate": 2.075912408759124e-06, "loss": 0.5451, "step": 21453 }, { "epoch": 0.626375872237307, "grad_norm": 0.7293026648771441, "learning_rate": 2.075750202757502e-06, "loss": 0.6747, "step": 21454 }, { "epoch": 0.6264050684651543, "grad_norm": 0.7090030937554191, "learning_rate": 2.07558799675588e-06, "loss": 0.5916, "step": 21455 }, { "epoch": 0.6264342646930017, "grad_norm": 0.6992123377679534, "learning_rate": 2.075425790754258e-06, "loss": 0.5811, "step": 21456 }, { "epoch": 0.626463460920849, "grad_norm": 0.7317068766089604, "learning_rate": 2.075263584752636e-06, "loss": 0.6224, "step": 21457 }, { "epoch": 0.6264926571486964, "grad_norm": 0.6729915416132489, "learning_rate": 2.075101378751014e-06, "loss": 0.5976, "step": 21458 }, { "epoch": 0.6265218533765438, "grad_norm": 0.7785713300607283, "learning_rate": 2.074939172749392e-06, "loss": 0.6694, "step": 21459 }, { "epoch": 0.6265510496043911, "grad_norm": 0.6687219293516397, "learning_rate": 2.07477696674777e-06, "loss": 0.6131, "step": 21460 }, { "epoch": 0.6265802458322385, "grad_norm": 0.7276080255759254, "learning_rate": 2.0746147607461477e-06, "loss": 0.6983, "step": 21461 }, { "epoch": 0.6266094420600858, "grad_norm": 0.7345195794950876, "learning_rate": 2.0744525547445257e-06, "loss": 0.6115, "step": 21462 }, { "epoch": 0.6266386382879332, "grad_norm": 0.6860145686558959, "learning_rate": 2.0742903487429037e-06, "loss": 0.5897, "step": 21463 }, { "epoch": 0.6266678345157806, "grad_norm": 0.7256726825295495, "learning_rate": 2.0741281427412817e-06, "loss": 0.6472, "step": 21464 }, { "epoch": 0.6266970307436279, "grad_norm": 0.6733597650669696, "learning_rate": 2.0739659367396593e-06, "loss": 0.5513, "step": 21465 }, { "epoch": 0.6267262269714753, "grad_norm": 0.8454070901211802, "learning_rate": 2.0738037307380373e-06, "loss": 0.775, "step": 21466 }, { "epoch": 0.6267554231993226, "grad_norm": 0.7935766163535334, "learning_rate": 2.0736415247364153e-06, "loss": 0.7209, "step": 21467 }, { "epoch": 0.62678461942717, "grad_norm": 0.7239446197034377, "learning_rate": 2.0734793187347933e-06, "loss": 0.6251, "step": 21468 }, { "epoch": 0.6268138156550174, "grad_norm": 0.6934240557932554, "learning_rate": 2.0733171127331713e-06, "loss": 0.6309, "step": 21469 }, { "epoch": 0.6268430118828647, "grad_norm": 0.7320082907099195, "learning_rate": 2.0731549067315493e-06, "loss": 0.6424, "step": 21470 }, { "epoch": 0.6268722081107121, "grad_norm": 0.7044836062372583, "learning_rate": 2.0729927007299273e-06, "loss": 0.6147, "step": 21471 }, { "epoch": 0.6269014043385595, "grad_norm": 0.73971561729865, "learning_rate": 2.072830494728305e-06, "loss": 0.6717, "step": 21472 }, { "epoch": 0.6269306005664068, "grad_norm": 0.7626681474050202, "learning_rate": 2.072668288726683e-06, "loss": 0.7726, "step": 21473 }, { "epoch": 0.6269597967942542, "grad_norm": 0.7581337173037808, "learning_rate": 2.072506082725061e-06, "loss": 0.6745, "step": 21474 }, { "epoch": 0.6269889930221015, "grad_norm": 0.7049819821955078, "learning_rate": 2.072343876723439e-06, "loss": 0.6882, "step": 21475 }, { "epoch": 0.6270181892499489, "grad_norm": 0.6605465874693477, "learning_rate": 2.072181670721817e-06, "loss": 0.578, "step": 21476 }, { "epoch": 0.6270473854777963, "grad_norm": 0.7797457244995357, "learning_rate": 2.072019464720195e-06, "loss": 0.6832, "step": 21477 }, { "epoch": 0.6270765817056436, "grad_norm": 0.7100076252739718, "learning_rate": 2.071857258718573e-06, "loss": 0.6126, "step": 21478 }, { "epoch": 0.627105777933491, "grad_norm": 0.7058782592876677, "learning_rate": 2.0716950527169506e-06, "loss": 0.6192, "step": 21479 }, { "epoch": 0.6271349741613383, "grad_norm": 0.7253226168902831, "learning_rate": 2.0715328467153286e-06, "loss": 0.688, "step": 21480 }, { "epoch": 0.6271641703891857, "grad_norm": 0.7176087407172972, "learning_rate": 2.0713706407137066e-06, "loss": 0.6323, "step": 21481 }, { "epoch": 0.6271933666170331, "grad_norm": 0.6874650252163341, "learning_rate": 2.0712084347120846e-06, "loss": 0.5851, "step": 21482 }, { "epoch": 0.6272225628448804, "grad_norm": 0.7951799379760216, "learning_rate": 2.0710462287104626e-06, "loss": 0.7294, "step": 21483 }, { "epoch": 0.6272517590727278, "grad_norm": 0.8363252617800773, "learning_rate": 2.07088402270884e-06, "loss": 0.6819, "step": 21484 }, { "epoch": 0.6272809553005751, "grad_norm": 0.752637993870764, "learning_rate": 2.070721816707218e-06, "loss": 0.7076, "step": 21485 }, { "epoch": 0.6273101515284225, "grad_norm": 0.7292293826515793, "learning_rate": 2.0705596107055966e-06, "loss": 0.6301, "step": 21486 }, { "epoch": 0.6273393477562699, "grad_norm": 0.7010721998899273, "learning_rate": 2.070397404703974e-06, "loss": 0.6214, "step": 21487 }, { "epoch": 0.6273685439841172, "grad_norm": 0.7339618754176771, "learning_rate": 2.070235198702352e-06, "loss": 0.601, "step": 21488 }, { "epoch": 0.6273977402119646, "grad_norm": 0.7274010352668808, "learning_rate": 2.07007299270073e-06, "loss": 0.5981, "step": 21489 }, { "epoch": 0.627426936439812, "grad_norm": 0.7413648224500214, "learning_rate": 2.069910786699108e-06, "loss": 0.6373, "step": 21490 }, { "epoch": 0.6274561326676593, "grad_norm": 0.7370935520444566, "learning_rate": 2.0697485806974858e-06, "loss": 0.675, "step": 21491 }, { "epoch": 0.6274853288955067, "grad_norm": 0.754132867791708, "learning_rate": 2.069586374695864e-06, "loss": 0.6807, "step": 21492 }, { "epoch": 0.627514525123354, "grad_norm": 0.7658612791736977, "learning_rate": 2.069424168694242e-06, "loss": 0.7333, "step": 21493 }, { "epoch": 0.6275437213512014, "grad_norm": 0.7506739113853266, "learning_rate": 2.06926196269262e-06, "loss": 0.6391, "step": 21494 }, { "epoch": 0.6275729175790488, "grad_norm": 0.698729161781318, "learning_rate": 2.069099756690998e-06, "loss": 0.6303, "step": 21495 }, { "epoch": 0.6276021138068961, "grad_norm": 0.7622739533832469, "learning_rate": 2.068937550689376e-06, "loss": 0.6937, "step": 21496 }, { "epoch": 0.6276313100347435, "grad_norm": 0.7124151855352706, "learning_rate": 2.068775344687754e-06, "loss": 0.6085, "step": 21497 }, { "epoch": 0.6276605062625908, "grad_norm": 0.7035265827582947, "learning_rate": 2.0686131386861314e-06, "loss": 0.5472, "step": 21498 }, { "epoch": 0.6276897024904382, "grad_norm": 0.6800241176854497, "learning_rate": 2.0684509326845094e-06, "loss": 0.5588, "step": 21499 }, { "epoch": 0.6277188987182856, "grad_norm": 0.6803710285266621, "learning_rate": 2.0682887266828874e-06, "loss": 0.5738, "step": 21500 }, { "epoch": 0.6277480949461329, "grad_norm": 0.7195196555892767, "learning_rate": 2.0681265206812654e-06, "loss": 0.6993, "step": 21501 }, { "epoch": 0.6277772911739803, "grad_norm": 0.7001007770067863, "learning_rate": 2.0679643146796434e-06, "loss": 0.6057, "step": 21502 }, { "epoch": 0.6278064874018277, "grad_norm": 0.7098944025038623, "learning_rate": 2.067802108678021e-06, "loss": 0.6209, "step": 21503 }, { "epoch": 0.627835683629675, "grad_norm": 0.7138681862336894, "learning_rate": 2.067639902676399e-06, "loss": 0.6681, "step": 21504 }, { "epoch": 0.6278648798575224, "grad_norm": 0.6713585572386328, "learning_rate": 2.0674776966747775e-06, "loss": 0.588, "step": 21505 }, { "epoch": 0.6278940760853697, "grad_norm": 0.7131511398333319, "learning_rate": 2.067315490673155e-06, "loss": 0.6329, "step": 21506 }, { "epoch": 0.6279232723132171, "grad_norm": 0.734612630155112, "learning_rate": 2.067153284671533e-06, "loss": 0.6656, "step": 21507 }, { "epoch": 0.6279524685410645, "grad_norm": 0.6672768140779152, "learning_rate": 2.066991078669911e-06, "loss": 0.5885, "step": 21508 }, { "epoch": 0.6279816647689118, "grad_norm": 0.9945311920242487, "learning_rate": 2.066828872668289e-06, "loss": 0.6566, "step": 21509 }, { "epoch": 0.6280108609967592, "grad_norm": 0.6706000456605028, "learning_rate": 2.0666666666666666e-06, "loss": 0.5647, "step": 21510 }, { "epoch": 0.6280400572246065, "grad_norm": 0.7451978305443334, "learning_rate": 2.0665044606650447e-06, "loss": 0.6672, "step": 21511 }, { "epoch": 0.6280692534524539, "grad_norm": 0.7051175491662773, "learning_rate": 2.0663422546634227e-06, "loss": 0.6078, "step": 21512 }, { "epoch": 0.6280984496803013, "grad_norm": 0.7474652581807224, "learning_rate": 2.0661800486618007e-06, "loss": 0.7131, "step": 21513 }, { "epoch": 0.6281276459081486, "grad_norm": 0.7165343279042193, "learning_rate": 2.0660178426601787e-06, "loss": 0.5986, "step": 21514 }, { "epoch": 0.628156842135996, "grad_norm": 0.7259307104117676, "learning_rate": 2.0658556366585567e-06, "loss": 0.6411, "step": 21515 }, { "epoch": 0.6281860383638433, "grad_norm": 0.7259702139953816, "learning_rate": 2.0656934306569347e-06, "loss": 0.6564, "step": 21516 }, { "epoch": 0.6282152345916907, "grad_norm": 0.7107784983704608, "learning_rate": 2.0655312246553123e-06, "loss": 0.6321, "step": 21517 }, { "epoch": 0.6282444308195381, "grad_norm": 0.7372253082876338, "learning_rate": 2.0653690186536903e-06, "loss": 0.6255, "step": 21518 }, { "epoch": 0.6282736270473854, "grad_norm": 0.7456443555455528, "learning_rate": 2.0652068126520683e-06, "loss": 0.6932, "step": 21519 }, { "epoch": 0.6283028232752328, "grad_norm": 0.6735798101475566, "learning_rate": 2.0650446066504463e-06, "loss": 0.5854, "step": 21520 }, { "epoch": 0.6283320195030802, "grad_norm": 0.7442701642321076, "learning_rate": 2.0648824006488243e-06, "loss": 0.6681, "step": 21521 }, { "epoch": 0.6283612157309275, "grad_norm": 0.77400783777056, "learning_rate": 2.064720194647202e-06, "loss": 0.7541, "step": 21522 }, { "epoch": 0.6283904119587749, "grad_norm": 0.7417811189489643, "learning_rate": 2.06455798864558e-06, "loss": 0.6361, "step": 21523 }, { "epoch": 0.6284196081866222, "grad_norm": 0.6776975745332499, "learning_rate": 2.0643957826439583e-06, "loss": 0.5559, "step": 21524 }, { "epoch": 0.6284488044144696, "grad_norm": 0.7315795414563151, "learning_rate": 2.064233576642336e-06, "loss": 0.6542, "step": 21525 }, { "epoch": 0.628478000642317, "grad_norm": 0.7582050224099931, "learning_rate": 2.064071370640714e-06, "loss": 0.6432, "step": 21526 }, { "epoch": 0.6285071968701643, "grad_norm": 0.7460892294092635, "learning_rate": 2.063909164639092e-06, "loss": 0.7109, "step": 21527 }, { "epoch": 0.6285363930980118, "grad_norm": 0.6627980912382747, "learning_rate": 2.06374695863747e-06, "loss": 0.579, "step": 21528 }, { "epoch": 0.6285655893258592, "grad_norm": 0.7789960425741383, "learning_rate": 2.0635847526358475e-06, "loss": 0.6783, "step": 21529 }, { "epoch": 0.6285947855537065, "grad_norm": 0.688292996379023, "learning_rate": 2.0634225466342255e-06, "loss": 0.6145, "step": 21530 }, { "epoch": 0.6286239817815539, "grad_norm": 0.745011046414861, "learning_rate": 2.0632603406326035e-06, "loss": 0.6855, "step": 21531 }, { "epoch": 0.6286531780094012, "grad_norm": 0.7380629276203099, "learning_rate": 2.0630981346309815e-06, "loss": 0.6514, "step": 21532 }, { "epoch": 0.6286823742372486, "grad_norm": 0.753442809240193, "learning_rate": 2.0629359286293595e-06, "loss": 0.6959, "step": 21533 }, { "epoch": 0.628711570465096, "grad_norm": 0.724590104198867, "learning_rate": 2.0627737226277375e-06, "loss": 0.6685, "step": 21534 }, { "epoch": 0.6287407666929433, "grad_norm": 0.7575582398670372, "learning_rate": 2.0626115166261155e-06, "loss": 0.6593, "step": 21535 }, { "epoch": 0.6287699629207907, "grad_norm": 0.7188335083567553, "learning_rate": 2.062449310624493e-06, "loss": 0.622, "step": 21536 }, { "epoch": 0.628799159148638, "grad_norm": 0.7379522295290423, "learning_rate": 2.062287104622871e-06, "loss": 0.6585, "step": 21537 }, { "epoch": 0.6288283553764854, "grad_norm": 0.748607383801296, "learning_rate": 2.062124898621249e-06, "loss": 0.6459, "step": 21538 }, { "epoch": 0.6288575516043328, "grad_norm": 0.7043409555243142, "learning_rate": 2.061962692619627e-06, "loss": 0.5855, "step": 21539 }, { "epoch": 0.6288867478321801, "grad_norm": 0.6993060482504164, "learning_rate": 2.061800486618005e-06, "loss": 0.65, "step": 21540 }, { "epoch": 0.6289159440600275, "grad_norm": 0.6978824713690418, "learning_rate": 2.0616382806163827e-06, "loss": 0.5987, "step": 21541 }, { "epoch": 0.6289451402878748, "grad_norm": 0.730040158570281, "learning_rate": 2.0614760746147607e-06, "loss": 0.6217, "step": 21542 }, { "epoch": 0.6289743365157222, "grad_norm": 0.705939975451466, "learning_rate": 2.061313868613139e-06, "loss": 0.6311, "step": 21543 }, { "epoch": 0.6290035327435696, "grad_norm": 0.7223856796865178, "learning_rate": 2.0611516626115168e-06, "loss": 0.656, "step": 21544 }, { "epoch": 0.6290327289714169, "grad_norm": 0.682672034769301, "learning_rate": 2.0609894566098948e-06, "loss": 0.5601, "step": 21545 }, { "epoch": 0.6290619251992643, "grad_norm": 0.7568599690280324, "learning_rate": 2.0608272506082728e-06, "loss": 0.6536, "step": 21546 }, { "epoch": 0.6290911214271117, "grad_norm": 0.7235692985695976, "learning_rate": 2.0606650446066508e-06, "loss": 0.5748, "step": 21547 }, { "epoch": 0.629120317654959, "grad_norm": 0.7820837145124981, "learning_rate": 2.0605028386050284e-06, "loss": 0.7133, "step": 21548 }, { "epoch": 0.6291495138828064, "grad_norm": 0.7528494499218461, "learning_rate": 2.0603406326034064e-06, "loss": 0.6868, "step": 21549 }, { "epoch": 0.6291787101106537, "grad_norm": 0.7297095593574711, "learning_rate": 2.0601784266017844e-06, "loss": 0.6263, "step": 21550 }, { "epoch": 0.6292079063385011, "grad_norm": 0.7599221802184055, "learning_rate": 2.0600162206001624e-06, "loss": 0.6765, "step": 21551 }, { "epoch": 0.6292371025663485, "grad_norm": 0.7465456875915008, "learning_rate": 2.0598540145985404e-06, "loss": 0.655, "step": 21552 }, { "epoch": 0.6292662987941958, "grad_norm": 0.8571192677966436, "learning_rate": 2.0596918085969184e-06, "loss": 0.69, "step": 21553 }, { "epoch": 0.6292954950220432, "grad_norm": 0.6890725076799578, "learning_rate": 2.0595296025952964e-06, "loss": 0.6328, "step": 21554 }, { "epoch": 0.6293246912498905, "grad_norm": 0.7297684923108844, "learning_rate": 2.059367396593674e-06, "loss": 0.6675, "step": 21555 }, { "epoch": 0.6293538874777379, "grad_norm": 0.7491181867180952, "learning_rate": 2.059205190592052e-06, "loss": 0.6809, "step": 21556 }, { "epoch": 0.6293830837055853, "grad_norm": 0.7015529982553382, "learning_rate": 2.05904298459043e-06, "loss": 0.6124, "step": 21557 }, { "epoch": 0.6294122799334326, "grad_norm": 0.7276875253579788, "learning_rate": 2.058880778588808e-06, "loss": 0.6515, "step": 21558 }, { "epoch": 0.62944147616128, "grad_norm": 0.7611949030796747, "learning_rate": 2.058718572587186e-06, "loss": 0.7182, "step": 21559 }, { "epoch": 0.6294706723891274, "grad_norm": 0.7463002800555305, "learning_rate": 2.0585563665855636e-06, "loss": 0.7137, "step": 21560 }, { "epoch": 0.6294998686169747, "grad_norm": 0.7075248212975498, "learning_rate": 2.0583941605839416e-06, "loss": 0.6495, "step": 21561 }, { "epoch": 0.6295290648448221, "grad_norm": 0.770205515872245, "learning_rate": 2.05823195458232e-06, "loss": 0.7043, "step": 21562 }, { "epoch": 0.6295582610726694, "grad_norm": 0.7365363719049244, "learning_rate": 2.0580697485806976e-06, "loss": 0.6893, "step": 21563 }, { "epoch": 0.6295874573005168, "grad_norm": 0.728115777333356, "learning_rate": 2.0579075425790756e-06, "loss": 0.6181, "step": 21564 }, { "epoch": 0.6296166535283642, "grad_norm": 0.8464715497194019, "learning_rate": 2.0577453365774536e-06, "loss": 0.5753, "step": 21565 }, { "epoch": 0.6296458497562115, "grad_norm": 0.6533780830621249, "learning_rate": 2.0575831305758316e-06, "loss": 0.5322, "step": 21566 }, { "epoch": 0.6296750459840589, "grad_norm": 0.7381328795991497, "learning_rate": 2.0574209245742092e-06, "loss": 0.6777, "step": 21567 }, { "epoch": 0.6297042422119062, "grad_norm": 0.6655213894848603, "learning_rate": 2.0572587185725872e-06, "loss": 0.5885, "step": 21568 }, { "epoch": 0.6297334384397536, "grad_norm": 0.7191772748687506, "learning_rate": 2.0570965125709652e-06, "loss": 0.6074, "step": 21569 }, { "epoch": 0.629762634667601, "grad_norm": 0.7279263542368666, "learning_rate": 2.0569343065693432e-06, "loss": 0.6195, "step": 21570 }, { "epoch": 0.6297918308954483, "grad_norm": 0.7436661088036953, "learning_rate": 2.0567721005677212e-06, "loss": 0.5949, "step": 21571 }, { "epoch": 0.6298210271232957, "grad_norm": 0.7170451188690229, "learning_rate": 2.0566098945660993e-06, "loss": 0.6396, "step": 21572 }, { "epoch": 0.629850223351143, "grad_norm": 0.8087299694544815, "learning_rate": 2.0564476885644773e-06, "loss": 0.798, "step": 21573 }, { "epoch": 0.6298794195789904, "grad_norm": 0.7520059191268498, "learning_rate": 2.056285482562855e-06, "loss": 0.6409, "step": 21574 }, { "epoch": 0.6299086158068378, "grad_norm": 0.8210389613296801, "learning_rate": 2.056123276561233e-06, "loss": 0.6765, "step": 21575 }, { "epoch": 0.6299378120346851, "grad_norm": 0.6777015066119884, "learning_rate": 2.055961070559611e-06, "loss": 0.5944, "step": 21576 }, { "epoch": 0.6299670082625325, "grad_norm": 0.7194619376521024, "learning_rate": 2.055798864557989e-06, "loss": 0.6607, "step": 21577 }, { "epoch": 0.6299962044903799, "grad_norm": 0.6923403505151753, "learning_rate": 2.055636658556367e-06, "loss": 0.6041, "step": 21578 }, { "epoch": 0.6300254007182272, "grad_norm": 0.7818682541787371, "learning_rate": 2.0554744525547445e-06, "loss": 0.7085, "step": 21579 }, { "epoch": 0.6300545969460746, "grad_norm": 0.6988508002822801, "learning_rate": 2.0553122465531225e-06, "loss": 0.6208, "step": 21580 }, { "epoch": 0.6300837931739219, "grad_norm": 0.6974553966106959, "learning_rate": 2.055150040551501e-06, "loss": 0.6316, "step": 21581 }, { "epoch": 0.6301129894017693, "grad_norm": 0.7648151981665174, "learning_rate": 2.0549878345498785e-06, "loss": 0.7192, "step": 21582 }, { "epoch": 0.6301421856296167, "grad_norm": 0.6983632348408945, "learning_rate": 2.0548256285482565e-06, "loss": 0.6182, "step": 21583 }, { "epoch": 0.630171381857464, "grad_norm": 0.7413405004562333, "learning_rate": 2.0546634225466345e-06, "loss": 0.662, "step": 21584 }, { "epoch": 0.6302005780853114, "grad_norm": 0.7486705256150858, "learning_rate": 2.0545012165450125e-06, "loss": 0.6531, "step": 21585 }, { "epoch": 0.6302297743131587, "grad_norm": 0.6908495169619981, "learning_rate": 2.05433901054339e-06, "loss": 0.5762, "step": 21586 }, { "epoch": 0.6302589705410061, "grad_norm": 0.7150098046584857, "learning_rate": 2.054176804541768e-06, "loss": 0.622, "step": 21587 }, { "epoch": 0.6302881667688535, "grad_norm": 0.7554692507393149, "learning_rate": 2.054014598540146e-06, "loss": 0.6612, "step": 21588 }, { "epoch": 0.6303173629967008, "grad_norm": 0.7116302794841997, "learning_rate": 2.053852392538524e-06, "loss": 0.6267, "step": 21589 }, { "epoch": 0.6303465592245482, "grad_norm": 0.7770102323118596, "learning_rate": 2.053690186536902e-06, "loss": 0.6673, "step": 21590 }, { "epoch": 0.6303757554523955, "grad_norm": 0.7668642973410842, "learning_rate": 2.05352798053528e-06, "loss": 0.697, "step": 21591 }, { "epoch": 0.6304049516802429, "grad_norm": 0.6736863801865463, "learning_rate": 2.053365774533658e-06, "loss": 0.5608, "step": 21592 }, { "epoch": 0.6304341479080903, "grad_norm": 0.7358380306289621, "learning_rate": 2.0532035685320357e-06, "loss": 0.6308, "step": 21593 }, { "epoch": 0.6304633441359376, "grad_norm": 0.7433916705837266, "learning_rate": 2.0530413625304137e-06, "loss": 0.6021, "step": 21594 }, { "epoch": 0.630492540363785, "grad_norm": 0.6626764967169642, "learning_rate": 2.0528791565287917e-06, "loss": 0.5734, "step": 21595 }, { "epoch": 0.6305217365916324, "grad_norm": 0.7146156384269875, "learning_rate": 2.0527169505271697e-06, "loss": 0.6091, "step": 21596 }, { "epoch": 0.6305509328194797, "grad_norm": 0.7210388157840001, "learning_rate": 2.0525547445255477e-06, "loss": 0.6481, "step": 21597 }, { "epoch": 0.6305801290473271, "grad_norm": 0.7289991813542916, "learning_rate": 2.0523925385239253e-06, "loss": 0.6407, "step": 21598 }, { "epoch": 0.6306093252751744, "grad_norm": 0.7424052750325087, "learning_rate": 2.0522303325223033e-06, "loss": 0.6649, "step": 21599 }, { "epoch": 0.6306385215030218, "grad_norm": 0.775071492887586, "learning_rate": 2.0520681265206817e-06, "loss": 0.6625, "step": 21600 }, { "epoch": 0.6306677177308692, "grad_norm": 0.7737602103626952, "learning_rate": 2.0519059205190593e-06, "loss": 0.6893, "step": 21601 }, { "epoch": 0.6306969139587165, "grad_norm": 0.7584230066469931, "learning_rate": 2.0517437145174373e-06, "loss": 0.6472, "step": 21602 }, { "epoch": 0.6307261101865639, "grad_norm": 0.7284804010422371, "learning_rate": 2.0515815085158153e-06, "loss": 0.612, "step": 21603 }, { "epoch": 0.6307553064144112, "grad_norm": 0.730619215770265, "learning_rate": 2.0514193025141934e-06, "loss": 0.6714, "step": 21604 }, { "epoch": 0.6307845026422586, "grad_norm": 0.6851260630556397, "learning_rate": 2.051257096512571e-06, "loss": 0.6123, "step": 21605 }, { "epoch": 0.630813698870106, "grad_norm": 0.7577554200527445, "learning_rate": 2.051094890510949e-06, "loss": 0.7115, "step": 21606 }, { "epoch": 0.6308428950979533, "grad_norm": 0.7686991068680428, "learning_rate": 2.050932684509327e-06, "loss": 0.7427, "step": 21607 }, { "epoch": 0.6308720913258007, "grad_norm": 0.6856597558828649, "learning_rate": 2.050770478507705e-06, "loss": 0.5797, "step": 21608 }, { "epoch": 0.630901287553648, "grad_norm": 0.7308929725446135, "learning_rate": 2.050608272506083e-06, "loss": 0.6717, "step": 21609 }, { "epoch": 0.6309304837814954, "grad_norm": 0.7015775914902779, "learning_rate": 2.050446066504461e-06, "loss": 0.6121, "step": 21610 }, { "epoch": 0.6309596800093428, "grad_norm": 0.6944248920369841, "learning_rate": 2.050283860502839e-06, "loss": 0.5847, "step": 21611 }, { "epoch": 0.6309888762371901, "grad_norm": 0.7488802462499634, "learning_rate": 2.0501216545012166e-06, "loss": 0.6917, "step": 21612 }, { "epoch": 0.6310180724650375, "grad_norm": 0.7414789686550938, "learning_rate": 2.0499594484995946e-06, "loss": 0.6581, "step": 21613 }, { "epoch": 0.6310472686928849, "grad_norm": 0.7133980145419586, "learning_rate": 2.0497972424979726e-06, "loss": 0.653, "step": 21614 }, { "epoch": 0.6310764649207322, "grad_norm": 0.759147428441122, "learning_rate": 2.0496350364963506e-06, "loss": 0.6487, "step": 21615 }, { "epoch": 0.6311056611485796, "grad_norm": 0.7620948162840457, "learning_rate": 2.0494728304947286e-06, "loss": 0.6439, "step": 21616 }, { "epoch": 0.6311348573764269, "grad_norm": 0.7398202712665956, "learning_rate": 2.049310624493106e-06, "loss": 0.6273, "step": 21617 }, { "epoch": 0.6311640536042743, "grad_norm": 0.7896856279173676, "learning_rate": 2.049148418491484e-06, "loss": 0.7089, "step": 21618 }, { "epoch": 0.6311932498321217, "grad_norm": 0.7113529686829718, "learning_rate": 2.0489862124898626e-06, "loss": 0.6204, "step": 21619 }, { "epoch": 0.631222446059969, "grad_norm": 0.7185376575895998, "learning_rate": 2.04882400648824e-06, "loss": 0.6666, "step": 21620 }, { "epoch": 0.6312516422878164, "grad_norm": 0.831981237419957, "learning_rate": 2.048661800486618e-06, "loss": 0.6949, "step": 21621 }, { "epoch": 0.6312808385156637, "grad_norm": 0.7291922629466581, "learning_rate": 2.048499594484996e-06, "loss": 0.6697, "step": 21622 }, { "epoch": 0.6313100347435111, "grad_norm": 0.7301313668616075, "learning_rate": 2.0483373884833742e-06, "loss": 0.6496, "step": 21623 }, { "epoch": 0.6313392309713585, "grad_norm": 0.7513288887819526, "learning_rate": 2.048175182481752e-06, "loss": 0.6872, "step": 21624 }, { "epoch": 0.6313684271992058, "grad_norm": 0.6630395902032443, "learning_rate": 2.04801297648013e-06, "loss": 0.5382, "step": 21625 }, { "epoch": 0.6313976234270532, "grad_norm": 0.7149980928876523, "learning_rate": 2.047850770478508e-06, "loss": 0.6512, "step": 21626 }, { "epoch": 0.6314268196549006, "grad_norm": 0.746362545823184, "learning_rate": 2.047688564476886e-06, "loss": 0.6459, "step": 21627 }, { "epoch": 0.6314560158827479, "grad_norm": 0.7607117800022413, "learning_rate": 2.047526358475264e-06, "loss": 0.7051, "step": 21628 }, { "epoch": 0.6314852121105953, "grad_norm": 0.7037400660965617, "learning_rate": 2.047364152473642e-06, "loss": 0.597, "step": 21629 }, { "epoch": 0.6315144083384426, "grad_norm": 0.7299742278392182, "learning_rate": 2.04720194647202e-06, "loss": 0.6292, "step": 21630 }, { "epoch": 0.63154360456629, "grad_norm": 0.7687283945996783, "learning_rate": 2.0470397404703974e-06, "loss": 0.743, "step": 21631 }, { "epoch": 0.6315728007941374, "grad_norm": 0.7520427413490421, "learning_rate": 2.0468775344687754e-06, "loss": 0.6747, "step": 21632 }, { "epoch": 0.6316019970219847, "grad_norm": 0.7443823213668669, "learning_rate": 2.0467153284671534e-06, "loss": 0.5974, "step": 21633 }, { "epoch": 0.6316311932498321, "grad_norm": 0.7040992464195539, "learning_rate": 2.0465531224655314e-06, "loss": 0.5789, "step": 21634 }, { "epoch": 0.6316603894776794, "grad_norm": 0.7779266554460125, "learning_rate": 2.0463909164639094e-06, "loss": 0.7193, "step": 21635 }, { "epoch": 0.6316895857055268, "grad_norm": 0.6803844566956212, "learning_rate": 2.046228710462287e-06, "loss": 0.5874, "step": 21636 }, { "epoch": 0.6317187819333742, "grad_norm": 0.7272088587716863, "learning_rate": 2.0460665044606655e-06, "loss": 0.6727, "step": 21637 }, { "epoch": 0.6317479781612215, "grad_norm": 0.7390325998652653, "learning_rate": 2.0459042984590435e-06, "loss": 0.6673, "step": 21638 }, { "epoch": 0.6317771743890689, "grad_norm": 0.7722631364311521, "learning_rate": 2.045742092457421e-06, "loss": 0.726, "step": 21639 }, { "epoch": 0.6318063706169162, "grad_norm": 0.7623294799607395, "learning_rate": 2.045579886455799e-06, "loss": 0.6162, "step": 21640 }, { "epoch": 0.6318355668447636, "grad_norm": 0.7696396133872645, "learning_rate": 2.045417680454177e-06, "loss": 0.6966, "step": 21641 }, { "epoch": 0.631864763072611, "grad_norm": 0.6966599289192886, "learning_rate": 2.045255474452555e-06, "loss": 0.5815, "step": 21642 }, { "epoch": 0.6318939593004583, "grad_norm": 0.7360027529360053, "learning_rate": 2.0450932684509327e-06, "loss": 0.651, "step": 21643 }, { "epoch": 0.6319231555283057, "grad_norm": 0.7361748669088979, "learning_rate": 2.0449310624493107e-06, "loss": 0.6541, "step": 21644 }, { "epoch": 0.631952351756153, "grad_norm": 0.765369100225029, "learning_rate": 2.0447688564476887e-06, "loss": 0.6859, "step": 21645 }, { "epoch": 0.6319815479840004, "grad_norm": 0.7274816460429562, "learning_rate": 2.0446066504460667e-06, "loss": 0.6681, "step": 21646 }, { "epoch": 0.6320107442118478, "grad_norm": 0.7192028589428535, "learning_rate": 2.0444444444444447e-06, "loss": 0.6819, "step": 21647 }, { "epoch": 0.6320399404396951, "grad_norm": 0.8337722212881906, "learning_rate": 2.0442822384428227e-06, "loss": 0.7461, "step": 21648 }, { "epoch": 0.6320691366675426, "grad_norm": 0.6717666330979035, "learning_rate": 2.0441200324412007e-06, "loss": 0.552, "step": 21649 }, { "epoch": 0.63209833289539, "grad_norm": 0.6993256527781152, "learning_rate": 2.0439578264395783e-06, "loss": 0.6, "step": 21650 }, { "epoch": 0.6321275291232373, "grad_norm": 0.6814201968387152, "learning_rate": 2.0437956204379563e-06, "loss": 0.5456, "step": 21651 }, { "epoch": 0.6321567253510847, "grad_norm": 0.713215796657589, "learning_rate": 2.0436334144363343e-06, "loss": 0.6349, "step": 21652 }, { "epoch": 0.632185921578932, "grad_norm": 0.7183803859822671, "learning_rate": 2.0434712084347123e-06, "loss": 0.6554, "step": 21653 }, { "epoch": 0.6322151178067794, "grad_norm": 0.7565743306580871, "learning_rate": 2.0433090024330903e-06, "loss": 0.7228, "step": 21654 }, { "epoch": 0.6322443140346268, "grad_norm": 0.7306361535309595, "learning_rate": 2.043146796431468e-06, "loss": 0.5871, "step": 21655 }, { "epoch": 0.6322735102624741, "grad_norm": 0.6974104469034061, "learning_rate": 2.0429845904298463e-06, "loss": 0.5816, "step": 21656 }, { "epoch": 0.6323027064903215, "grad_norm": 0.7287857675764142, "learning_rate": 2.0428223844282243e-06, "loss": 0.6388, "step": 21657 }, { "epoch": 0.6323319027181689, "grad_norm": 0.7248297158936766, "learning_rate": 2.042660178426602e-06, "loss": 0.6409, "step": 21658 }, { "epoch": 0.6323610989460162, "grad_norm": 0.7357301332044146, "learning_rate": 2.04249797242498e-06, "loss": 0.643, "step": 21659 }, { "epoch": 0.6323902951738636, "grad_norm": 0.750154492946488, "learning_rate": 2.042335766423358e-06, "loss": 0.652, "step": 21660 }, { "epoch": 0.6324194914017109, "grad_norm": 0.7462863307176488, "learning_rate": 2.042173560421736e-06, "loss": 0.6025, "step": 21661 }, { "epoch": 0.6324486876295583, "grad_norm": 0.7301674795034863, "learning_rate": 2.0420113544201135e-06, "loss": 0.7045, "step": 21662 }, { "epoch": 0.6324778838574057, "grad_norm": 0.7318303932747915, "learning_rate": 2.0418491484184915e-06, "loss": 0.6589, "step": 21663 }, { "epoch": 0.632507080085253, "grad_norm": 0.7743344320388865, "learning_rate": 2.0416869424168695e-06, "loss": 0.7056, "step": 21664 }, { "epoch": 0.6325362763131004, "grad_norm": 0.746507906701262, "learning_rate": 2.0415247364152475e-06, "loss": 0.6819, "step": 21665 }, { "epoch": 0.6325654725409477, "grad_norm": 0.726805338880024, "learning_rate": 2.0413625304136255e-06, "loss": 0.6659, "step": 21666 }, { "epoch": 0.6325946687687951, "grad_norm": 0.688249932654436, "learning_rate": 2.0412003244120035e-06, "loss": 0.5942, "step": 21667 }, { "epoch": 0.6326238649966425, "grad_norm": 0.6851209022337232, "learning_rate": 2.0410381184103816e-06, "loss": 0.5562, "step": 21668 }, { "epoch": 0.6326530612244898, "grad_norm": 0.6981519173963588, "learning_rate": 2.040875912408759e-06, "loss": 0.5982, "step": 21669 }, { "epoch": 0.6326822574523372, "grad_norm": 0.8262787439151351, "learning_rate": 2.040713706407137e-06, "loss": 0.7883, "step": 21670 }, { "epoch": 0.6327114536801846, "grad_norm": 0.7547858837336306, "learning_rate": 2.040551500405515e-06, "loss": 0.5908, "step": 21671 }, { "epoch": 0.6327406499080319, "grad_norm": 0.672409365591592, "learning_rate": 2.040389294403893e-06, "loss": 0.6097, "step": 21672 }, { "epoch": 0.6327698461358793, "grad_norm": 0.769531875306522, "learning_rate": 2.040227088402271e-06, "loss": 0.6519, "step": 21673 }, { "epoch": 0.6327990423637266, "grad_norm": 0.7048863710976603, "learning_rate": 2.0400648824006487e-06, "loss": 0.5071, "step": 21674 }, { "epoch": 0.632828238591574, "grad_norm": 0.7032901594846461, "learning_rate": 2.039902676399027e-06, "loss": 0.6639, "step": 21675 }, { "epoch": 0.6328574348194214, "grad_norm": 0.7083024054939838, "learning_rate": 2.0397404703974048e-06, "loss": 0.5626, "step": 21676 }, { "epoch": 0.6328866310472687, "grad_norm": 0.7244459793625263, "learning_rate": 2.0395782643957828e-06, "loss": 0.6463, "step": 21677 }, { "epoch": 0.6329158272751161, "grad_norm": 0.7032880831858531, "learning_rate": 2.0394160583941608e-06, "loss": 0.6421, "step": 21678 }, { "epoch": 0.6329450235029634, "grad_norm": 0.7058938385460054, "learning_rate": 2.0392538523925388e-06, "loss": 0.5537, "step": 21679 }, { "epoch": 0.6329742197308108, "grad_norm": 0.7640975082215691, "learning_rate": 2.0390916463909168e-06, "loss": 0.7164, "step": 21680 }, { "epoch": 0.6330034159586582, "grad_norm": 0.85167573669296, "learning_rate": 2.0389294403892944e-06, "loss": 0.7169, "step": 21681 }, { "epoch": 0.6330326121865055, "grad_norm": 0.6677489981097129, "learning_rate": 2.0387672343876724e-06, "loss": 0.6222, "step": 21682 }, { "epoch": 0.6330618084143529, "grad_norm": 0.772922424068743, "learning_rate": 2.0386050283860504e-06, "loss": 0.6794, "step": 21683 }, { "epoch": 0.6330910046422003, "grad_norm": 0.7088866018550759, "learning_rate": 2.0384428223844284e-06, "loss": 0.5922, "step": 21684 }, { "epoch": 0.6331202008700476, "grad_norm": 0.7261462975709498, "learning_rate": 2.0382806163828064e-06, "loss": 0.6548, "step": 21685 }, { "epoch": 0.633149397097895, "grad_norm": 0.6545026351041929, "learning_rate": 2.0381184103811844e-06, "loss": 0.5492, "step": 21686 }, { "epoch": 0.6331785933257423, "grad_norm": 0.7165356048163721, "learning_rate": 2.0379562043795624e-06, "loss": 0.5999, "step": 21687 }, { "epoch": 0.6332077895535897, "grad_norm": 0.7442513459344242, "learning_rate": 2.03779399837794e-06, "loss": 0.5904, "step": 21688 }, { "epoch": 0.6332369857814371, "grad_norm": 0.7115882784193097, "learning_rate": 2.037631792376318e-06, "loss": 0.5743, "step": 21689 }, { "epoch": 0.6332661820092844, "grad_norm": 0.7106017669455931, "learning_rate": 2.037469586374696e-06, "loss": 0.6387, "step": 21690 }, { "epoch": 0.6332953782371318, "grad_norm": 0.7554717313612771, "learning_rate": 2.037307380373074e-06, "loss": 0.6114, "step": 21691 }, { "epoch": 0.6333245744649791, "grad_norm": 0.7553422641281597, "learning_rate": 2.037145174371452e-06, "loss": 0.7151, "step": 21692 }, { "epoch": 0.6333537706928265, "grad_norm": 0.7363963746896026, "learning_rate": 2.0369829683698296e-06, "loss": 0.6353, "step": 21693 }, { "epoch": 0.6333829669206739, "grad_norm": 0.7535942241237208, "learning_rate": 2.036820762368208e-06, "loss": 0.6019, "step": 21694 }, { "epoch": 0.6334121631485212, "grad_norm": 0.753621349139874, "learning_rate": 2.0366585563665856e-06, "loss": 0.6745, "step": 21695 }, { "epoch": 0.6334413593763686, "grad_norm": 0.696624828397812, "learning_rate": 2.0364963503649636e-06, "loss": 0.5544, "step": 21696 }, { "epoch": 0.633470555604216, "grad_norm": 0.6789024144446296, "learning_rate": 2.0363341443633416e-06, "loss": 0.5592, "step": 21697 }, { "epoch": 0.6334997518320633, "grad_norm": 0.7902659463464125, "learning_rate": 2.0361719383617196e-06, "loss": 0.7063, "step": 21698 }, { "epoch": 0.6335289480599107, "grad_norm": 0.7462852191434574, "learning_rate": 2.0360097323600976e-06, "loss": 0.6255, "step": 21699 }, { "epoch": 0.633558144287758, "grad_norm": 0.7776288790639907, "learning_rate": 2.0358475263584752e-06, "loss": 0.739, "step": 21700 }, { "epoch": 0.6335873405156054, "grad_norm": 0.7085274106985059, "learning_rate": 2.0356853203568532e-06, "loss": 0.636, "step": 21701 }, { "epoch": 0.6336165367434528, "grad_norm": 0.7773765905146656, "learning_rate": 2.0355231143552312e-06, "loss": 0.7342, "step": 21702 }, { "epoch": 0.6336457329713001, "grad_norm": 0.6757813800734778, "learning_rate": 2.0353609083536093e-06, "loss": 0.5916, "step": 21703 }, { "epoch": 0.6336749291991475, "grad_norm": 0.7457258361535043, "learning_rate": 2.0351987023519873e-06, "loss": 0.74, "step": 21704 }, { "epoch": 0.6337041254269948, "grad_norm": 0.6697036589715671, "learning_rate": 2.0350364963503653e-06, "loss": 0.5552, "step": 21705 }, { "epoch": 0.6337333216548422, "grad_norm": 0.7404585572099253, "learning_rate": 2.0348742903487433e-06, "loss": 0.6735, "step": 21706 }, { "epoch": 0.6337625178826896, "grad_norm": 0.705118402945412, "learning_rate": 2.034712084347121e-06, "loss": 0.6049, "step": 21707 }, { "epoch": 0.6337917141105369, "grad_norm": 0.7067181066032711, "learning_rate": 2.034549878345499e-06, "loss": 0.6102, "step": 21708 }, { "epoch": 0.6338209103383843, "grad_norm": 0.6741144675289661, "learning_rate": 2.034387672343877e-06, "loss": 0.615, "step": 21709 }, { "epoch": 0.6338501065662316, "grad_norm": 0.7228468882415707, "learning_rate": 2.034225466342255e-06, "loss": 0.6628, "step": 21710 }, { "epoch": 0.633879302794079, "grad_norm": 0.7051230679204141, "learning_rate": 2.034063260340633e-06, "loss": 0.6342, "step": 21711 }, { "epoch": 0.6339084990219264, "grad_norm": 0.685409905320363, "learning_rate": 2.0339010543390105e-06, "loss": 0.6063, "step": 21712 }, { "epoch": 0.6339376952497737, "grad_norm": 0.6852058079328672, "learning_rate": 2.033738848337389e-06, "loss": 0.5828, "step": 21713 }, { "epoch": 0.6339668914776211, "grad_norm": 0.7085218165333206, "learning_rate": 2.0335766423357665e-06, "loss": 0.6257, "step": 21714 }, { "epoch": 0.6339960877054684, "grad_norm": 0.6475241385129741, "learning_rate": 2.0334144363341445e-06, "loss": 0.547, "step": 21715 }, { "epoch": 0.6340252839333158, "grad_norm": 0.7285674761235225, "learning_rate": 2.0332522303325225e-06, "loss": 0.6593, "step": 21716 }, { "epoch": 0.6340544801611632, "grad_norm": 0.668541279603292, "learning_rate": 2.0330900243309005e-06, "loss": 0.5555, "step": 21717 }, { "epoch": 0.6340836763890105, "grad_norm": 0.6935616825520702, "learning_rate": 2.0329278183292785e-06, "loss": 0.5822, "step": 21718 }, { "epoch": 0.6341128726168579, "grad_norm": 0.7130821123153129, "learning_rate": 2.032765612327656e-06, "loss": 0.6073, "step": 21719 }, { "epoch": 0.6341420688447053, "grad_norm": 0.6957926003725592, "learning_rate": 2.032603406326034e-06, "loss": 0.6313, "step": 21720 }, { "epoch": 0.6341712650725526, "grad_norm": 0.7172306268812179, "learning_rate": 2.032441200324412e-06, "loss": 0.5944, "step": 21721 }, { "epoch": 0.6342004613004, "grad_norm": 0.8297279246567514, "learning_rate": 2.03227899432279e-06, "loss": 0.6495, "step": 21722 }, { "epoch": 0.6342296575282473, "grad_norm": 0.7390956015039858, "learning_rate": 2.032116788321168e-06, "loss": 0.6709, "step": 21723 }, { "epoch": 0.6342588537560947, "grad_norm": 0.7677013646615566, "learning_rate": 2.031954582319546e-06, "loss": 0.7003, "step": 21724 }, { "epoch": 0.6342880499839421, "grad_norm": 0.755601406817278, "learning_rate": 2.031792376317924e-06, "loss": 0.6774, "step": 21725 }, { "epoch": 0.6343172462117894, "grad_norm": 0.8547510221440174, "learning_rate": 2.0316301703163017e-06, "loss": 0.8169, "step": 21726 }, { "epoch": 0.6343464424396368, "grad_norm": 0.7205879702306266, "learning_rate": 2.0314679643146797e-06, "loss": 0.6403, "step": 21727 }, { "epoch": 0.6343756386674841, "grad_norm": 0.7408736143418078, "learning_rate": 2.0313057583130577e-06, "loss": 0.6248, "step": 21728 }, { "epoch": 0.6344048348953315, "grad_norm": 0.7897973337297158, "learning_rate": 2.0311435523114357e-06, "loss": 0.7201, "step": 21729 }, { "epoch": 0.6344340311231789, "grad_norm": 0.762936102655734, "learning_rate": 2.0309813463098137e-06, "loss": 0.6813, "step": 21730 }, { "epoch": 0.6344632273510262, "grad_norm": 0.6674262780210911, "learning_rate": 2.0308191403081913e-06, "loss": 0.6031, "step": 21731 }, { "epoch": 0.6344924235788736, "grad_norm": 0.7053133837119314, "learning_rate": 2.0306569343065698e-06, "loss": 0.6443, "step": 21732 }, { "epoch": 0.634521619806721, "grad_norm": 0.704562052080055, "learning_rate": 2.0304947283049473e-06, "loss": 0.6384, "step": 21733 }, { "epoch": 0.6345508160345683, "grad_norm": 0.7734205128478584, "learning_rate": 2.0303325223033253e-06, "loss": 0.6462, "step": 21734 }, { "epoch": 0.6345800122624157, "grad_norm": 0.7618748061311369, "learning_rate": 2.0301703163017034e-06, "loss": 0.6857, "step": 21735 }, { "epoch": 0.634609208490263, "grad_norm": 0.7575435158935956, "learning_rate": 2.0300081103000814e-06, "loss": 0.6176, "step": 21736 }, { "epoch": 0.6346384047181104, "grad_norm": 0.7157482606973848, "learning_rate": 2.0298459042984594e-06, "loss": 0.6554, "step": 21737 }, { "epoch": 0.6346676009459578, "grad_norm": 0.7575579795998197, "learning_rate": 2.029683698296837e-06, "loss": 0.6395, "step": 21738 }, { "epoch": 0.6346967971738051, "grad_norm": 0.6479696064356631, "learning_rate": 2.029521492295215e-06, "loss": 0.4931, "step": 21739 }, { "epoch": 0.6347259934016525, "grad_norm": 0.669360360552244, "learning_rate": 2.029359286293593e-06, "loss": 0.5892, "step": 21740 }, { "epoch": 0.6347551896294998, "grad_norm": 0.7419220719116637, "learning_rate": 2.029197080291971e-06, "loss": 0.6617, "step": 21741 }, { "epoch": 0.6347843858573472, "grad_norm": 0.7185256808905708, "learning_rate": 2.029034874290349e-06, "loss": 0.6177, "step": 21742 }, { "epoch": 0.6348135820851946, "grad_norm": 0.6929578160112994, "learning_rate": 2.028872668288727e-06, "loss": 0.6197, "step": 21743 }, { "epoch": 0.6348427783130419, "grad_norm": 0.712403335105026, "learning_rate": 2.028710462287105e-06, "loss": 0.5997, "step": 21744 }, { "epoch": 0.6348719745408893, "grad_norm": 0.7269063986852664, "learning_rate": 2.0285482562854826e-06, "loss": 0.5865, "step": 21745 }, { "epoch": 0.6349011707687366, "grad_norm": 0.713843725822323, "learning_rate": 2.0283860502838606e-06, "loss": 0.6822, "step": 21746 }, { "epoch": 0.634930366996584, "grad_norm": 1.0313258922029493, "learning_rate": 2.0282238442822386e-06, "loss": 0.6895, "step": 21747 }, { "epoch": 0.6349595632244314, "grad_norm": 0.7912337726728641, "learning_rate": 2.0280616382806166e-06, "loss": 0.7265, "step": 21748 }, { "epoch": 0.6349887594522787, "grad_norm": 0.7446075384630326, "learning_rate": 2.027899432278994e-06, "loss": 0.6566, "step": 21749 }, { "epoch": 0.6350179556801261, "grad_norm": 0.7510479584223549, "learning_rate": 2.027737226277372e-06, "loss": 0.6983, "step": 21750 }, { "epoch": 0.6350471519079735, "grad_norm": 0.6776977509825889, "learning_rate": 2.0275750202757506e-06, "loss": 0.6442, "step": 21751 }, { "epoch": 0.6350763481358208, "grad_norm": 0.747856340924674, "learning_rate": 2.027412814274128e-06, "loss": 0.6463, "step": 21752 }, { "epoch": 0.6351055443636682, "grad_norm": 0.6519077060476102, "learning_rate": 2.027250608272506e-06, "loss": 0.5643, "step": 21753 }, { "epoch": 0.6351347405915155, "grad_norm": 0.7728789897742432, "learning_rate": 2.027088402270884e-06, "loss": 0.6415, "step": 21754 }, { "epoch": 0.6351639368193629, "grad_norm": 0.6983792145765784, "learning_rate": 2.0269261962692622e-06, "loss": 0.6093, "step": 21755 }, { "epoch": 0.6351931330472103, "grad_norm": 0.7155667627914147, "learning_rate": 2.0267639902676402e-06, "loss": 0.5898, "step": 21756 }, { "epoch": 0.6352223292750576, "grad_norm": 0.7371368245666652, "learning_rate": 2.026601784266018e-06, "loss": 0.6145, "step": 21757 }, { "epoch": 0.635251525502905, "grad_norm": 0.7431096549901529, "learning_rate": 2.026439578264396e-06, "loss": 0.701, "step": 21758 }, { "epoch": 0.6352807217307523, "grad_norm": 0.7115190127332559, "learning_rate": 2.026277372262774e-06, "loss": 0.5456, "step": 21759 }, { "epoch": 0.6353099179585997, "grad_norm": 0.7174857085185407, "learning_rate": 2.026115166261152e-06, "loss": 0.6563, "step": 21760 }, { "epoch": 0.6353391141864471, "grad_norm": 1.0876110668263586, "learning_rate": 2.02595296025953e-06, "loss": 0.5909, "step": 21761 }, { "epoch": 0.6353683104142944, "grad_norm": 0.710824943124445, "learning_rate": 2.025790754257908e-06, "loss": 0.6091, "step": 21762 }, { "epoch": 0.6353975066421418, "grad_norm": 1.024733363182424, "learning_rate": 2.025628548256286e-06, "loss": 0.6811, "step": 21763 }, { "epoch": 0.6354267028699891, "grad_norm": 0.7330059458473337, "learning_rate": 2.0254663422546634e-06, "loss": 0.6074, "step": 21764 }, { "epoch": 0.6354558990978365, "grad_norm": 0.7318636439802364, "learning_rate": 2.0253041362530414e-06, "loss": 0.6032, "step": 21765 }, { "epoch": 0.6354850953256839, "grad_norm": 0.7320052203764534, "learning_rate": 2.0251419302514194e-06, "loss": 0.6612, "step": 21766 }, { "epoch": 0.6355142915535312, "grad_norm": 0.7569897069715995, "learning_rate": 2.0249797242497975e-06, "loss": 0.6921, "step": 21767 }, { "epoch": 0.6355434877813786, "grad_norm": 0.7620882172551081, "learning_rate": 2.024817518248175e-06, "loss": 0.636, "step": 21768 }, { "epoch": 0.6355726840092261, "grad_norm": 0.7484614717419895, "learning_rate": 2.024655312246553e-06, "loss": 0.6691, "step": 21769 }, { "epoch": 0.6356018802370734, "grad_norm": 0.6899504940854885, "learning_rate": 2.0244931062449315e-06, "loss": 0.5682, "step": 21770 }, { "epoch": 0.6356310764649208, "grad_norm": 0.715279720274938, "learning_rate": 2.024330900243309e-06, "loss": 0.6217, "step": 21771 }, { "epoch": 0.6356602726927681, "grad_norm": 0.8904382796144931, "learning_rate": 2.024168694241687e-06, "loss": 0.5679, "step": 21772 }, { "epoch": 0.6356894689206155, "grad_norm": 0.731679261958916, "learning_rate": 2.024006488240065e-06, "loss": 0.6863, "step": 21773 }, { "epoch": 0.6357186651484629, "grad_norm": 0.7122396882682367, "learning_rate": 2.023844282238443e-06, "loss": 0.6037, "step": 21774 }, { "epoch": 0.6357478613763102, "grad_norm": 0.7100852888517368, "learning_rate": 2.023682076236821e-06, "loss": 0.5946, "step": 21775 }, { "epoch": 0.6357770576041576, "grad_norm": 0.7195038045893504, "learning_rate": 2.0235198702351987e-06, "loss": 0.6413, "step": 21776 }, { "epoch": 0.635806253832005, "grad_norm": 0.7125496751855742, "learning_rate": 2.0233576642335767e-06, "loss": 0.6648, "step": 21777 }, { "epoch": 0.6358354500598523, "grad_norm": 0.7252818677505863, "learning_rate": 2.0231954582319547e-06, "loss": 0.6587, "step": 21778 }, { "epoch": 0.6358646462876997, "grad_norm": 0.6997364254195251, "learning_rate": 2.0230332522303327e-06, "loss": 0.5984, "step": 21779 }, { "epoch": 0.635893842515547, "grad_norm": 0.6927704152199624, "learning_rate": 2.0228710462287107e-06, "loss": 0.6155, "step": 21780 }, { "epoch": 0.6359230387433944, "grad_norm": 0.7330402489013549, "learning_rate": 2.0227088402270887e-06, "loss": 0.6428, "step": 21781 }, { "epoch": 0.6359522349712418, "grad_norm": 0.7104116514207913, "learning_rate": 2.0225466342254667e-06, "loss": 0.6411, "step": 21782 }, { "epoch": 0.6359814311990891, "grad_norm": 0.7231851962534948, "learning_rate": 2.0223844282238443e-06, "loss": 0.6157, "step": 21783 }, { "epoch": 0.6360106274269365, "grad_norm": 0.7406277329421677, "learning_rate": 2.0222222222222223e-06, "loss": 0.7011, "step": 21784 }, { "epoch": 0.6360398236547838, "grad_norm": 0.7316395182893511, "learning_rate": 2.0220600162206003e-06, "loss": 0.6335, "step": 21785 }, { "epoch": 0.6360690198826312, "grad_norm": 0.7147560097130465, "learning_rate": 2.0218978102189783e-06, "loss": 0.5894, "step": 21786 }, { "epoch": 0.6360982161104786, "grad_norm": 0.7884964481756196, "learning_rate": 2.021735604217356e-06, "loss": 0.6902, "step": 21787 }, { "epoch": 0.6361274123383259, "grad_norm": 0.7230868379214256, "learning_rate": 2.0215733982157343e-06, "loss": 0.6839, "step": 21788 }, { "epoch": 0.6361566085661733, "grad_norm": 0.6737627701796616, "learning_rate": 2.0214111922141123e-06, "loss": 0.5644, "step": 21789 }, { "epoch": 0.6361858047940206, "grad_norm": 0.6819336928310169, "learning_rate": 2.02124898621249e-06, "loss": 0.5943, "step": 21790 }, { "epoch": 0.636215001021868, "grad_norm": 0.663395807026962, "learning_rate": 2.021086780210868e-06, "loss": 0.5553, "step": 21791 }, { "epoch": 0.6362441972497154, "grad_norm": 0.7211845961752121, "learning_rate": 2.020924574209246e-06, "loss": 0.7092, "step": 21792 }, { "epoch": 0.6362733934775627, "grad_norm": 0.7156457913056086, "learning_rate": 2.020762368207624e-06, "loss": 0.637, "step": 21793 }, { "epoch": 0.6363025897054101, "grad_norm": 0.6931152460113292, "learning_rate": 2.020600162206002e-06, "loss": 0.6097, "step": 21794 }, { "epoch": 0.6363317859332575, "grad_norm": 0.7849138004906958, "learning_rate": 2.0204379562043795e-06, "loss": 0.7224, "step": 21795 }, { "epoch": 0.6363609821611048, "grad_norm": 0.6759221993253325, "learning_rate": 2.0202757502027575e-06, "loss": 0.5607, "step": 21796 }, { "epoch": 0.6363901783889522, "grad_norm": 0.7035560793243809, "learning_rate": 2.0201135442011355e-06, "loss": 0.6157, "step": 21797 }, { "epoch": 0.6364193746167995, "grad_norm": 0.7476327760890438, "learning_rate": 2.0199513381995135e-06, "loss": 0.7263, "step": 21798 }, { "epoch": 0.6364485708446469, "grad_norm": 0.7461081137321995, "learning_rate": 2.0197891321978916e-06, "loss": 0.6988, "step": 21799 }, { "epoch": 0.6364777670724943, "grad_norm": 0.69613412409841, "learning_rate": 2.0196269261962696e-06, "loss": 0.5994, "step": 21800 }, { "epoch": 0.6365069633003416, "grad_norm": 0.7321642015292761, "learning_rate": 2.0194647201946476e-06, "loss": 0.6643, "step": 21801 }, { "epoch": 0.636536159528189, "grad_norm": 0.7608386189382073, "learning_rate": 2.019302514193025e-06, "loss": 0.6944, "step": 21802 }, { "epoch": 0.6365653557560363, "grad_norm": 0.7963931002185632, "learning_rate": 2.019140308191403e-06, "loss": 0.7017, "step": 21803 }, { "epoch": 0.6365945519838837, "grad_norm": 0.8342063172382741, "learning_rate": 2.018978102189781e-06, "loss": 0.7405, "step": 21804 }, { "epoch": 0.6366237482117311, "grad_norm": 0.7160605884061696, "learning_rate": 2.018815896188159e-06, "loss": 0.5852, "step": 21805 }, { "epoch": 0.6366529444395784, "grad_norm": 0.7162769353265456, "learning_rate": 2.0186536901865368e-06, "loss": 0.6567, "step": 21806 }, { "epoch": 0.6366821406674258, "grad_norm": 0.8181221419364215, "learning_rate": 2.018491484184915e-06, "loss": 0.7345, "step": 21807 }, { "epoch": 0.6367113368952732, "grad_norm": 0.7447026014327164, "learning_rate": 2.018329278183293e-06, "loss": 0.6583, "step": 21808 }, { "epoch": 0.6367405331231205, "grad_norm": 0.7421248517668014, "learning_rate": 2.0181670721816708e-06, "loss": 0.6791, "step": 21809 }, { "epoch": 0.6367697293509679, "grad_norm": 0.7053987809841628, "learning_rate": 2.0180048661800488e-06, "loss": 0.6265, "step": 21810 }, { "epoch": 0.6367989255788152, "grad_norm": 0.7289548008530943, "learning_rate": 2.0178426601784268e-06, "loss": 0.701, "step": 21811 }, { "epoch": 0.6368281218066626, "grad_norm": 0.6928850782114151, "learning_rate": 2.017680454176805e-06, "loss": 0.5714, "step": 21812 }, { "epoch": 0.63685731803451, "grad_norm": 0.7048836837870752, "learning_rate": 2.017518248175183e-06, "loss": 0.585, "step": 21813 }, { "epoch": 0.6368865142623573, "grad_norm": 0.7245109664606764, "learning_rate": 2.0173560421735604e-06, "loss": 0.6806, "step": 21814 }, { "epoch": 0.6369157104902047, "grad_norm": 0.7000972534972807, "learning_rate": 2.0171938361719384e-06, "loss": 0.6463, "step": 21815 }, { "epoch": 0.636944906718052, "grad_norm": 0.7570970655594227, "learning_rate": 2.0170316301703164e-06, "loss": 0.681, "step": 21816 }, { "epoch": 0.6369741029458994, "grad_norm": 0.7801770112956823, "learning_rate": 2.0168694241686944e-06, "loss": 0.5757, "step": 21817 }, { "epoch": 0.6370032991737468, "grad_norm": 0.678230629413981, "learning_rate": 2.0167072181670724e-06, "loss": 0.6151, "step": 21818 }, { "epoch": 0.6370324954015941, "grad_norm": 0.6668401656888121, "learning_rate": 2.0165450121654504e-06, "loss": 0.5458, "step": 21819 }, { "epoch": 0.6370616916294415, "grad_norm": 0.7075939588412774, "learning_rate": 2.0163828061638284e-06, "loss": 0.6397, "step": 21820 }, { "epoch": 0.6370908878572888, "grad_norm": 0.7575599410621633, "learning_rate": 2.016220600162206e-06, "loss": 0.6994, "step": 21821 }, { "epoch": 0.6371200840851362, "grad_norm": 0.6794821103337527, "learning_rate": 2.016058394160584e-06, "loss": 0.5997, "step": 21822 }, { "epoch": 0.6371492803129836, "grad_norm": 0.7642442472367441, "learning_rate": 2.015896188158962e-06, "loss": 0.7011, "step": 21823 }, { "epoch": 0.6371784765408309, "grad_norm": 0.7031216528173813, "learning_rate": 2.01573398215734e-06, "loss": 0.6237, "step": 21824 }, { "epoch": 0.6372076727686783, "grad_norm": 0.6647335254950963, "learning_rate": 2.0155717761557176e-06, "loss": 0.5329, "step": 21825 }, { "epoch": 0.6372368689965257, "grad_norm": 0.7097005305230005, "learning_rate": 2.015409570154096e-06, "loss": 0.6009, "step": 21826 }, { "epoch": 0.637266065224373, "grad_norm": 0.704356349741566, "learning_rate": 2.015247364152474e-06, "loss": 0.5922, "step": 21827 }, { "epoch": 0.6372952614522204, "grad_norm": 0.8442208250201775, "learning_rate": 2.0150851581508516e-06, "loss": 0.6535, "step": 21828 }, { "epoch": 0.6373244576800677, "grad_norm": 0.7389656790633213, "learning_rate": 2.0149229521492296e-06, "loss": 0.6663, "step": 21829 }, { "epoch": 0.6373536539079151, "grad_norm": 0.7240460088310959, "learning_rate": 2.0147607461476076e-06, "loss": 0.6621, "step": 21830 }, { "epoch": 0.6373828501357625, "grad_norm": 0.7141560746074241, "learning_rate": 2.0145985401459857e-06, "loss": 0.5979, "step": 21831 }, { "epoch": 0.6374120463636098, "grad_norm": 0.6913717174938564, "learning_rate": 2.0144363341443637e-06, "loss": 0.5876, "step": 21832 }, { "epoch": 0.6374412425914572, "grad_norm": 0.7636915090935849, "learning_rate": 2.0142741281427412e-06, "loss": 0.604, "step": 21833 }, { "epoch": 0.6374704388193045, "grad_norm": 0.6875315089321961, "learning_rate": 2.0141119221411192e-06, "loss": 0.5901, "step": 21834 }, { "epoch": 0.6374996350471519, "grad_norm": 0.6919189966328532, "learning_rate": 2.0139497161394973e-06, "loss": 0.5687, "step": 21835 }, { "epoch": 0.6375288312749993, "grad_norm": 0.798139004507433, "learning_rate": 2.0137875101378753e-06, "loss": 0.6897, "step": 21836 }, { "epoch": 0.6375580275028466, "grad_norm": 0.7861066685974729, "learning_rate": 2.0136253041362533e-06, "loss": 0.688, "step": 21837 }, { "epoch": 0.637587223730694, "grad_norm": 0.7473520828972448, "learning_rate": 2.0134630981346313e-06, "loss": 0.6838, "step": 21838 }, { "epoch": 0.6376164199585413, "grad_norm": 0.7838500086816277, "learning_rate": 2.0133008921330093e-06, "loss": 0.6985, "step": 21839 }, { "epoch": 0.6376456161863887, "grad_norm": 0.7231728741097533, "learning_rate": 2.013138686131387e-06, "loss": 0.6334, "step": 21840 }, { "epoch": 0.6376748124142361, "grad_norm": 0.7182854001587322, "learning_rate": 2.012976480129765e-06, "loss": 0.6464, "step": 21841 }, { "epoch": 0.6377040086420834, "grad_norm": 0.6991691064874509, "learning_rate": 2.012814274128143e-06, "loss": 0.6158, "step": 21842 }, { "epoch": 0.6377332048699308, "grad_norm": 0.7223958355348031, "learning_rate": 2.012652068126521e-06, "loss": 0.6872, "step": 21843 }, { "epoch": 0.6377624010977782, "grad_norm": 0.7007989798332313, "learning_rate": 2.0124898621248985e-06, "loss": 0.5842, "step": 21844 }, { "epoch": 0.6377915973256255, "grad_norm": 0.7347289655656724, "learning_rate": 2.012327656123277e-06, "loss": 0.5972, "step": 21845 }, { "epoch": 0.6378207935534729, "grad_norm": 0.7238347265697795, "learning_rate": 2.012165450121655e-06, "loss": 0.6693, "step": 21846 }, { "epoch": 0.6378499897813202, "grad_norm": 0.684650134936236, "learning_rate": 2.0120032441200325e-06, "loss": 0.5694, "step": 21847 }, { "epoch": 0.6378791860091676, "grad_norm": 0.7566171036418384, "learning_rate": 2.0118410381184105e-06, "loss": 0.6693, "step": 21848 }, { "epoch": 0.637908382237015, "grad_norm": 0.7227956018693297, "learning_rate": 2.0116788321167885e-06, "loss": 0.6992, "step": 21849 }, { "epoch": 0.6379375784648623, "grad_norm": 0.7606251136218583, "learning_rate": 2.0115166261151665e-06, "loss": 0.6224, "step": 21850 }, { "epoch": 0.6379667746927097, "grad_norm": 0.7473048645652088, "learning_rate": 2.0113544201135445e-06, "loss": 0.6824, "step": 21851 }, { "epoch": 0.637995970920557, "grad_norm": 0.6825121862188835, "learning_rate": 2.011192214111922e-06, "loss": 0.5665, "step": 21852 }, { "epoch": 0.6380251671484044, "grad_norm": 0.7166816232557471, "learning_rate": 2.0110300081103e-06, "loss": 0.6105, "step": 21853 }, { "epoch": 0.6380543633762518, "grad_norm": 0.658192853422388, "learning_rate": 2.010867802108678e-06, "loss": 0.5436, "step": 21854 }, { "epoch": 0.6380835596040991, "grad_norm": 0.699172084128456, "learning_rate": 2.010705596107056e-06, "loss": 0.5947, "step": 21855 }, { "epoch": 0.6381127558319465, "grad_norm": 0.7615750719278251, "learning_rate": 2.010543390105434e-06, "loss": 0.6602, "step": 21856 }, { "epoch": 0.6381419520597938, "grad_norm": 0.6825734909545982, "learning_rate": 2.010381184103812e-06, "loss": 0.599, "step": 21857 }, { "epoch": 0.6381711482876412, "grad_norm": 0.7716006497591716, "learning_rate": 2.01021897810219e-06, "loss": 0.6606, "step": 21858 }, { "epoch": 0.6382003445154886, "grad_norm": 0.7055178699283396, "learning_rate": 2.0100567721005677e-06, "loss": 0.6314, "step": 21859 }, { "epoch": 0.6382295407433359, "grad_norm": 0.7004386564253344, "learning_rate": 2.0098945660989457e-06, "loss": 0.5907, "step": 21860 }, { "epoch": 0.6382587369711833, "grad_norm": 0.7295852663842608, "learning_rate": 2.0097323600973237e-06, "loss": 0.6255, "step": 21861 }, { "epoch": 0.6382879331990307, "grad_norm": 0.8294451412833533, "learning_rate": 2.0095701540957017e-06, "loss": 0.7157, "step": 21862 }, { "epoch": 0.638317129426878, "grad_norm": 0.6877445151633239, "learning_rate": 2.0094079480940793e-06, "loss": 0.592, "step": 21863 }, { "epoch": 0.6383463256547254, "grad_norm": 0.7145843726045309, "learning_rate": 2.0092457420924578e-06, "loss": 0.6193, "step": 21864 }, { "epoch": 0.6383755218825727, "grad_norm": 0.7206600909708406, "learning_rate": 2.0090835360908358e-06, "loss": 0.6337, "step": 21865 }, { "epoch": 0.6384047181104201, "grad_norm": 0.6731648575866559, "learning_rate": 2.0089213300892133e-06, "loss": 0.57, "step": 21866 }, { "epoch": 0.6384339143382675, "grad_norm": 0.7372564789345316, "learning_rate": 2.0087591240875914e-06, "loss": 0.6534, "step": 21867 }, { "epoch": 0.6384631105661148, "grad_norm": 0.7591848163497767, "learning_rate": 2.0085969180859694e-06, "loss": 0.6647, "step": 21868 }, { "epoch": 0.6384923067939622, "grad_norm": 0.7384416377021646, "learning_rate": 2.0084347120843474e-06, "loss": 0.6679, "step": 21869 }, { "epoch": 0.6385215030218095, "grad_norm": 0.7398344531905103, "learning_rate": 2.0082725060827254e-06, "loss": 0.6315, "step": 21870 }, { "epoch": 0.6385506992496569, "grad_norm": 0.7532108982525514, "learning_rate": 2.008110300081103e-06, "loss": 0.6193, "step": 21871 }, { "epoch": 0.6385798954775043, "grad_norm": 0.6927597631428093, "learning_rate": 2.007948094079481e-06, "loss": 0.5859, "step": 21872 }, { "epoch": 0.6386090917053516, "grad_norm": 0.6973102670851812, "learning_rate": 2.007785888077859e-06, "loss": 0.5964, "step": 21873 }, { "epoch": 0.638638287933199, "grad_norm": 0.738364599844044, "learning_rate": 2.007623682076237e-06, "loss": 0.6667, "step": 21874 }, { "epoch": 0.6386674841610464, "grad_norm": 0.7249537551702588, "learning_rate": 2.007461476074615e-06, "loss": 0.6379, "step": 21875 }, { "epoch": 0.6386966803888937, "grad_norm": 0.7683979366550988, "learning_rate": 2.007299270072993e-06, "loss": 0.7281, "step": 21876 }, { "epoch": 0.6387258766167411, "grad_norm": 0.7831660949299553, "learning_rate": 2.007137064071371e-06, "loss": 0.7066, "step": 21877 }, { "epoch": 0.6387550728445884, "grad_norm": 0.7660234932525872, "learning_rate": 2.0069748580697486e-06, "loss": 0.7409, "step": 21878 }, { "epoch": 0.6387842690724358, "grad_norm": 0.6996917023159941, "learning_rate": 2.0068126520681266e-06, "loss": 0.5897, "step": 21879 }, { "epoch": 0.6388134653002832, "grad_norm": 0.7317270778525568, "learning_rate": 2.0066504460665046e-06, "loss": 0.6404, "step": 21880 }, { "epoch": 0.6388426615281305, "grad_norm": 0.7782184212424984, "learning_rate": 2.0064882400648826e-06, "loss": 0.7529, "step": 21881 }, { "epoch": 0.6388718577559779, "grad_norm": 0.6959791624837356, "learning_rate": 2.00632603406326e-06, "loss": 0.5359, "step": 21882 }, { "epoch": 0.6389010539838252, "grad_norm": 0.7248045170652385, "learning_rate": 2.0061638280616386e-06, "loss": 0.6313, "step": 21883 }, { "epoch": 0.6389302502116726, "grad_norm": 0.8471222589412837, "learning_rate": 2.0060016220600166e-06, "loss": 0.7562, "step": 21884 }, { "epoch": 0.63895944643952, "grad_norm": 0.7294063492986698, "learning_rate": 2.005839416058394e-06, "loss": 0.6469, "step": 21885 }, { "epoch": 0.6389886426673673, "grad_norm": 0.7583283042567199, "learning_rate": 2.0056772100567722e-06, "loss": 0.6535, "step": 21886 }, { "epoch": 0.6390178388952147, "grad_norm": 0.7179589023541079, "learning_rate": 2.0055150040551502e-06, "loss": 0.6211, "step": 21887 }, { "epoch": 0.639047035123062, "grad_norm": 0.7389762014000625, "learning_rate": 2.0053527980535282e-06, "loss": 0.6874, "step": 21888 }, { "epoch": 0.6390762313509094, "grad_norm": 0.7611738596371411, "learning_rate": 2.0051905920519062e-06, "loss": 0.7039, "step": 21889 }, { "epoch": 0.6391054275787569, "grad_norm": 0.6664786851751454, "learning_rate": 2.005028386050284e-06, "loss": 0.5548, "step": 21890 }, { "epoch": 0.6391346238066042, "grad_norm": 0.7394755874462112, "learning_rate": 2.004866180048662e-06, "loss": 0.6183, "step": 21891 }, { "epoch": 0.6391638200344516, "grad_norm": 0.7377840145479988, "learning_rate": 2.00470397404704e-06, "loss": 0.6868, "step": 21892 }, { "epoch": 0.639193016262299, "grad_norm": 0.7279225699022756, "learning_rate": 2.004541768045418e-06, "loss": 0.6363, "step": 21893 }, { "epoch": 0.6392222124901463, "grad_norm": 0.6995784072542782, "learning_rate": 2.004379562043796e-06, "loss": 0.6041, "step": 21894 }, { "epoch": 0.6392514087179937, "grad_norm": 0.699006185522255, "learning_rate": 2.004217356042174e-06, "loss": 0.5794, "step": 21895 }, { "epoch": 0.639280604945841, "grad_norm": 0.6889634283587883, "learning_rate": 2.004055150040552e-06, "loss": 0.5781, "step": 21896 }, { "epoch": 0.6393098011736884, "grad_norm": 0.7018417033374534, "learning_rate": 2.0038929440389294e-06, "loss": 0.5891, "step": 21897 }, { "epoch": 0.6393389974015358, "grad_norm": 0.6500971245338886, "learning_rate": 2.0037307380373074e-06, "loss": 0.5369, "step": 21898 }, { "epoch": 0.6393681936293831, "grad_norm": 0.725394157290495, "learning_rate": 2.0035685320356855e-06, "loss": 0.6099, "step": 21899 }, { "epoch": 0.6393973898572305, "grad_norm": 0.6664861873234744, "learning_rate": 2.0034063260340635e-06, "loss": 0.5564, "step": 21900 }, { "epoch": 0.6394265860850779, "grad_norm": 0.7455689261550824, "learning_rate": 2.003244120032441e-06, "loss": 0.6024, "step": 21901 }, { "epoch": 0.6394557823129252, "grad_norm": 0.7220987553275631, "learning_rate": 2.0030819140308195e-06, "loss": 0.6765, "step": 21902 }, { "epoch": 0.6394849785407726, "grad_norm": 0.7594372279685603, "learning_rate": 2.0029197080291975e-06, "loss": 0.6039, "step": 21903 }, { "epoch": 0.6395141747686199, "grad_norm": 0.7641492998247547, "learning_rate": 2.002757502027575e-06, "loss": 0.7637, "step": 21904 }, { "epoch": 0.6395433709964673, "grad_norm": 0.6857160749283241, "learning_rate": 2.002595296025953e-06, "loss": 0.5367, "step": 21905 }, { "epoch": 0.6395725672243147, "grad_norm": 0.6861962147043008, "learning_rate": 2.002433090024331e-06, "loss": 0.6269, "step": 21906 }, { "epoch": 0.639601763452162, "grad_norm": 0.837435235100678, "learning_rate": 2.002270884022709e-06, "loss": 0.7107, "step": 21907 }, { "epoch": 0.6396309596800094, "grad_norm": 0.7453619628563901, "learning_rate": 2.002108678021087e-06, "loss": 0.6696, "step": 21908 }, { "epoch": 0.6396601559078567, "grad_norm": 0.7375313561008366, "learning_rate": 2.0019464720194647e-06, "loss": 0.6271, "step": 21909 }, { "epoch": 0.6396893521357041, "grad_norm": 0.7874551082010574, "learning_rate": 2.0017842660178427e-06, "loss": 0.6968, "step": 21910 }, { "epoch": 0.6397185483635515, "grad_norm": 0.7704438196661378, "learning_rate": 2.0016220600162207e-06, "loss": 0.638, "step": 21911 }, { "epoch": 0.6397477445913988, "grad_norm": 0.6765806986321913, "learning_rate": 2.0014598540145987e-06, "loss": 0.558, "step": 21912 }, { "epoch": 0.6397769408192462, "grad_norm": 0.67389018532556, "learning_rate": 2.0012976480129767e-06, "loss": 0.619, "step": 21913 }, { "epoch": 0.6398061370470935, "grad_norm": 0.7699559123209478, "learning_rate": 2.0011354420113547e-06, "loss": 0.6162, "step": 21914 }, { "epoch": 0.6398353332749409, "grad_norm": 0.7823158083094464, "learning_rate": 2.0009732360097327e-06, "loss": 0.71, "step": 21915 }, { "epoch": 0.6398645295027883, "grad_norm": 0.6841697974179842, "learning_rate": 2.0008110300081103e-06, "loss": 0.5975, "step": 21916 }, { "epoch": 0.6398937257306356, "grad_norm": 1.2494418927181556, "learning_rate": 2.0006488240064883e-06, "loss": 0.7131, "step": 21917 }, { "epoch": 0.639922921958483, "grad_norm": 0.7590611216068168, "learning_rate": 2.0004866180048663e-06, "loss": 0.7348, "step": 21918 }, { "epoch": 0.6399521181863304, "grad_norm": 0.7329812295662176, "learning_rate": 2.0003244120032443e-06, "loss": 0.6287, "step": 21919 }, { "epoch": 0.6399813144141777, "grad_norm": 0.6916701420931423, "learning_rate": 2.000162206001622e-06, "loss": 0.6237, "step": 21920 }, { "epoch": 0.6400105106420251, "grad_norm": 0.7176966425480977, "learning_rate": 2.0000000000000003e-06, "loss": 0.5599, "step": 21921 }, { "epoch": 0.6400397068698724, "grad_norm": 0.7185799829104882, "learning_rate": 1.9998377939983783e-06, "loss": 0.6222, "step": 21922 }, { "epoch": 0.6400689030977198, "grad_norm": 0.7468275474256649, "learning_rate": 1.999675587996756e-06, "loss": 0.6484, "step": 21923 }, { "epoch": 0.6400980993255672, "grad_norm": 0.6681557474972686, "learning_rate": 1.999513381995134e-06, "loss": 0.5367, "step": 21924 }, { "epoch": 0.6401272955534145, "grad_norm": 0.7759557089860477, "learning_rate": 1.999351175993512e-06, "loss": 0.6953, "step": 21925 }, { "epoch": 0.6401564917812619, "grad_norm": 0.750405979231614, "learning_rate": 1.99918896999189e-06, "loss": 0.6975, "step": 21926 }, { "epoch": 0.6401856880091092, "grad_norm": 0.7762431556687088, "learning_rate": 1.9990267639902675e-06, "loss": 0.7113, "step": 21927 }, { "epoch": 0.6402148842369566, "grad_norm": 0.7607895424883386, "learning_rate": 1.9988645579886455e-06, "loss": 0.7317, "step": 21928 }, { "epoch": 0.640244080464804, "grad_norm": 0.7922419733209675, "learning_rate": 1.9987023519870235e-06, "loss": 0.641, "step": 21929 }, { "epoch": 0.6402732766926513, "grad_norm": 0.747946950921523, "learning_rate": 1.9985401459854015e-06, "loss": 0.6689, "step": 21930 }, { "epoch": 0.6403024729204987, "grad_norm": 0.6711125718070898, "learning_rate": 1.9983779399837796e-06, "loss": 0.5874, "step": 21931 }, { "epoch": 0.640331669148346, "grad_norm": 0.7509929680192737, "learning_rate": 1.9982157339821576e-06, "loss": 0.6788, "step": 21932 }, { "epoch": 0.6403608653761934, "grad_norm": 0.6807802536942095, "learning_rate": 1.9980535279805356e-06, "loss": 0.5581, "step": 21933 }, { "epoch": 0.6403900616040408, "grad_norm": 0.7784169513477041, "learning_rate": 1.9978913219789136e-06, "loss": 0.7015, "step": 21934 }, { "epoch": 0.6404192578318881, "grad_norm": 0.6701538834869452, "learning_rate": 1.997729115977291e-06, "loss": 0.6135, "step": 21935 }, { "epoch": 0.6404484540597355, "grad_norm": 0.7106231254356898, "learning_rate": 1.997566909975669e-06, "loss": 0.6334, "step": 21936 }, { "epoch": 0.6404776502875829, "grad_norm": 0.7879927815392865, "learning_rate": 1.997404703974047e-06, "loss": 0.7582, "step": 21937 }, { "epoch": 0.6405068465154302, "grad_norm": 0.7220210321132348, "learning_rate": 1.997242497972425e-06, "loss": 0.6068, "step": 21938 }, { "epoch": 0.6405360427432776, "grad_norm": 0.7570932525143939, "learning_rate": 1.997080291970803e-06, "loss": 0.6642, "step": 21939 }, { "epoch": 0.6405652389711249, "grad_norm": 0.7117246684671807, "learning_rate": 1.996918085969181e-06, "loss": 0.6421, "step": 21940 }, { "epoch": 0.6405944351989723, "grad_norm": 0.7834531186893534, "learning_rate": 1.996755879967559e-06, "loss": 0.7596, "step": 21941 }, { "epoch": 0.6406236314268197, "grad_norm": 0.7457949483663364, "learning_rate": 1.9965936739659368e-06, "loss": 0.6918, "step": 21942 }, { "epoch": 0.640652827654667, "grad_norm": 0.6826945614983091, "learning_rate": 1.996431467964315e-06, "loss": 0.5711, "step": 21943 }, { "epoch": 0.6406820238825144, "grad_norm": 0.7225122678360142, "learning_rate": 1.996269261962693e-06, "loss": 0.6092, "step": 21944 }, { "epoch": 0.6407112201103617, "grad_norm": 0.721717980860611, "learning_rate": 1.996107055961071e-06, "loss": 0.5938, "step": 21945 }, { "epoch": 0.6407404163382091, "grad_norm": 0.6862054561685041, "learning_rate": 1.9959448499594484e-06, "loss": 0.6045, "step": 21946 }, { "epoch": 0.6407696125660565, "grad_norm": 0.7133471766506456, "learning_rate": 1.9957826439578264e-06, "loss": 0.6676, "step": 21947 }, { "epoch": 0.6407988087939038, "grad_norm": 0.6972998411301331, "learning_rate": 1.9956204379562044e-06, "loss": 0.5894, "step": 21948 }, { "epoch": 0.6408280050217512, "grad_norm": 0.7641913610084219, "learning_rate": 1.9954582319545824e-06, "loss": 0.6403, "step": 21949 }, { "epoch": 0.6408572012495986, "grad_norm": 0.772941878601836, "learning_rate": 1.9952960259529604e-06, "loss": 0.6579, "step": 21950 }, { "epoch": 0.6408863974774459, "grad_norm": 0.8497434130477739, "learning_rate": 1.9951338199513384e-06, "loss": 0.679, "step": 21951 }, { "epoch": 0.6409155937052933, "grad_norm": 0.6757051759088049, "learning_rate": 1.9949716139497164e-06, "loss": 0.5876, "step": 21952 }, { "epoch": 0.6409447899331406, "grad_norm": 0.7327816239568543, "learning_rate": 1.9948094079480944e-06, "loss": 0.6843, "step": 21953 }, { "epoch": 0.640973986160988, "grad_norm": 0.7396417358123694, "learning_rate": 1.994647201946472e-06, "loss": 0.6895, "step": 21954 }, { "epoch": 0.6410031823888354, "grad_norm": 0.7303967650689519, "learning_rate": 1.99448499594485e-06, "loss": 0.6646, "step": 21955 }, { "epoch": 0.6410323786166827, "grad_norm": 0.8058231711705334, "learning_rate": 1.994322789943228e-06, "loss": 0.7655, "step": 21956 }, { "epoch": 0.6410615748445301, "grad_norm": 0.7667934229625781, "learning_rate": 1.994160583941606e-06, "loss": 0.7183, "step": 21957 }, { "epoch": 0.6410907710723774, "grad_norm": 0.7743437162426169, "learning_rate": 1.993998377939984e-06, "loss": 0.6943, "step": 21958 }, { "epoch": 0.6411199673002248, "grad_norm": 0.693760055460307, "learning_rate": 1.993836171938362e-06, "loss": 0.5598, "step": 21959 }, { "epoch": 0.6411491635280722, "grad_norm": 0.7128652599720118, "learning_rate": 1.99367396593674e-06, "loss": 0.674, "step": 21960 }, { "epoch": 0.6411783597559195, "grad_norm": 0.6581475914125773, "learning_rate": 1.9935117599351176e-06, "loss": 0.5461, "step": 21961 }, { "epoch": 0.6412075559837669, "grad_norm": 0.7374713009088916, "learning_rate": 1.9933495539334956e-06, "loss": 0.6981, "step": 21962 }, { "epoch": 0.6412367522116142, "grad_norm": 0.6971298128572523, "learning_rate": 1.9931873479318737e-06, "loss": 0.6122, "step": 21963 }, { "epoch": 0.6412659484394616, "grad_norm": 0.7120481922658642, "learning_rate": 1.9930251419302517e-06, "loss": 0.6253, "step": 21964 }, { "epoch": 0.641295144667309, "grad_norm": 0.7266131910069739, "learning_rate": 1.9928629359286292e-06, "loss": 0.6428, "step": 21965 }, { "epoch": 0.6413243408951563, "grad_norm": 0.7140939644391721, "learning_rate": 1.9927007299270073e-06, "loss": 0.7162, "step": 21966 }, { "epoch": 0.6413535371230037, "grad_norm": 0.7454622351278206, "learning_rate": 1.9925385239253853e-06, "loss": 0.6498, "step": 21967 }, { "epoch": 0.641382733350851, "grad_norm": 0.8033629143525023, "learning_rate": 1.9923763179237633e-06, "loss": 0.6595, "step": 21968 }, { "epoch": 0.6414119295786984, "grad_norm": 0.7092381458504976, "learning_rate": 1.9922141119221413e-06, "loss": 0.6181, "step": 21969 }, { "epoch": 0.6414411258065458, "grad_norm": 0.7000062469863009, "learning_rate": 1.9920519059205193e-06, "loss": 0.5969, "step": 21970 }, { "epoch": 0.6414703220343931, "grad_norm": 0.7952836179954619, "learning_rate": 1.9918896999188973e-06, "loss": 0.7296, "step": 21971 }, { "epoch": 0.6414995182622405, "grad_norm": 0.734040990718823, "learning_rate": 1.9917274939172753e-06, "loss": 0.6477, "step": 21972 }, { "epoch": 0.6415287144900879, "grad_norm": 0.7119316836974434, "learning_rate": 1.991565287915653e-06, "loss": 0.5886, "step": 21973 }, { "epoch": 0.6415579107179352, "grad_norm": 0.678443617477866, "learning_rate": 1.991403081914031e-06, "loss": 0.608, "step": 21974 }, { "epoch": 0.6415871069457826, "grad_norm": 0.6614288689327836, "learning_rate": 1.991240875912409e-06, "loss": 0.552, "step": 21975 }, { "epoch": 0.6416163031736299, "grad_norm": 0.6952435498148736, "learning_rate": 1.991078669910787e-06, "loss": 0.6115, "step": 21976 }, { "epoch": 0.6416454994014773, "grad_norm": 0.7452646156380824, "learning_rate": 1.990916463909165e-06, "loss": 0.6738, "step": 21977 }, { "epoch": 0.6416746956293247, "grad_norm": 0.6577101812593753, "learning_rate": 1.990754257907543e-06, "loss": 0.5441, "step": 21978 }, { "epoch": 0.641703891857172, "grad_norm": 0.726306654663305, "learning_rate": 1.990592051905921e-06, "loss": 0.6755, "step": 21979 }, { "epoch": 0.6417330880850194, "grad_norm": 0.7967401302235024, "learning_rate": 1.9904298459042985e-06, "loss": 0.6627, "step": 21980 }, { "epoch": 0.6417622843128667, "grad_norm": 0.7328729070818673, "learning_rate": 1.9902676399026765e-06, "loss": 0.6268, "step": 21981 }, { "epoch": 0.6417914805407141, "grad_norm": 0.7285086664049704, "learning_rate": 1.9901054339010545e-06, "loss": 0.6711, "step": 21982 }, { "epoch": 0.6418206767685615, "grad_norm": 0.7247097033931684, "learning_rate": 1.9899432278994325e-06, "loss": 0.6256, "step": 21983 }, { "epoch": 0.6418498729964088, "grad_norm": 0.761009937198482, "learning_rate": 1.98978102189781e-06, "loss": 0.6723, "step": 21984 }, { "epoch": 0.6418790692242562, "grad_norm": 0.7275749984899619, "learning_rate": 1.989618815896188e-06, "loss": 0.6634, "step": 21985 }, { "epoch": 0.6419082654521036, "grad_norm": 0.7339415477092909, "learning_rate": 1.989456609894566e-06, "loss": 0.6818, "step": 21986 }, { "epoch": 0.6419374616799509, "grad_norm": 0.7322598582042038, "learning_rate": 1.989294403892944e-06, "loss": 0.6508, "step": 21987 }, { "epoch": 0.6419666579077983, "grad_norm": 0.8259163844683767, "learning_rate": 1.989132197891322e-06, "loss": 0.6342, "step": 21988 }, { "epoch": 0.6419958541356456, "grad_norm": 0.693330243871586, "learning_rate": 1.9889699918897e-06, "loss": 0.6542, "step": 21989 }, { "epoch": 0.642025050363493, "grad_norm": 0.7090686771162314, "learning_rate": 1.988807785888078e-06, "loss": 0.6078, "step": 21990 }, { "epoch": 0.6420542465913404, "grad_norm": 0.7564029969132379, "learning_rate": 1.988645579886456e-06, "loss": 0.623, "step": 21991 }, { "epoch": 0.6420834428191877, "grad_norm": 0.7616371229227524, "learning_rate": 1.9884833738848337e-06, "loss": 0.7224, "step": 21992 }, { "epoch": 0.6421126390470351, "grad_norm": 0.6737686144847896, "learning_rate": 1.9883211678832117e-06, "loss": 0.5598, "step": 21993 }, { "epoch": 0.6421418352748824, "grad_norm": 0.7221288865349499, "learning_rate": 1.9881589618815897e-06, "loss": 0.6748, "step": 21994 }, { "epoch": 0.6421710315027298, "grad_norm": 0.7513652620790108, "learning_rate": 1.9879967558799678e-06, "loss": 0.6349, "step": 21995 }, { "epoch": 0.6422002277305772, "grad_norm": 0.6984481341896974, "learning_rate": 1.9878345498783458e-06, "loss": 0.62, "step": 21996 }, { "epoch": 0.6422294239584245, "grad_norm": 0.6903068463976547, "learning_rate": 1.9876723438767238e-06, "loss": 0.5982, "step": 21997 }, { "epoch": 0.6422586201862719, "grad_norm": 0.6686225875756369, "learning_rate": 1.9875101378751018e-06, "loss": 0.599, "step": 21998 }, { "epoch": 0.6422878164141193, "grad_norm": 0.7406720934205077, "learning_rate": 1.9873479318734794e-06, "loss": 0.6766, "step": 21999 }, { "epoch": 0.6423170126419666, "grad_norm": 0.6906628680594623, "learning_rate": 1.9871857258718574e-06, "loss": 0.5482, "step": 22000 }, { "epoch": 0.642346208869814, "grad_norm": 0.8418127604622814, "learning_rate": 1.9870235198702354e-06, "loss": 0.6268, "step": 22001 }, { "epoch": 0.6423754050976613, "grad_norm": 0.6988663256327002, "learning_rate": 1.9868613138686134e-06, "loss": 0.6317, "step": 22002 }, { "epoch": 0.6424046013255087, "grad_norm": 0.703786496864313, "learning_rate": 1.986699107866991e-06, "loss": 0.6299, "step": 22003 }, { "epoch": 0.6424337975533561, "grad_norm": 0.7180701865386583, "learning_rate": 1.986536901865369e-06, "loss": 0.614, "step": 22004 }, { "epoch": 0.6424629937812034, "grad_norm": 0.730471230813766, "learning_rate": 1.9863746958637474e-06, "loss": 0.6586, "step": 22005 }, { "epoch": 0.6424921900090508, "grad_norm": 0.7441350696119219, "learning_rate": 1.986212489862125e-06, "loss": 0.6725, "step": 22006 }, { "epoch": 0.6425213862368981, "grad_norm": 0.7156091566085117, "learning_rate": 1.986050283860503e-06, "loss": 0.6311, "step": 22007 }, { "epoch": 0.6425505824647455, "grad_norm": 0.6745910293779129, "learning_rate": 1.985888077858881e-06, "loss": 0.5441, "step": 22008 }, { "epoch": 0.6425797786925929, "grad_norm": 0.7317651132121167, "learning_rate": 1.985725871857259e-06, "loss": 0.6887, "step": 22009 }, { "epoch": 0.6426089749204403, "grad_norm": 0.7588999102168578, "learning_rate": 1.985563665855637e-06, "loss": 0.6668, "step": 22010 }, { "epoch": 0.6426381711482877, "grad_norm": 0.7181131695059206, "learning_rate": 1.9854014598540146e-06, "loss": 0.6533, "step": 22011 }, { "epoch": 0.6426673673761351, "grad_norm": 0.71178905187612, "learning_rate": 1.9852392538523926e-06, "loss": 0.6227, "step": 22012 }, { "epoch": 0.6426965636039824, "grad_norm": 0.7462905513450239, "learning_rate": 1.9850770478507706e-06, "loss": 0.681, "step": 22013 }, { "epoch": 0.6427257598318298, "grad_norm": 0.7158890254104093, "learning_rate": 1.9849148418491486e-06, "loss": 0.5751, "step": 22014 }, { "epoch": 0.6427549560596771, "grad_norm": 0.6586954115935326, "learning_rate": 1.9847526358475266e-06, "loss": 0.5448, "step": 22015 }, { "epoch": 0.6427841522875245, "grad_norm": 0.715735980669224, "learning_rate": 1.9845904298459046e-06, "loss": 0.5877, "step": 22016 }, { "epoch": 0.6428133485153719, "grad_norm": 0.7290098825152678, "learning_rate": 1.9844282238442826e-06, "loss": 0.6091, "step": 22017 }, { "epoch": 0.6428425447432192, "grad_norm": 0.7328157728183157, "learning_rate": 1.9842660178426602e-06, "loss": 0.6303, "step": 22018 }, { "epoch": 0.6428717409710666, "grad_norm": 0.7870468607095202, "learning_rate": 1.9841038118410382e-06, "loss": 0.7618, "step": 22019 }, { "epoch": 0.642900937198914, "grad_norm": 0.7579129101416782, "learning_rate": 1.9839416058394162e-06, "loss": 0.638, "step": 22020 }, { "epoch": 0.6429301334267613, "grad_norm": 0.680001549922508, "learning_rate": 1.9837793998377942e-06, "loss": 0.5717, "step": 22021 }, { "epoch": 0.6429593296546087, "grad_norm": 0.7349888036276225, "learning_rate": 1.983617193836172e-06, "loss": 0.6551, "step": 22022 }, { "epoch": 0.642988525882456, "grad_norm": 0.7700024884048823, "learning_rate": 1.98345498783455e-06, "loss": 0.657, "step": 22023 }, { "epoch": 0.6430177221103034, "grad_norm": 0.7251651160412448, "learning_rate": 1.9832927818329283e-06, "loss": 0.6046, "step": 22024 }, { "epoch": 0.6430469183381508, "grad_norm": 0.7393427871128891, "learning_rate": 1.983130575831306e-06, "loss": 0.6227, "step": 22025 }, { "epoch": 0.6430761145659981, "grad_norm": 0.758392785495225, "learning_rate": 1.982968369829684e-06, "loss": 0.6877, "step": 22026 }, { "epoch": 0.6431053107938455, "grad_norm": 0.7279832719594947, "learning_rate": 1.982806163828062e-06, "loss": 0.6646, "step": 22027 }, { "epoch": 0.6431345070216928, "grad_norm": 0.7371635270871766, "learning_rate": 1.98264395782644e-06, "loss": 0.6578, "step": 22028 }, { "epoch": 0.6431637032495402, "grad_norm": 0.730786058701224, "learning_rate": 1.982481751824818e-06, "loss": 0.6336, "step": 22029 }, { "epoch": 0.6431928994773876, "grad_norm": 0.7407734381917085, "learning_rate": 1.9823195458231955e-06, "loss": 0.6811, "step": 22030 }, { "epoch": 0.6432220957052349, "grad_norm": 0.7155600171603915, "learning_rate": 1.9821573398215735e-06, "loss": 0.641, "step": 22031 }, { "epoch": 0.6432512919330823, "grad_norm": 0.7414408307874433, "learning_rate": 1.9819951338199515e-06, "loss": 0.6527, "step": 22032 }, { "epoch": 0.6432804881609296, "grad_norm": 0.7858362456680944, "learning_rate": 1.9818329278183295e-06, "loss": 0.763, "step": 22033 }, { "epoch": 0.643309684388777, "grad_norm": 0.6997784968741978, "learning_rate": 1.9816707218167075e-06, "loss": 0.6301, "step": 22034 }, { "epoch": 0.6433388806166244, "grad_norm": 0.7252522265027035, "learning_rate": 1.9815085158150855e-06, "loss": 0.6491, "step": 22035 }, { "epoch": 0.6433680768444717, "grad_norm": 0.7237652512624734, "learning_rate": 1.9813463098134635e-06, "loss": 0.6426, "step": 22036 }, { "epoch": 0.6433972730723191, "grad_norm": 0.7167704334458102, "learning_rate": 1.981184103811841e-06, "loss": 0.6705, "step": 22037 }, { "epoch": 0.6434264693001664, "grad_norm": 0.7088991114352188, "learning_rate": 1.981021897810219e-06, "loss": 0.6118, "step": 22038 }, { "epoch": 0.6434556655280138, "grad_norm": 0.6617904592508108, "learning_rate": 1.980859691808597e-06, "loss": 0.5818, "step": 22039 }, { "epoch": 0.6434848617558612, "grad_norm": 0.797331765917499, "learning_rate": 1.980697485806975e-06, "loss": 0.7408, "step": 22040 }, { "epoch": 0.6435140579837085, "grad_norm": 0.6703213485821953, "learning_rate": 1.9805352798053527e-06, "loss": 0.5746, "step": 22041 }, { "epoch": 0.6435432542115559, "grad_norm": 0.6755779964468186, "learning_rate": 1.9803730738037307e-06, "loss": 0.539, "step": 22042 }, { "epoch": 0.6435724504394033, "grad_norm": 0.7531402783734508, "learning_rate": 1.980210867802109e-06, "loss": 0.5498, "step": 22043 }, { "epoch": 0.6436016466672506, "grad_norm": 0.8085658865491131, "learning_rate": 1.9800486618004867e-06, "loss": 0.6912, "step": 22044 }, { "epoch": 0.643630842895098, "grad_norm": 0.692076291890275, "learning_rate": 1.9798864557988647e-06, "loss": 0.5788, "step": 22045 }, { "epoch": 0.6436600391229453, "grad_norm": 0.6709859183919165, "learning_rate": 1.9797242497972427e-06, "loss": 0.5993, "step": 22046 }, { "epoch": 0.6436892353507927, "grad_norm": 0.6909545404766224, "learning_rate": 1.9795620437956207e-06, "loss": 0.6142, "step": 22047 }, { "epoch": 0.6437184315786401, "grad_norm": 0.7723435446343744, "learning_rate": 1.9793998377939987e-06, "loss": 0.7694, "step": 22048 }, { "epoch": 0.6437476278064874, "grad_norm": 0.7356362395486765, "learning_rate": 1.9792376317923763e-06, "loss": 0.6315, "step": 22049 }, { "epoch": 0.6437768240343348, "grad_norm": 0.7972635567187879, "learning_rate": 1.9790754257907543e-06, "loss": 0.7318, "step": 22050 }, { "epoch": 0.6438060202621821, "grad_norm": 0.7512145920002972, "learning_rate": 1.9789132197891323e-06, "loss": 0.6177, "step": 22051 }, { "epoch": 0.6438352164900295, "grad_norm": 0.7058800892049131, "learning_rate": 1.9787510137875103e-06, "loss": 0.6155, "step": 22052 }, { "epoch": 0.6438644127178769, "grad_norm": 0.8557232999377171, "learning_rate": 1.9785888077858883e-06, "loss": 0.8075, "step": 22053 }, { "epoch": 0.6438936089457242, "grad_norm": 0.7508825798829273, "learning_rate": 1.9784266017842663e-06, "loss": 0.6878, "step": 22054 }, { "epoch": 0.6439228051735716, "grad_norm": 0.81500024936917, "learning_rate": 1.9782643957826444e-06, "loss": 0.7109, "step": 22055 }, { "epoch": 0.643952001401419, "grad_norm": 0.7134954693464515, "learning_rate": 1.978102189781022e-06, "loss": 0.6641, "step": 22056 }, { "epoch": 0.6439811976292663, "grad_norm": 0.7688824786797398, "learning_rate": 1.9779399837794e-06, "loss": 0.741, "step": 22057 }, { "epoch": 0.6440103938571137, "grad_norm": 0.6510631538071643, "learning_rate": 1.977777777777778e-06, "loss": 0.5482, "step": 22058 }, { "epoch": 0.644039590084961, "grad_norm": 0.7608082825313984, "learning_rate": 1.977615571776156e-06, "loss": 0.6264, "step": 22059 }, { "epoch": 0.6440687863128084, "grad_norm": 0.6773501813091024, "learning_rate": 1.9774533657745335e-06, "loss": 0.5772, "step": 22060 }, { "epoch": 0.6440979825406558, "grad_norm": 0.7034750608701474, "learning_rate": 1.9772911597729115e-06, "loss": 0.5813, "step": 22061 }, { "epoch": 0.6441271787685031, "grad_norm": 0.7379064976452645, "learning_rate": 1.97712895377129e-06, "loss": 0.6608, "step": 22062 }, { "epoch": 0.6441563749963505, "grad_norm": 0.6738045473140715, "learning_rate": 1.9769667477696676e-06, "loss": 0.5907, "step": 22063 }, { "epoch": 0.6441855712241978, "grad_norm": 0.6702292380663398, "learning_rate": 1.9768045417680456e-06, "loss": 0.5656, "step": 22064 }, { "epoch": 0.6442147674520452, "grad_norm": 0.7035680028085624, "learning_rate": 1.9766423357664236e-06, "loss": 0.6457, "step": 22065 }, { "epoch": 0.6442439636798926, "grad_norm": 0.7503078649684771, "learning_rate": 1.9764801297648016e-06, "loss": 0.7154, "step": 22066 }, { "epoch": 0.6442731599077399, "grad_norm": 0.7058071578249024, "learning_rate": 1.9763179237631796e-06, "loss": 0.5806, "step": 22067 }, { "epoch": 0.6443023561355873, "grad_norm": 0.7526434883647761, "learning_rate": 1.976155717761557e-06, "loss": 0.6187, "step": 22068 }, { "epoch": 0.6443315523634346, "grad_norm": 0.7342489146517446, "learning_rate": 1.975993511759935e-06, "loss": 0.6684, "step": 22069 }, { "epoch": 0.644360748591282, "grad_norm": 0.8057032772612402, "learning_rate": 1.975831305758313e-06, "loss": 0.6153, "step": 22070 }, { "epoch": 0.6443899448191294, "grad_norm": 0.7341836237978998, "learning_rate": 1.975669099756691e-06, "loss": 0.6288, "step": 22071 }, { "epoch": 0.6444191410469767, "grad_norm": 0.6855060096181399, "learning_rate": 1.975506893755069e-06, "loss": 0.5791, "step": 22072 }, { "epoch": 0.6444483372748241, "grad_norm": 0.6932382006687993, "learning_rate": 1.975344687753447e-06, "loss": 0.6201, "step": 22073 }, { "epoch": 0.6444775335026715, "grad_norm": 0.7977485364349733, "learning_rate": 1.975182481751825e-06, "loss": 0.6657, "step": 22074 }, { "epoch": 0.6445067297305188, "grad_norm": 0.7101255515250646, "learning_rate": 1.975020275750203e-06, "loss": 0.667, "step": 22075 }, { "epoch": 0.6445359259583662, "grad_norm": 0.6274714778741294, "learning_rate": 1.974858069748581e-06, "loss": 0.5077, "step": 22076 }, { "epoch": 0.6445651221862135, "grad_norm": 0.7309066064092092, "learning_rate": 1.974695863746959e-06, "loss": 0.6853, "step": 22077 }, { "epoch": 0.6445943184140609, "grad_norm": 0.7486416478496591, "learning_rate": 1.974533657745337e-06, "loss": 0.6883, "step": 22078 }, { "epoch": 0.6446235146419083, "grad_norm": 0.7076254389488743, "learning_rate": 1.9743714517437144e-06, "loss": 0.6058, "step": 22079 }, { "epoch": 0.6446527108697556, "grad_norm": 0.7430152483115359, "learning_rate": 1.9742092457420924e-06, "loss": 0.5932, "step": 22080 }, { "epoch": 0.644681907097603, "grad_norm": 0.7194044074817026, "learning_rate": 1.974047039740471e-06, "loss": 0.674, "step": 22081 }, { "epoch": 0.6447111033254503, "grad_norm": 0.7342010376704002, "learning_rate": 1.9738848337388484e-06, "loss": 0.6081, "step": 22082 }, { "epoch": 0.6447402995532977, "grad_norm": 0.7346656048386898, "learning_rate": 1.9737226277372264e-06, "loss": 0.5829, "step": 22083 }, { "epoch": 0.6447694957811451, "grad_norm": 0.695528285123028, "learning_rate": 1.9735604217356044e-06, "loss": 0.5785, "step": 22084 }, { "epoch": 0.6447986920089924, "grad_norm": 0.7423986041432623, "learning_rate": 1.9733982157339824e-06, "loss": 0.6837, "step": 22085 }, { "epoch": 0.6448278882368398, "grad_norm": 0.7093909508991472, "learning_rate": 1.9732360097323604e-06, "loss": 0.5877, "step": 22086 }, { "epoch": 0.6448570844646871, "grad_norm": 0.6841401861566642, "learning_rate": 1.973073803730738e-06, "loss": 0.563, "step": 22087 }, { "epoch": 0.6448862806925345, "grad_norm": 0.7280001777862365, "learning_rate": 1.972911597729116e-06, "loss": 0.6759, "step": 22088 }, { "epoch": 0.6449154769203819, "grad_norm": 0.6939509587840985, "learning_rate": 1.972749391727494e-06, "loss": 0.5469, "step": 22089 }, { "epoch": 0.6449446731482292, "grad_norm": 0.8099065359607704, "learning_rate": 1.972587185725872e-06, "loss": 0.7539, "step": 22090 }, { "epoch": 0.6449738693760766, "grad_norm": 0.6953821012066873, "learning_rate": 1.97242497972425e-06, "loss": 0.6061, "step": 22091 }, { "epoch": 0.645003065603924, "grad_norm": 0.6471088014498403, "learning_rate": 1.972262773722628e-06, "loss": 0.5031, "step": 22092 }, { "epoch": 0.6450322618317713, "grad_norm": 0.730947333705284, "learning_rate": 1.972100567721006e-06, "loss": 0.6097, "step": 22093 }, { "epoch": 0.6450614580596187, "grad_norm": 0.7495472806790525, "learning_rate": 1.9719383617193837e-06, "loss": 0.6946, "step": 22094 }, { "epoch": 0.645090654287466, "grad_norm": 0.7818852007088983, "learning_rate": 1.9717761557177617e-06, "loss": 0.7359, "step": 22095 }, { "epoch": 0.6451198505153134, "grad_norm": 0.726550243365647, "learning_rate": 1.9716139497161397e-06, "loss": 0.6085, "step": 22096 }, { "epoch": 0.6451490467431608, "grad_norm": 0.7012946857586982, "learning_rate": 1.9714517437145177e-06, "loss": 0.5992, "step": 22097 }, { "epoch": 0.6451782429710081, "grad_norm": 0.7083061369951743, "learning_rate": 1.9712895377128953e-06, "loss": 0.6609, "step": 22098 }, { "epoch": 0.6452074391988555, "grad_norm": 0.7148898955654226, "learning_rate": 1.9711273317112733e-06, "loss": 0.6314, "step": 22099 }, { "epoch": 0.6452366354267028, "grad_norm": 0.7425423071432166, "learning_rate": 1.9709651257096517e-06, "loss": 0.6389, "step": 22100 }, { "epoch": 0.6452658316545502, "grad_norm": 0.719578828013044, "learning_rate": 1.9708029197080293e-06, "loss": 0.6567, "step": 22101 }, { "epoch": 0.6452950278823976, "grad_norm": 0.7457407289413426, "learning_rate": 1.9706407137064073e-06, "loss": 0.6453, "step": 22102 }, { "epoch": 0.6453242241102449, "grad_norm": 0.6975878941288459, "learning_rate": 1.9704785077047853e-06, "loss": 0.5637, "step": 22103 }, { "epoch": 0.6453534203380923, "grad_norm": 0.7119470652386741, "learning_rate": 1.9703163017031633e-06, "loss": 0.658, "step": 22104 }, { "epoch": 0.6453826165659396, "grad_norm": 0.710189472340653, "learning_rate": 1.9701540957015413e-06, "loss": 0.6403, "step": 22105 }, { "epoch": 0.645411812793787, "grad_norm": 0.7317599418439495, "learning_rate": 1.969991889699919e-06, "loss": 0.6685, "step": 22106 }, { "epoch": 0.6454410090216344, "grad_norm": 0.7273502671265022, "learning_rate": 1.969829683698297e-06, "loss": 0.6614, "step": 22107 }, { "epoch": 0.6454702052494817, "grad_norm": 0.7152415339342462, "learning_rate": 1.969667477696675e-06, "loss": 0.6256, "step": 22108 }, { "epoch": 0.6454994014773291, "grad_norm": 0.7432594388126452, "learning_rate": 1.969505271695053e-06, "loss": 0.6816, "step": 22109 }, { "epoch": 0.6455285977051765, "grad_norm": 0.7082394571482661, "learning_rate": 1.969343065693431e-06, "loss": 0.5924, "step": 22110 }, { "epoch": 0.6455577939330238, "grad_norm": 0.7334310111948962, "learning_rate": 1.969180859691809e-06, "loss": 0.642, "step": 22111 }, { "epoch": 0.6455869901608712, "grad_norm": 0.7686309284473324, "learning_rate": 1.969018653690187e-06, "loss": 0.7196, "step": 22112 }, { "epoch": 0.6456161863887185, "grad_norm": 0.7545909405893663, "learning_rate": 1.9688564476885645e-06, "loss": 0.6688, "step": 22113 }, { "epoch": 0.6456453826165659, "grad_norm": 0.7508868327381234, "learning_rate": 1.9686942416869425e-06, "loss": 0.6779, "step": 22114 }, { "epoch": 0.6456745788444133, "grad_norm": 0.6945765903527217, "learning_rate": 1.9685320356853205e-06, "loss": 0.6293, "step": 22115 }, { "epoch": 0.6457037750722606, "grad_norm": 0.7513197964296815, "learning_rate": 1.9683698296836985e-06, "loss": 0.6562, "step": 22116 }, { "epoch": 0.645732971300108, "grad_norm": 0.6998719944774898, "learning_rate": 1.968207623682076e-06, "loss": 0.5982, "step": 22117 }, { "epoch": 0.6457621675279553, "grad_norm": 0.7083343596913722, "learning_rate": 1.968045417680454e-06, "loss": 0.6137, "step": 22118 }, { "epoch": 0.6457913637558027, "grad_norm": 0.7344401037813324, "learning_rate": 1.9678832116788326e-06, "loss": 0.6417, "step": 22119 }, { "epoch": 0.6458205599836501, "grad_norm": 0.738991504976005, "learning_rate": 1.96772100567721e-06, "loss": 0.6121, "step": 22120 }, { "epoch": 0.6458497562114974, "grad_norm": 0.7100492204353132, "learning_rate": 1.967558799675588e-06, "loss": 0.6158, "step": 22121 }, { "epoch": 0.6458789524393448, "grad_norm": 0.690343526962434, "learning_rate": 1.967396593673966e-06, "loss": 0.6153, "step": 22122 }, { "epoch": 0.6459081486671922, "grad_norm": 0.7282665116574065, "learning_rate": 1.967234387672344e-06, "loss": 0.626, "step": 22123 }, { "epoch": 0.6459373448950395, "grad_norm": 0.6921112046841222, "learning_rate": 1.9670721816707217e-06, "loss": 0.5803, "step": 22124 }, { "epoch": 0.6459665411228869, "grad_norm": 0.7168452768327533, "learning_rate": 1.9669099756690997e-06, "loss": 0.652, "step": 22125 }, { "epoch": 0.6459957373507342, "grad_norm": 0.7451517526857826, "learning_rate": 1.9667477696674778e-06, "loss": 0.6474, "step": 22126 }, { "epoch": 0.6460249335785816, "grad_norm": 0.6991450857866813, "learning_rate": 1.9665855636658558e-06, "loss": 0.6534, "step": 22127 }, { "epoch": 0.646054129806429, "grad_norm": 0.7343907212446011, "learning_rate": 1.9664233576642338e-06, "loss": 0.5942, "step": 22128 }, { "epoch": 0.6460833260342763, "grad_norm": 0.759850887619574, "learning_rate": 1.9662611516626118e-06, "loss": 0.7274, "step": 22129 }, { "epoch": 0.6461125222621237, "grad_norm": 0.7109395037768723, "learning_rate": 1.9660989456609898e-06, "loss": 0.689, "step": 22130 }, { "epoch": 0.6461417184899712, "grad_norm": 0.7114133482009454, "learning_rate": 1.9659367396593678e-06, "loss": 0.5977, "step": 22131 }, { "epoch": 0.6461709147178185, "grad_norm": 0.7383225436803652, "learning_rate": 1.9657745336577454e-06, "loss": 0.6925, "step": 22132 }, { "epoch": 0.6462001109456659, "grad_norm": 0.7269607570888446, "learning_rate": 1.9656123276561234e-06, "loss": 0.6126, "step": 22133 }, { "epoch": 0.6462293071735132, "grad_norm": 0.7141318654471938, "learning_rate": 1.9654501216545014e-06, "loss": 0.6233, "step": 22134 }, { "epoch": 0.6462585034013606, "grad_norm": 0.7037403894801706, "learning_rate": 1.9652879156528794e-06, "loss": 0.6464, "step": 22135 }, { "epoch": 0.646287699629208, "grad_norm": 0.7385269834713016, "learning_rate": 1.965125709651257e-06, "loss": 0.6296, "step": 22136 }, { "epoch": 0.6463168958570553, "grad_norm": 0.7315360387512364, "learning_rate": 1.964963503649635e-06, "loss": 0.6738, "step": 22137 }, { "epoch": 0.6463460920849027, "grad_norm": 0.7466903105265131, "learning_rate": 1.9648012976480134e-06, "loss": 0.5884, "step": 22138 }, { "epoch": 0.64637528831275, "grad_norm": 0.6923463871916566, "learning_rate": 1.964639091646391e-06, "loss": 0.6421, "step": 22139 }, { "epoch": 0.6464044845405974, "grad_norm": 0.6917921059170739, "learning_rate": 1.964476885644769e-06, "loss": 0.6058, "step": 22140 }, { "epoch": 0.6464336807684448, "grad_norm": 0.7757381682496675, "learning_rate": 1.964314679643147e-06, "loss": 0.7005, "step": 22141 }, { "epoch": 0.6464628769962921, "grad_norm": 0.6704203282392771, "learning_rate": 1.964152473641525e-06, "loss": 0.5943, "step": 22142 }, { "epoch": 0.6464920732241395, "grad_norm": 0.7371104703820838, "learning_rate": 1.9639902676399026e-06, "loss": 0.6993, "step": 22143 }, { "epoch": 0.6465212694519868, "grad_norm": 0.6733882860925976, "learning_rate": 1.9638280616382806e-06, "loss": 0.5253, "step": 22144 }, { "epoch": 0.6465504656798342, "grad_norm": 0.733643269583337, "learning_rate": 1.9636658556366586e-06, "loss": 0.6978, "step": 22145 }, { "epoch": 0.6465796619076816, "grad_norm": 1.0893949645177892, "learning_rate": 1.9635036496350366e-06, "loss": 0.6665, "step": 22146 }, { "epoch": 0.6466088581355289, "grad_norm": 0.750672715847961, "learning_rate": 1.9633414436334146e-06, "loss": 0.6465, "step": 22147 }, { "epoch": 0.6466380543633763, "grad_norm": 0.8951279651157509, "learning_rate": 1.9631792376317926e-06, "loss": 0.7101, "step": 22148 }, { "epoch": 0.6466672505912237, "grad_norm": 0.7814500010867297, "learning_rate": 1.9630170316301706e-06, "loss": 0.6613, "step": 22149 }, { "epoch": 0.646696446819071, "grad_norm": 0.736830275232299, "learning_rate": 1.9628548256285486e-06, "loss": 0.6758, "step": 22150 }, { "epoch": 0.6467256430469184, "grad_norm": 0.7434468646535707, "learning_rate": 1.9626926196269262e-06, "loss": 0.7094, "step": 22151 }, { "epoch": 0.6467548392747657, "grad_norm": 0.7462503046331204, "learning_rate": 1.9625304136253042e-06, "loss": 0.6295, "step": 22152 }, { "epoch": 0.6467840355026131, "grad_norm": 0.7333233777384761, "learning_rate": 1.9623682076236822e-06, "loss": 0.6077, "step": 22153 }, { "epoch": 0.6468132317304605, "grad_norm": 0.7008265168199196, "learning_rate": 1.9622060016220602e-06, "loss": 0.556, "step": 22154 }, { "epoch": 0.6468424279583078, "grad_norm": 0.7229888530143445, "learning_rate": 1.962043795620438e-06, "loss": 0.6633, "step": 22155 }, { "epoch": 0.6468716241861552, "grad_norm": 0.7199060898618008, "learning_rate": 1.961881589618816e-06, "loss": 0.6502, "step": 22156 }, { "epoch": 0.6469008204140025, "grad_norm": 0.7136645520627608, "learning_rate": 1.9617193836171943e-06, "loss": 0.6129, "step": 22157 }, { "epoch": 0.6469300166418499, "grad_norm": 0.690092571002993, "learning_rate": 1.961557177615572e-06, "loss": 0.5706, "step": 22158 }, { "epoch": 0.6469592128696973, "grad_norm": 0.7330201469367782, "learning_rate": 1.96139497161395e-06, "loss": 0.6634, "step": 22159 }, { "epoch": 0.6469884090975446, "grad_norm": 0.7176973679534274, "learning_rate": 1.961232765612328e-06, "loss": 0.6338, "step": 22160 }, { "epoch": 0.647017605325392, "grad_norm": 0.8212461533838475, "learning_rate": 1.961070559610706e-06, "loss": 0.7877, "step": 22161 }, { "epoch": 0.6470468015532393, "grad_norm": 0.6896725024213579, "learning_rate": 1.9609083536090835e-06, "loss": 0.6661, "step": 22162 }, { "epoch": 0.6470759977810867, "grad_norm": 0.7240069652005897, "learning_rate": 1.9607461476074615e-06, "loss": 0.6583, "step": 22163 }, { "epoch": 0.6471051940089341, "grad_norm": 0.6921845506198756, "learning_rate": 1.9605839416058395e-06, "loss": 0.6393, "step": 22164 }, { "epoch": 0.6471343902367814, "grad_norm": 0.72838420892796, "learning_rate": 1.9604217356042175e-06, "loss": 0.6375, "step": 22165 }, { "epoch": 0.6471635864646288, "grad_norm": 0.838034596388238, "learning_rate": 1.9602595296025955e-06, "loss": 0.6501, "step": 22166 }, { "epoch": 0.6471927826924762, "grad_norm": 0.9137825228813641, "learning_rate": 1.9600973236009735e-06, "loss": 0.7276, "step": 22167 }, { "epoch": 0.6472219789203235, "grad_norm": 0.7895540168958243, "learning_rate": 1.9599351175993515e-06, "loss": 0.6588, "step": 22168 }, { "epoch": 0.6472511751481709, "grad_norm": 0.6710604373651836, "learning_rate": 1.9597729115977295e-06, "loss": 0.596, "step": 22169 }, { "epoch": 0.6472803713760182, "grad_norm": 0.779897975298678, "learning_rate": 1.959610705596107e-06, "loss": 0.6558, "step": 22170 }, { "epoch": 0.6473095676038656, "grad_norm": 0.7267237923805393, "learning_rate": 1.959448499594485e-06, "loss": 0.6335, "step": 22171 }, { "epoch": 0.647338763831713, "grad_norm": 0.7090667403648114, "learning_rate": 1.959286293592863e-06, "loss": 0.6389, "step": 22172 }, { "epoch": 0.6473679600595603, "grad_norm": 0.6980200955043705, "learning_rate": 1.959124087591241e-06, "loss": 0.5711, "step": 22173 }, { "epoch": 0.6473971562874077, "grad_norm": 0.7247403440750404, "learning_rate": 1.9589618815896187e-06, "loss": 0.656, "step": 22174 }, { "epoch": 0.647426352515255, "grad_norm": 0.7047196566318865, "learning_rate": 1.958799675587997e-06, "loss": 0.6089, "step": 22175 }, { "epoch": 0.6474555487431024, "grad_norm": 0.7295457009251296, "learning_rate": 1.958637469586375e-06, "loss": 0.6649, "step": 22176 }, { "epoch": 0.6474847449709498, "grad_norm": 0.7602351677597632, "learning_rate": 1.9584752635847527e-06, "loss": 0.6975, "step": 22177 }, { "epoch": 0.6475139411987971, "grad_norm": 0.7251941476190401, "learning_rate": 1.9583130575831307e-06, "loss": 0.6816, "step": 22178 }, { "epoch": 0.6475431374266445, "grad_norm": 0.6932395531981532, "learning_rate": 1.9581508515815087e-06, "loss": 0.6461, "step": 22179 }, { "epoch": 0.6475723336544918, "grad_norm": 0.6973970312615857, "learning_rate": 1.9579886455798867e-06, "loss": 0.5918, "step": 22180 }, { "epoch": 0.6476015298823392, "grad_norm": 0.6990708247024989, "learning_rate": 1.9578264395782643e-06, "loss": 0.6513, "step": 22181 }, { "epoch": 0.6476307261101866, "grad_norm": 0.769406672683346, "learning_rate": 1.9576642335766423e-06, "loss": 0.6802, "step": 22182 }, { "epoch": 0.6476599223380339, "grad_norm": 0.788915136981336, "learning_rate": 1.9575020275750203e-06, "loss": 0.6988, "step": 22183 }, { "epoch": 0.6476891185658813, "grad_norm": 0.6722882136222278, "learning_rate": 1.9573398215733983e-06, "loss": 0.5561, "step": 22184 }, { "epoch": 0.6477183147937287, "grad_norm": 0.7574636015672893, "learning_rate": 1.9571776155717763e-06, "loss": 0.6493, "step": 22185 }, { "epoch": 0.647747511021576, "grad_norm": 0.6981710088842261, "learning_rate": 1.9570154095701543e-06, "loss": 0.5721, "step": 22186 }, { "epoch": 0.6477767072494234, "grad_norm": 0.6979252137123009, "learning_rate": 1.9568532035685324e-06, "loss": 0.6382, "step": 22187 }, { "epoch": 0.6478059034772707, "grad_norm": 0.6877161841027734, "learning_rate": 1.9566909975669104e-06, "loss": 0.5857, "step": 22188 }, { "epoch": 0.6478350997051181, "grad_norm": 0.7769286643176201, "learning_rate": 1.956528791565288e-06, "loss": 0.7269, "step": 22189 }, { "epoch": 0.6478642959329655, "grad_norm": 0.7327818006122433, "learning_rate": 1.956366585563666e-06, "loss": 0.5734, "step": 22190 }, { "epoch": 0.6478934921608128, "grad_norm": 0.7376279834822294, "learning_rate": 1.956204379562044e-06, "loss": 0.6364, "step": 22191 }, { "epoch": 0.6479226883886602, "grad_norm": 0.7538487407102475, "learning_rate": 1.956042173560422e-06, "loss": 0.7526, "step": 22192 }, { "epoch": 0.6479518846165075, "grad_norm": 0.7140119837737222, "learning_rate": 1.9558799675587995e-06, "loss": 0.644, "step": 22193 }, { "epoch": 0.6479810808443549, "grad_norm": 0.7555283227330999, "learning_rate": 1.955717761557178e-06, "loss": 0.6604, "step": 22194 }, { "epoch": 0.6480102770722023, "grad_norm": 0.7521412380642782, "learning_rate": 1.955555555555556e-06, "loss": 0.72, "step": 22195 }, { "epoch": 0.6480394733000496, "grad_norm": 0.7612075110959455, "learning_rate": 1.9553933495539336e-06, "loss": 0.6879, "step": 22196 }, { "epoch": 0.648068669527897, "grad_norm": 0.795795982876793, "learning_rate": 1.9552311435523116e-06, "loss": 0.6901, "step": 22197 }, { "epoch": 0.6480978657557444, "grad_norm": 0.7247276817803209, "learning_rate": 1.9550689375506896e-06, "loss": 0.6059, "step": 22198 }, { "epoch": 0.6481270619835917, "grad_norm": 0.7151871459039233, "learning_rate": 1.9549067315490676e-06, "loss": 0.6341, "step": 22199 }, { "epoch": 0.6481562582114391, "grad_norm": 0.6980945145201016, "learning_rate": 1.954744525547445e-06, "loss": 0.5897, "step": 22200 }, { "epoch": 0.6481854544392864, "grad_norm": 0.6988048803081749, "learning_rate": 1.954582319545823e-06, "loss": 0.5558, "step": 22201 }, { "epoch": 0.6482146506671338, "grad_norm": 0.7014346192474247, "learning_rate": 1.954420113544201e-06, "loss": 0.5807, "step": 22202 }, { "epoch": 0.6482438468949812, "grad_norm": 0.6833712372544977, "learning_rate": 1.954257907542579e-06, "loss": 0.6175, "step": 22203 }, { "epoch": 0.6482730431228285, "grad_norm": 0.7636807773381372, "learning_rate": 1.954095701540957e-06, "loss": 0.6727, "step": 22204 }, { "epoch": 0.6483022393506759, "grad_norm": 0.6643379585878717, "learning_rate": 1.953933495539335e-06, "loss": 0.5651, "step": 22205 }, { "epoch": 0.6483314355785232, "grad_norm": 0.6676944877163029, "learning_rate": 1.9537712895377132e-06, "loss": 0.5535, "step": 22206 }, { "epoch": 0.6483606318063706, "grad_norm": 0.7918629609678878, "learning_rate": 1.9536090835360912e-06, "loss": 0.6927, "step": 22207 }, { "epoch": 0.648389828034218, "grad_norm": 0.6910048311412053, "learning_rate": 1.953446877534469e-06, "loss": 0.5693, "step": 22208 }, { "epoch": 0.6484190242620653, "grad_norm": 0.7146795214689218, "learning_rate": 1.953284671532847e-06, "loss": 0.6235, "step": 22209 }, { "epoch": 0.6484482204899127, "grad_norm": 0.6905425196115486, "learning_rate": 1.953122465531225e-06, "loss": 0.5942, "step": 22210 }, { "epoch": 0.64847741671776, "grad_norm": 0.7361835819330386, "learning_rate": 1.952960259529603e-06, "loss": 0.6666, "step": 22211 }, { "epoch": 0.6485066129456074, "grad_norm": 0.6792986735192608, "learning_rate": 1.9527980535279804e-06, "loss": 0.5849, "step": 22212 }, { "epoch": 0.6485358091734548, "grad_norm": 0.759020364400711, "learning_rate": 1.952635847526359e-06, "loss": 0.6689, "step": 22213 }, { "epoch": 0.6485650054013021, "grad_norm": 0.762569080559028, "learning_rate": 1.952473641524737e-06, "loss": 0.6754, "step": 22214 }, { "epoch": 0.6485942016291495, "grad_norm": 0.8137238136823658, "learning_rate": 1.9523114355231144e-06, "loss": 0.7604, "step": 22215 }, { "epoch": 0.6486233978569969, "grad_norm": 0.6838606517423164, "learning_rate": 1.9521492295214924e-06, "loss": 0.6153, "step": 22216 }, { "epoch": 0.6486525940848442, "grad_norm": 0.8277617989064582, "learning_rate": 1.9519870235198704e-06, "loss": 0.6554, "step": 22217 }, { "epoch": 0.6486817903126916, "grad_norm": 0.7209418710610629, "learning_rate": 1.9518248175182484e-06, "loss": 0.6652, "step": 22218 }, { "epoch": 0.6487109865405389, "grad_norm": 0.7368036421691779, "learning_rate": 1.951662611516626e-06, "loss": 0.6529, "step": 22219 }, { "epoch": 0.6487401827683863, "grad_norm": 0.724883595610729, "learning_rate": 1.951500405515004e-06, "loss": 0.6742, "step": 22220 }, { "epoch": 0.6487693789962337, "grad_norm": 0.7621733808167205, "learning_rate": 1.951338199513382e-06, "loss": 0.6315, "step": 22221 }, { "epoch": 0.648798575224081, "grad_norm": 0.7037053518365255, "learning_rate": 1.95117599351176e-06, "loss": 0.5922, "step": 22222 }, { "epoch": 0.6488277714519284, "grad_norm": 0.7567291386859303, "learning_rate": 1.951013787510138e-06, "loss": 0.6303, "step": 22223 }, { "epoch": 0.6488569676797757, "grad_norm": 0.7094426913512181, "learning_rate": 1.950851581508516e-06, "loss": 0.6095, "step": 22224 }, { "epoch": 0.6488861639076231, "grad_norm": 0.7354076341288425, "learning_rate": 1.950689375506894e-06, "loss": 0.6127, "step": 22225 }, { "epoch": 0.6489153601354705, "grad_norm": 0.7188860724309575, "learning_rate": 1.950527169505272e-06, "loss": 0.599, "step": 22226 }, { "epoch": 0.6489445563633178, "grad_norm": 0.757358423818002, "learning_rate": 1.9503649635036497e-06, "loss": 0.6858, "step": 22227 }, { "epoch": 0.6489737525911652, "grad_norm": 0.7499947581883418, "learning_rate": 1.9502027575020277e-06, "loss": 0.7011, "step": 22228 }, { "epoch": 0.6490029488190125, "grad_norm": 0.7617966127368762, "learning_rate": 1.9500405515004057e-06, "loss": 0.7001, "step": 22229 }, { "epoch": 0.6490321450468599, "grad_norm": 0.7201950994004854, "learning_rate": 1.9498783454987837e-06, "loss": 0.6361, "step": 22230 }, { "epoch": 0.6490613412747073, "grad_norm": 0.7183453012045818, "learning_rate": 1.9497161394971613e-06, "loss": 0.5904, "step": 22231 }, { "epoch": 0.6490905375025546, "grad_norm": 0.7192669495321163, "learning_rate": 1.9495539334955397e-06, "loss": 0.6434, "step": 22232 }, { "epoch": 0.649119733730402, "grad_norm": 0.7428220949970616, "learning_rate": 1.9493917274939177e-06, "loss": 0.6193, "step": 22233 }, { "epoch": 0.6491489299582494, "grad_norm": 0.7537868569795922, "learning_rate": 1.9492295214922953e-06, "loss": 0.6573, "step": 22234 }, { "epoch": 0.6491781261860967, "grad_norm": 0.7216323531331459, "learning_rate": 1.9490673154906733e-06, "loss": 0.6402, "step": 22235 }, { "epoch": 0.6492073224139441, "grad_norm": 0.6851453627105967, "learning_rate": 1.9489051094890513e-06, "loss": 0.581, "step": 22236 }, { "epoch": 0.6492365186417914, "grad_norm": 0.7555793640517945, "learning_rate": 1.9487429034874293e-06, "loss": 0.6593, "step": 22237 }, { "epoch": 0.6492657148696388, "grad_norm": 0.7069890076945814, "learning_rate": 1.948580697485807e-06, "loss": 0.6163, "step": 22238 }, { "epoch": 0.6492949110974862, "grad_norm": 0.7294309093408673, "learning_rate": 1.948418491484185e-06, "loss": 0.6621, "step": 22239 }, { "epoch": 0.6493241073253335, "grad_norm": 0.7763936600028393, "learning_rate": 1.948256285482563e-06, "loss": 0.7438, "step": 22240 }, { "epoch": 0.6493533035531809, "grad_norm": 0.7401116988681712, "learning_rate": 1.948094079480941e-06, "loss": 0.6487, "step": 22241 }, { "epoch": 0.6493824997810282, "grad_norm": 0.6876387404767208, "learning_rate": 1.947931873479319e-06, "loss": 0.5702, "step": 22242 }, { "epoch": 0.6494116960088756, "grad_norm": 0.7765569366259437, "learning_rate": 1.947769667477697e-06, "loss": 0.7059, "step": 22243 }, { "epoch": 0.649440892236723, "grad_norm": 0.7208710851122738, "learning_rate": 1.947607461476075e-06, "loss": 0.656, "step": 22244 }, { "epoch": 0.6494700884645703, "grad_norm": 0.7492184348963501, "learning_rate": 1.947445255474453e-06, "loss": 0.7154, "step": 22245 }, { "epoch": 0.6494992846924177, "grad_norm": 0.7027263308693833, "learning_rate": 1.9472830494728305e-06, "loss": 0.6013, "step": 22246 }, { "epoch": 0.649528480920265, "grad_norm": 0.7041584655186797, "learning_rate": 1.9471208434712085e-06, "loss": 0.6139, "step": 22247 }, { "epoch": 0.6495576771481124, "grad_norm": 0.6988734799093081, "learning_rate": 1.9469586374695865e-06, "loss": 0.5949, "step": 22248 }, { "epoch": 0.6495868733759598, "grad_norm": 0.7065666738604289, "learning_rate": 1.9467964314679645e-06, "loss": 0.6335, "step": 22249 }, { "epoch": 0.6496160696038071, "grad_norm": 0.7834762762314001, "learning_rate": 1.946634225466342e-06, "loss": 0.6799, "step": 22250 }, { "epoch": 0.6496452658316545, "grad_norm": 0.7233452861518013, "learning_rate": 1.9464720194647206e-06, "loss": 0.5893, "step": 22251 }, { "epoch": 0.649674462059502, "grad_norm": 0.7112787902583515, "learning_rate": 1.9463098134630986e-06, "loss": 0.6493, "step": 22252 }, { "epoch": 0.6497036582873493, "grad_norm": 0.7540893795011858, "learning_rate": 1.946147607461476e-06, "loss": 0.6785, "step": 22253 }, { "epoch": 0.6497328545151967, "grad_norm": 0.7033834913334446, "learning_rate": 1.945985401459854e-06, "loss": 0.618, "step": 22254 }, { "epoch": 0.649762050743044, "grad_norm": 0.7083038091285909, "learning_rate": 1.945823195458232e-06, "loss": 0.6245, "step": 22255 }, { "epoch": 0.6497912469708914, "grad_norm": 0.7187159744753265, "learning_rate": 1.94566098945661e-06, "loss": 0.6276, "step": 22256 }, { "epoch": 0.6498204431987388, "grad_norm": 0.7327513480254152, "learning_rate": 1.9454987834549877e-06, "loss": 0.6596, "step": 22257 }, { "epoch": 0.6498496394265861, "grad_norm": 0.7040731353961958, "learning_rate": 1.9453365774533658e-06, "loss": 0.607, "step": 22258 }, { "epoch": 0.6498788356544335, "grad_norm": 0.6807440270011095, "learning_rate": 1.9451743714517438e-06, "loss": 0.5898, "step": 22259 }, { "epoch": 0.6499080318822809, "grad_norm": 0.6695783224456453, "learning_rate": 1.9450121654501218e-06, "loss": 0.5678, "step": 22260 }, { "epoch": 0.6499372281101282, "grad_norm": 0.6535758541725754, "learning_rate": 1.9448499594484998e-06, "loss": 0.5446, "step": 22261 }, { "epoch": 0.6499664243379756, "grad_norm": 0.6613184316709775, "learning_rate": 1.9446877534468778e-06, "loss": 0.5418, "step": 22262 }, { "epoch": 0.6499956205658229, "grad_norm": 0.770886007677254, "learning_rate": 1.944525547445256e-06, "loss": 0.6974, "step": 22263 }, { "epoch": 0.6500248167936703, "grad_norm": 0.7869937219419726, "learning_rate": 1.944363341443634e-06, "loss": 0.7398, "step": 22264 }, { "epoch": 0.6500540130215177, "grad_norm": 0.6691817677771605, "learning_rate": 1.9442011354420114e-06, "loss": 0.492, "step": 22265 }, { "epoch": 0.650083209249365, "grad_norm": 0.778913165879976, "learning_rate": 1.9440389294403894e-06, "loss": 0.6731, "step": 22266 }, { "epoch": 0.6501124054772124, "grad_norm": 0.7131724772880619, "learning_rate": 1.9438767234387674e-06, "loss": 0.5973, "step": 22267 }, { "epoch": 0.6501416017050597, "grad_norm": 0.7626216126532939, "learning_rate": 1.9437145174371454e-06, "loss": 0.6466, "step": 22268 }, { "epoch": 0.6501707979329071, "grad_norm": 0.671914345115836, "learning_rate": 1.943552311435523e-06, "loss": 0.5867, "step": 22269 }, { "epoch": 0.6501999941607545, "grad_norm": 0.7003080716915462, "learning_rate": 1.9433901054339014e-06, "loss": 0.5738, "step": 22270 }, { "epoch": 0.6502291903886018, "grad_norm": 0.7331510721467183, "learning_rate": 1.9432278994322794e-06, "loss": 0.655, "step": 22271 }, { "epoch": 0.6502583866164492, "grad_norm": 0.7005773113840187, "learning_rate": 1.943065693430657e-06, "loss": 0.619, "step": 22272 }, { "epoch": 0.6502875828442966, "grad_norm": 0.7290236792861613, "learning_rate": 1.942903487429035e-06, "loss": 0.6383, "step": 22273 }, { "epoch": 0.6503167790721439, "grad_norm": 0.8145983375344361, "learning_rate": 1.942741281427413e-06, "loss": 0.6828, "step": 22274 }, { "epoch": 0.6503459752999913, "grad_norm": 0.6940100260939178, "learning_rate": 1.942579075425791e-06, "loss": 0.6256, "step": 22275 }, { "epoch": 0.6503751715278386, "grad_norm": 0.7455002673643172, "learning_rate": 1.9424168694241686e-06, "loss": 0.6595, "step": 22276 }, { "epoch": 0.650404367755686, "grad_norm": 0.7224661980763194, "learning_rate": 1.9422546634225466e-06, "loss": 0.6294, "step": 22277 }, { "epoch": 0.6504335639835334, "grad_norm": 0.8537267054038078, "learning_rate": 1.9420924574209246e-06, "loss": 0.6978, "step": 22278 }, { "epoch": 0.6504627602113807, "grad_norm": 0.766204865801851, "learning_rate": 1.9419302514193026e-06, "loss": 0.6625, "step": 22279 }, { "epoch": 0.6504919564392281, "grad_norm": 0.9203448289402361, "learning_rate": 1.9417680454176806e-06, "loss": 0.6415, "step": 22280 }, { "epoch": 0.6505211526670754, "grad_norm": 0.7471368171725727, "learning_rate": 1.9416058394160586e-06, "loss": 0.6817, "step": 22281 }, { "epoch": 0.6505503488949228, "grad_norm": 0.7553342420915192, "learning_rate": 1.9414436334144366e-06, "loss": 0.6857, "step": 22282 }, { "epoch": 0.6505795451227702, "grad_norm": 0.7455782072832322, "learning_rate": 1.9412814274128147e-06, "loss": 0.7052, "step": 22283 }, { "epoch": 0.6506087413506175, "grad_norm": 0.7095835494074686, "learning_rate": 1.9411192214111922e-06, "loss": 0.632, "step": 22284 }, { "epoch": 0.6506379375784649, "grad_norm": 0.7210876546242591, "learning_rate": 1.9409570154095702e-06, "loss": 0.6117, "step": 22285 }, { "epoch": 0.6506671338063122, "grad_norm": 0.8819096459156829, "learning_rate": 1.9407948094079483e-06, "loss": 0.6854, "step": 22286 }, { "epoch": 0.6506963300341596, "grad_norm": 0.7647870122210034, "learning_rate": 1.9406326034063263e-06, "loss": 0.7121, "step": 22287 }, { "epoch": 0.650725526262007, "grad_norm": 0.7545645814535406, "learning_rate": 1.940470397404704e-06, "loss": 0.6748, "step": 22288 }, { "epoch": 0.6507547224898543, "grad_norm": 0.67746467700145, "learning_rate": 1.9403081914030823e-06, "loss": 0.5354, "step": 22289 }, { "epoch": 0.6507839187177017, "grad_norm": 0.6742912915261835, "learning_rate": 1.9401459854014603e-06, "loss": 0.582, "step": 22290 }, { "epoch": 0.650813114945549, "grad_norm": 0.7017899629783695, "learning_rate": 1.939983779399838e-06, "loss": 0.5836, "step": 22291 }, { "epoch": 0.6508423111733964, "grad_norm": 0.7098398747132802, "learning_rate": 1.939821573398216e-06, "loss": 0.6486, "step": 22292 }, { "epoch": 0.6508715074012438, "grad_norm": 0.7130575850903351, "learning_rate": 1.939659367396594e-06, "loss": 0.6081, "step": 22293 }, { "epoch": 0.6509007036290911, "grad_norm": 0.7182181966007258, "learning_rate": 1.939497161394972e-06, "loss": 0.6328, "step": 22294 }, { "epoch": 0.6509298998569385, "grad_norm": 0.6970372542751714, "learning_rate": 1.9393349553933495e-06, "loss": 0.6007, "step": 22295 }, { "epoch": 0.6509590960847859, "grad_norm": 0.681700626428327, "learning_rate": 1.9391727493917275e-06, "loss": 0.6086, "step": 22296 }, { "epoch": 0.6509882923126332, "grad_norm": 0.7135516153180189, "learning_rate": 1.9390105433901055e-06, "loss": 0.6973, "step": 22297 }, { "epoch": 0.6510174885404806, "grad_norm": 0.7418677992726065, "learning_rate": 1.9388483373884835e-06, "loss": 0.6016, "step": 22298 }, { "epoch": 0.6510466847683279, "grad_norm": 0.7590988278532895, "learning_rate": 1.9386861313868615e-06, "loss": 0.6708, "step": 22299 }, { "epoch": 0.6510758809961753, "grad_norm": 0.7179206994828671, "learning_rate": 1.9385239253852395e-06, "loss": 0.6618, "step": 22300 }, { "epoch": 0.6511050772240227, "grad_norm": 0.7953509391962579, "learning_rate": 1.9383617193836175e-06, "loss": 0.6222, "step": 22301 }, { "epoch": 0.65113427345187, "grad_norm": 0.7115942525658868, "learning_rate": 1.9381995133819955e-06, "loss": 0.6257, "step": 22302 }, { "epoch": 0.6511634696797174, "grad_norm": 0.7390387286111271, "learning_rate": 1.938037307380373e-06, "loss": 0.717, "step": 22303 }, { "epoch": 0.6511926659075647, "grad_norm": 0.6805718003843514, "learning_rate": 1.937875101378751e-06, "loss": 0.5833, "step": 22304 }, { "epoch": 0.6512218621354121, "grad_norm": 0.7667906278011953, "learning_rate": 1.937712895377129e-06, "loss": 0.6971, "step": 22305 }, { "epoch": 0.6512510583632595, "grad_norm": 0.7747408930698538, "learning_rate": 1.937550689375507e-06, "loss": 0.6884, "step": 22306 }, { "epoch": 0.6512802545911068, "grad_norm": 0.7296201032558715, "learning_rate": 1.9373884833738847e-06, "loss": 0.6891, "step": 22307 }, { "epoch": 0.6513094508189542, "grad_norm": 0.7234947035339118, "learning_rate": 1.937226277372263e-06, "loss": 0.6576, "step": 22308 }, { "epoch": 0.6513386470468016, "grad_norm": 0.8348507930590435, "learning_rate": 1.937064071370641e-06, "loss": 0.7271, "step": 22309 }, { "epoch": 0.6513678432746489, "grad_norm": 0.7066351655548654, "learning_rate": 1.9369018653690187e-06, "loss": 0.5882, "step": 22310 }, { "epoch": 0.6513970395024963, "grad_norm": 0.7186695905843845, "learning_rate": 1.9367396593673967e-06, "loss": 0.6365, "step": 22311 }, { "epoch": 0.6514262357303436, "grad_norm": 0.6930309931513915, "learning_rate": 1.9365774533657747e-06, "loss": 0.5868, "step": 22312 }, { "epoch": 0.651455431958191, "grad_norm": 0.7259696947460156, "learning_rate": 1.9364152473641527e-06, "loss": 0.6661, "step": 22313 }, { "epoch": 0.6514846281860384, "grad_norm": 0.7412168415571184, "learning_rate": 1.9362530413625303e-06, "loss": 0.6578, "step": 22314 }, { "epoch": 0.6515138244138857, "grad_norm": 0.7261834765344406, "learning_rate": 1.9360908353609083e-06, "loss": 0.625, "step": 22315 }, { "epoch": 0.6515430206417331, "grad_norm": 0.6910828611246345, "learning_rate": 1.9359286293592863e-06, "loss": 0.6009, "step": 22316 }, { "epoch": 0.6515722168695804, "grad_norm": 0.7374879439337233, "learning_rate": 1.9357664233576643e-06, "loss": 0.6562, "step": 22317 }, { "epoch": 0.6516014130974278, "grad_norm": 0.7555390265717973, "learning_rate": 1.9356042173560424e-06, "loss": 0.6668, "step": 22318 }, { "epoch": 0.6516306093252752, "grad_norm": 0.7713070443244594, "learning_rate": 1.9354420113544204e-06, "loss": 0.6853, "step": 22319 }, { "epoch": 0.6516598055531225, "grad_norm": 0.7744149481333197, "learning_rate": 1.9352798053527984e-06, "loss": 0.7426, "step": 22320 }, { "epoch": 0.6516890017809699, "grad_norm": 0.7465971473450523, "learning_rate": 1.935117599351176e-06, "loss": 0.7253, "step": 22321 }, { "epoch": 0.6517181980088173, "grad_norm": 0.7493636092563266, "learning_rate": 1.934955393349554e-06, "loss": 0.7056, "step": 22322 }, { "epoch": 0.6517473942366646, "grad_norm": 0.6930977296572897, "learning_rate": 1.934793187347932e-06, "loss": 0.5627, "step": 22323 }, { "epoch": 0.651776590464512, "grad_norm": 0.7187488236481986, "learning_rate": 1.93463098134631e-06, "loss": 0.6403, "step": 22324 }, { "epoch": 0.6518057866923593, "grad_norm": 0.8108940645761288, "learning_rate": 1.934468775344688e-06, "loss": 0.6482, "step": 22325 }, { "epoch": 0.6518349829202067, "grad_norm": 0.7572294171262417, "learning_rate": 1.934306569343066e-06, "loss": 0.6488, "step": 22326 }, { "epoch": 0.6518641791480541, "grad_norm": 0.7401645538136863, "learning_rate": 1.934144363341444e-06, "loss": 0.7051, "step": 22327 }, { "epoch": 0.6518933753759014, "grad_norm": 0.7457803275715179, "learning_rate": 1.933982157339822e-06, "loss": 0.7042, "step": 22328 }, { "epoch": 0.6519225716037488, "grad_norm": 0.7344122152731313, "learning_rate": 1.9338199513381996e-06, "loss": 0.6748, "step": 22329 }, { "epoch": 0.6519517678315961, "grad_norm": 0.7623164777379751, "learning_rate": 1.9336577453365776e-06, "loss": 0.6804, "step": 22330 }, { "epoch": 0.6519809640594435, "grad_norm": 0.6957837627141513, "learning_rate": 1.9334955393349556e-06, "loss": 0.5803, "step": 22331 }, { "epoch": 0.6520101602872909, "grad_norm": 0.7232374138797834, "learning_rate": 1.9333333333333336e-06, "loss": 0.6637, "step": 22332 }, { "epoch": 0.6520393565151382, "grad_norm": 0.6781080185415435, "learning_rate": 1.933171127331711e-06, "loss": 0.5414, "step": 22333 }, { "epoch": 0.6520685527429856, "grad_norm": 0.7149670353455736, "learning_rate": 1.933008921330089e-06, "loss": 0.6588, "step": 22334 }, { "epoch": 0.652097748970833, "grad_norm": 0.7492766322065642, "learning_rate": 1.932846715328467e-06, "loss": 0.6702, "step": 22335 }, { "epoch": 0.6521269451986803, "grad_norm": 0.7280451709321821, "learning_rate": 1.932684509326845e-06, "loss": 0.6086, "step": 22336 }, { "epoch": 0.6521561414265277, "grad_norm": 0.7384390405687667, "learning_rate": 1.932522303325223e-06, "loss": 0.6631, "step": 22337 }, { "epoch": 0.652185337654375, "grad_norm": 0.7384497599171833, "learning_rate": 1.9323600973236012e-06, "loss": 0.6734, "step": 22338 }, { "epoch": 0.6522145338822224, "grad_norm": 0.7607923284106191, "learning_rate": 1.9321978913219792e-06, "loss": 0.648, "step": 22339 }, { "epoch": 0.6522437301100698, "grad_norm": 0.7944712419370795, "learning_rate": 1.932035685320357e-06, "loss": 0.7111, "step": 22340 }, { "epoch": 0.6522729263379171, "grad_norm": 0.7168693674027813, "learning_rate": 1.931873479318735e-06, "loss": 0.6653, "step": 22341 }, { "epoch": 0.6523021225657645, "grad_norm": 0.7390964529025906, "learning_rate": 1.931711273317113e-06, "loss": 0.6039, "step": 22342 }, { "epoch": 0.6523313187936118, "grad_norm": 0.6474630247464591, "learning_rate": 1.931549067315491e-06, "loss": 0.5575, "step": 22343 }, { "epoch": 0.6523605150214592, "grad_norm": 0.7446379343780449, "learning_rate": 1.931386861313869e-06, "loss": 0.6483, "step": 22344 }, { "epoch": 0.6523897112493066, "grad_norm": 0.7044193998886097, "learning_rate": 1.931224655312247e-06, "loss": 0.5968, "step": 22345 }, { "epoch": 0.6524189074771539, "grad_norm": 0.8065285180975326, "learning_rate": 1.931062449310625e-06, "loss": 0.7219, "step": 22346 }, { "epoch": 0.6524481037050013, "grad_norm": 0.7103366266711131, "learning_rate": 1.930900243309003e-06, "loss": 0.622, "step": 22347 }, { "epoch": 0.6524772999328486, "grad_norm": 0.7284842238691129, "learning_rate": 1.9307380373073804e-06, "loss": 0.6158, "step": 22348 }, { "epoch": 0.652506496160696, "grad_norm": 0.7632478436894174, "learning_rate": 1.9305758313057584e-06, "loss": 0.6718, "step": 22349 }, { "epoch": 0.6525356923885434, "grad_norm": 0.6759699526115946, "learning_rate": 1.9304136253041365e-06, "loss": 0.5677, "step": 22350 }, { "epoch": 0.6525648886163907, "grad_norm": 0.7436293875433516, "learning_rate": 1.9302514193025145e-06, "loss": 0.6399, "step": 22351 }, { "epoch": 0.6525940848442381, "grad_norm": 0.6734151577035918, "learning_rate": 1.930089213300892e-06, "loss": 0.5541, "step": 22352 }, { "epoch": 0.6526232810720854, "grad_norm": 0.7545010819457137, "learning_rate": 1.92992700729927e-06, "loss": 0.6407, "step": 22353 }, { "epoch": 0.6526524772999328, "grad_norm": 0.8200571335526171, "learning_rate": 1.929764801297648e-06, "loss": 0.7636, "step": 22354 }, { "epoch": 0.6526816735277802, "grad_norm": 0.7124916334640884, "learning_rate": 1.929602595296026e-06, "loss": 0.5926, "step": 22355 }, { "epoch": 0.6527108697556275, "grad_norm": 0.7343840064889304, "learning_rate": 1.929440389294404e-06, "loss": 0.6111, "step": 22356 }, { "epoch": 0.6527400659834749, "grad_norm": 0.7176997544606136, "learning_rate": 1.929278183292782e-06, "loss": 0.6402, "step": 22357 }, { "epoch": 0.6527692622113223, "grad_norm": 0.7128612011805392, "learning_rate": 1.92911597729116e-06, "loss": 0.6182, "step": 22358 }, { "epoch": 0.6527984584391696, "grad_norm": 0.7404165712814094, "learning_rate": 1.9289537712895377e-06, "loss": 0.6554, "step": 22359 }, { "epoch": 0.652827654667017, "grad_norm": 0.6577926055399341, "learning_rate": 1.9287915652879157e-06, "loss": 0.581, "step": 22360 }, { "epoch": 0.6528568508948643, "grad_norm": 0.7060473881902601, "learning_rate": 1.9286293592862937e-06, "loss": 0.6764, "step": 22361 }, { "epoch": 0.6528860471227117, "grad_norm": 0.7908383923997014, "learning_rate": 1.9284671532846717e-06, "loss": 0.5942, "step": 22362 }, { "epoch": 0.6529152433505591, "grad_norm": 0.8015647491953001, "learning_rate": 1.9283049472830497e-06, "loss": 0.7432, "step": 22363 }, { "epoch": 0.6529444395784064, "grad_norm": 0.7509367807342124, "learning_rate": 1.9281427412814277e-06, "loss": 0.6055, "step": 22364 }, { "epoch": 0.6529736358062538, "grad_norm": 0.7956463988034971, "learning_rate": 1.9279805352798057e-06, "loss": 0.6924, "step": 22365 }, { "epoch": 0.6530028320341011, "grad_norm": 0.7131837514670671, "learning_rate": 1.9278183292781837e-06, "loss": 0.5968, "step": 22366 }, { "epoch": 0.6530320282619485, "grad_norm": 0.7308336249677944, "learning_rate": 1.9276561232765613e-06, "loss": 0.6276, "step": 22367 }, { "epoch": 0.6530612244897959, "grad_norm": 0.778615738162829, "learning_rate": 1.9274939172749393e-06, "loss": 0.668, "step": 22368 }, { "epoch": 0.6530904207176432, "grad_norm": 0.6638148712861247, "learning_rate": 1.9273317112733173e-06, "loss": 0.5769, "step": 22369 }, { "epoch": 0.6531196169454906, "grad_norm": 0.7243297623584521, "learning_rate": 1.9271695052716953e-06, "loss": 0.6862, "step": 22370 }, { "epoch": 0.653148813173338, "grad_norm": 0.6883606018290046, "learning_rate": 1.927007299270073e-06, "loss": 0.5987, "step": 22371 }, { "epoch": 0.6531780094011854, "grad_norm": 0.718484603022132, "learning_rate": 1.926845093268451e-06, "loss": 0.6825, "step": 22372 }, { "epoch": 0.6532072056290328, "grad_norm": 0.7779736896112986, "learning_rate": 1.926682887266829e-06, "loss": 0.7424, "step": 22373 }, { "epoch": 0.6532364018568801, "grad_norm": 0.7298120199123069, "learning_rate": 1.926520681265207e-06, "loss": 0.6627, "step": 22374 }, { "epoch": 0.6532655980847275, "grad_norm": 0.7499652489359332, "learning_rate": 1.926358475263585e-06, "loss": 0.6613, "step": 22375 }, { "epoch": 0.6532947943125749, "grad_norm": 0.7579186652595762, "learning_rate": 1.926196269261963e-06, "loss": 0.6742, "step": 22376 }, { "epoch": 0.6533239905404222, "grad_norm": 0.7314461692612185, "learning_rate": 1.926034063260341e-06, "loss": 0.6081, "step": 22377 }, { "epoch": 0.6533531867682696, "grad_norm": 0.7338439320696573, "learning_rate": 1.9258718572587185e-06, "loss": 0.6026, "step": 22378 }, { "epoch": 0.653382382996117, "grad_norm": 0.6975250811642261, "learning_rate": 1.9257096512570965e-06, "loss": 0.6561, "step": 22379 }, { "epoch": 0.6534115792239643, "grad_norm": 0.7433166815302205, "learning_rate": 1.9255474452554745e-06, "loss": 0.7083, "step": 22380 }, { "epoch": 0.6534407754518117, "grad_norm": 0.7312043356401166, "learning_rate": 1.9253852392538525e-06, "loss": 0.6704, "step": 22381 }, { "epoch": 0.653469971679659, "grad_norm": 0.713497964985489, "learning_rate": 1.9252230332522306e-06, "loss": 0.6066, "step": 22382 }, { "epoch": 0.6534991679075064, "grad_norm": 0.704939120300533, "learning_rate": 1.9250608272506086e-06, "loss": 0.6143, "step": 22383 }, { "epoch": 0.6535283641353538, "grad_norm": 0.7160240918534018, "learning_rate": 1.9248986212489866e-06, "loss": 0.6443, "step": 22384 }, { "epoch": 0.6535575603632011, "grad_norm": 0.7209083850808521, "learning_rate": 1.9247364152473646e-06, "loss": 0.6771, "step": 22385 }, { "epoch": 0.6535867565910485, "grad_norm": 0.7334494835989227, "learning_rate": 1.924574209245742e-06, "loss": 0.7052, "step": 22386 }, { "epoch": 0.6536159528188958, "grad_norm": 0.813322007153542, "learning_rate": 1.92441200324412e-06, "loss": 0.6964, "step": 22387 }, { "epoch": 0.6536451490467432, "grad_norm": 0.7010581173308065, "learning_rate": 1.924249797242498e-06, "loss": 0.6009, "step": 22388 }, { "epoch": 0.6536743452745906, "grad_norm": 0.7137802288925934, "learning_rate": 1.924087591240876e-06, "loss": 0.5905, "step": 22389 }, { "epoch": 0.6537035415024379, "grad_norm": 0.7435314071651992, "learning_rate": 1.9239253852392538e-06, "loss": 0.664, "step": 22390 }, { "epoch": 0.6537327377302853, "grad_norm": 0.751548853828572, "learning_rate": 1.9237631792376318e-06, "loss": 0.6267, "step": 22391 }, { "epoch": 0.6537619339581326, "grad_norm": 0.7516530864754761, "learning_rate": 1.92360097323601e-06, "loss": 0.6473, "step": 22392 }, { "epoch": 0.65379113018598, "grad_norm": 0.7575285762810261, "learning_rate": 1.9234387672343878e-06, "loss": 0.6725, "step": 22393 }, { "epoch": 0.6538203264138274, "grad_norm": 0.7610009019781687, "learning_rate": 1.9232765612327658e-06, "loss": 0.6942, "step": 22394 }, { "epoch": 0.6538495226416747, "grad_norm": 0.6385553678502217, "learning_rate": 1.923114355231144e-06, "loss": 0.4838, "step": 22395 }, { "epoch": 0.6538787188695221, "grad_norm": 0.6658343594313986, "learning_rate": 1.922952149229522e-06, "loss": 0.5434, "step": 22396 }, { "epoch": 0.6539079150973695, "grad_norm": 0.7911863881940161, "learning_rate": 1.9227899432278994e-06, "loss": 0.6664, "step": 22397 }, { "epoch": 0.6539371113252168, "grad_norm": 0.8092993345837945, "learning_rate": 1.9226277372262774e-06, "loss": 0.6689, "step": 22398 }, { "epoch": 0.6539663075530642, "grad_norm": 0.7119798387657162, "learning_rate": 1.9224655312246554e-06, "loss": 0.6313, "step": 22399 }, { "epoch": 0.6539955037809115, "grad_norm": 0.7061245792431848, "learning_rate": 1.9223033252230334e-06, "loss": 0.6157, "step": 22400 }, { "epoch": 0.6540247000087589, "grad_norm": 0.7155168648086171, "learning_rate": 1.9221411192214114e-06, "loss": 0.6245, "step": 22401 }, { "epoch": 0.6540538962366063, "grad_norm": 0.7289484295592155, "learning_rate": 1.9219789132197894e-06, "loss": 0.6452, "step": 22402 }, { "epoch": 0.6540830924644536, "grad_norm": 0.7420649187938889, "learning_rate": 1.9218167072181674e-06, "loss": 0.7025, "step": 22403 }, { "epoch": 0.654112288692301, "grad_norm": 0.6896046918843824, "learning_rate": 1.9216545012165454e-06, "loss": 0.5768, "step": 22404 }, { "epoch": 0.6541414849201483, "grad_norm": 0.7658271587783131, "learning_rate": 1.921492295214923e-06, "loss": 0.7246, "step": 22405 }, { "epoch": 0.6541706811479957, "grad_norm": 0.7207847049892412, "learning_rate": 1.921330089213301e-06, "loss": 0.6009, "step": 22406 }, { "epoch": 0.6541998773758431, "grad_norm": 0.7516847024143515, "learning_rate": 1.921167883211679e-06, "loss": 0.6275, "step": 22407 }, { "epoch": 0.6542290736036904, "grad_norm": 0.7235427799773427, "learning_rate": 1.921005677210057e-06, "loss": 0.6198, "step": 22408 }, { "epoch": 0.6542582698315378, "grad_norm": 0.6963206762483872, "learning_rate": 1.9208434712084346e-06, "loss": 0.5437, "step": 22409 }, { "epoch": 0.6542874660593851, "grad_norm": 0.8250862792109765, "learning_rate": 1.9206812652068126e-06, "loss": 0.7465, "step": 22410 }, { "epoch": 0.6543166622872325, "grad_norm": 0.7131411493114053, "learning_rate": 1.920519059205191e-06, "loss": 0.615, "step": 22411 }, { "epoch": 0.6543458585150799, "grad_norm": 0.7356792347891946, "learning_rate": 1.9203568532035686e-06, "loss": 0.6352, "step": 22412 }, { "epoch": 0.6543750547429272, "grad_norm": 0.7353607951565486, "learning_rate": 1.9201946472019466e-06, "loss": 0.6132, "step": 22413 }, { "epoch": 0.6544042509707746, "grad_norm": 0.7088274352177802, "learning_rate": 1.9200324412003247e-06, "loss": 0.6647, "step": 22414 }, { "epoch": 0.654433447198622, "grad_norm": 0.6862853322570199, "learning_rate": 1.9198702351987027e-06, "loss": 0.5847, "step": 22415 }, { "epoch": 0.6544626434264693, "grad_norm": 0.6527397331831639, "learning_rate": 1.9197080291970802e-06, "loss": 0.552, "step": 22416 }, { "epoch": 0.6544918396543167, "grad_norm": 0.6809048210046654, "learning_rate": 1.9195458231954582e-06, "loss": 0.5881, "step": 22417 }, { "epoch": 0.654521035882164, "grad_norm": 0.664753254161863, "learning_rate": 1.9193836171938363e-06, "loss": 0.568, "step": 22418 }, { "epoch": 0.6545502321100114, "grad_norm": 0.7300176128464521, "learning_rate": 1.9192214111922143e-06, "loss": 0.6498, "step": 22419 }, { "epoch": 0.6545794283378588, "grad_norm": 0.7937026927333933, "learning_rate": 1.9190592051905923e-06, "loss": 0.7145, "step": 22420 }, { "epoch": 0.6546086245657061, "grad_norm": 0.7550165406418893, "learning_rate": 1.9188969991889703e-06, "loss": 0.6549, "step": 22421 }, { "epoch": 0.6546378207935535, "grad_norm": 0.6569202933313789, "learning_rate": 1.9187347931873483e-06, "loss": 0.5476, "step": 22422 }, { "epoch": 0.6546670170214008, "grad_norm": 0.7251057341387224, "learning_rate": 1.9185725871857263e-06, "loss": 0.6535, "step": 22423 }, { "epoch": 0.6546962132492482, "grad_norm": 0.6768989535401075, "learning_rate": 1.918410381184104e-06, "loss": 0.5993, "step": 22424 }, { "epoch": 0.6547254094770956, "grad_norm": 0.6844817878909626, "learning_rate": 1.918248175182482e-06, "loss": 0.6035, "step": 22425 }, { "epoch": 0.6547546057049429, "grad_norm": 0.6940556503066996, "learning_rate": 1.91808596918086e-06, "loss": 0.6309, "step": 22426 }, { "epoch": 0.6547838019327903, "grad_norm": 0.7673383072356982, "learning_rate": 1.917923763179238e-06, "loss": 0.694, "step": 22427 }, { "epoch": 0.6548129981606376, "grad_norm": 0.6736165489489241, "learning_rate": 1.9177615571776155e-06, "loss": 0.559, "step": 22428 }, { "epoch": 0.654842194388485, "grad_norm": 0.7918851675737653, "learning_rate": 1.9175993511759935e-06, "loss": 0.7335, "step": 22429 }, { "epoch": 0.6548713906163324, "grad_norm": 0.71767492204197, "learning_rate": 1.917437145174372e-06, "loss": 0.6488, "step": 22430 }, { "epoch": 0.6549005868441797, "grad_norm": 0.7673020684593814, "learning_rate": 1.9172749391727495e-06, "loss": 0.7131, "step": 22431 }, { "epoch": 0.6549297830720271, "grad_norm": 0.7302062979364801, "learning_rate": 1.9171127331711275e-06, "loss": 0.6393, "step": 22432 }, { "epoch": 0.6549589792998745, "grad_norm": 0.7292564268572929, "learning_rate": 1.9169505271695055e-06, "loss": 0.6635, "step": 22433 }, { "epoch": 0.6549881755277218, "grad_norm": 0.7202166723522843, "learning_rate": 1.9167883211678835e-06, "loss": 0.6165, "step": 22434 }, { "epoch": 0.6550173717555692, "grad_norm": 0.695645246328547, "learning_rate": 1.916626115166261e-06, "loss": 0.6266, "step": 22435 }, { "epoch": 0.6550465679834165, "grad_norm": 0.7260394574096366, "learning_rate": 1.916463909164639e-06, "loss": 0.6329, "step": 22436 }, { "epoch": 0.6550757642112639, "grad_norm": 0.7455844145906534, "learning_rate": 1.916301703163017e-06, "loss": 0.6874, "step": 22437 }, { "epoch": 0.6551049604391113, "grad_norm": 0.6752810780580313, "learning_rate": 1.916139497161395e-06, "loss": 0.611, "step": 22438 }, { "epoch": 0.6551341566669586, "grad_norm": 0.7719698054136945, "learning_rate": 1.915977291159773e-06, "loss": 0.72, "step": 22439 }, { "epoch": 0.655163352894806, "grad_norm": 0.7241014913771905, "learning_rate": 1.915815085158151e-06, "loss": 0.6513, "step": 22440 }, { "epoch": 0.6551925491226533, "grad_norm": 0.7092604429659878, "learning_rate": 1.915652879156529e-06, "loss": 0.6057, "step": 22441 }, { "epoch": 0.6552217453505007, "grad_norm": 0.7085465513010579, "learning_rate": 1.915490673154907e-06, "loss": 0.5993, "step": 22442 }, { "epoch": 0.6552509415783481, "grad_norm": 0.6854220614759903, "learning_rate": 1.9153284671532847e-06, "loss": 0.6131, "step": 22443 }, { "epoch": 0.6552801378061954, "grad_norm": 0.7244668802074806, "learning_rate": 1.9151662611516627e-06, "loss": 0.6179, "step": 22444 }, { "epoch": 0.6553093340340428, "grad_norm": 0.7829714374833081, "learning_rate": 1.9150040551500407e-06, "loss": 0.6718, "step": 22445 }, { "epoch": 0.6553385302618902, "grad_norm": 0.7764331951711609, "learning_rate": 1.9148418491484188e-06, "loss": 0.6205, "step": 22446 }, { "epoch": 0.6553677264897375, "grad_norm": 0.712187118637164, "learning_rate": 1.9146796431467963e-06, "loss": 0.613, "step": 22447 }, { "epoch": 0.6553969227175849, "grad_norm": 0.6801413132129944, "learning_rate": 1.9145174371451743e-06, "loss": 0.6173, "step": 22448 }, { "epoch": 0.6554261189454322, "grad_norm": 0.6656509842510023, "learning_rate": 1.9143552311435528e-06, "loss": 0.5811, "step": 22449 }, { "epoch": 0.6554553151732796, "grad_norm": 0.6993837173106171, "learning_rate": 1.9141930251419304e-06, "loss": 0.6049, "step": 22450 }, { "epoch": 0.655484511401127, "grad_norm": 0.8302174715972317, "learning_rate": 1.9140308191403084e-06, "loss": 0.6447, "step": 22451 }, { "epoch": 0.6555137076289743, "grad_norm": 0.6878234410634557, "learning_rate": 1.9138686131386864e-06, "loss": 0.5959, "step": 22452 }, { "epoch": 0.6555429038568217, "grad_norm": 0.7203946440663112, "learning_rate": 1.9137064071370644e-06, "loss": 0.6166, "step": 22453 }, { "epoch": 0.655572100084669, "grad_norm": 0.7231223710003685, "learning_rate": 1.913544201135442e-06, "loss": 0.6128, "step": 22454 }, { "epoch": 0.6556012963125164, "grad_norm": 0.7561513162129073, "learning_rate": 1.91338199513382e-06, "loss": 0.7304, "step": 22455 }, { "epoch": 0.6556304925403638, "grad_norm": 0.7351510760712621, "learning_rate": 1.913219789132198e-06, "loss": 0.6534, "step": 22456 }, { "epoch": 0.6556596887682111, "grad_norm": 0.7251444834597104, "learning_rate": 1.913057583130576e-06, "loss": 0.6366, "step": 22457 }, { "epoch": 0.6556888849960585, "grad_norm": 0.7143258530784047, "learning_rate": 1.912895377128954e-06, "loss": 0.6414, "step": 22458 }, { "epoch": 0.6557180812239058, "grad_norm": 0.8079026342498081, "learning_rate": 1.912733171127332e-06, "loss": 0.6027, "step": 22459 }, { "epoch": 0.6557472774517532, "grad_norm": 0.6689316678760576, "learning_rate": 1.91257096512571e-06, "loss": 0.5544, "step": 22460 }, { "epoch": 0.6557764736796006, "grad_norm": 0.7250149708406052, "learning_rate": 1.912408759124088e-06, "loss": 0.6338, "step": 22461 }, { "epoch": 0.6558056699074479, "grad_norm": 0.7108629074340796, "learning_rate": 1.9122465531224656e-06, "loss": 0.6477, "step": 22462 }, { "epoch": 0.6558348661352953, "grad_norm": 0.7352217625894311, "learning_rate": 1.9120843471208436e-06, "loss": 0.6749, "step": 22463 }, { "epoch": 0.6558640623631427, "grad_norm": 0.7977594690896299, "learning_rate": 1.9119221411192216e-06, "loss": 0.7186, "step": 22464 }, { "epoch": 0.65589325859099, "grad_norm": 0.6642611340373585, "learning_rate": 1.9117599351175996e-06, "loss": 0.591, "step": 22465 }, { "epoch": 0.6559224548188374, "grad_norm": 0.7189493744611004, "learning_rate": 1.911597729115977e-06, "loss": 0.6526, "step": 22466 }, { "epoch": 0.6559516510466847, "grad_norm": 0.756783904113476, "learning_rate": 1.911435523114355e-06, "loss": 0.5883, "step": 22467 }, { "epoch": 0.6559808472745321, "grad_norm": 0.7543911457362119, "learning_rate": 1.9112733171127336e-06, "loss": 0.6078, "step": 22468 }, { "epoch": 0.6560100435023795, "grad_norm": 0.640698722898393, "learning_rate": 1.9111111111111112e-06, "loss": 0.5359, "step": 22469 }, { "epoch": 0.6560392397302268, "grad_norm": 0.7351250382204738, "learning_rate": 1.9109489051094892e-06, "loss": 0.6468, "step": 22470 }, { "epoch": 0.6560684359580742, "grad_norm": 0.6968634850874185, "learning_rate": 1.9107866991078672e-06, "loss": 0.5861, "step": 22471 }, { "epoch": 0.6560976321859215, "grad_norm": 0.7283305661453772, "learning_rate": 1.9106244931062452e-06, "loss": 0.7042, "step": 22472 }, { "epoch": 0.6561268284137689, "grad_norm": 0.7095554143460404, "learning_rate": 1.910462287104623e-06, "loss": 0.6089, "step": 22473 }, { "epoch": 0.6561560246416163, "grad_norm": 0.7367738794601062, "learning_rate": 1.910300081103001e-06, "loss": 0.6447, "step": 22474 }, { "epoch": 0.6561852208694636, "grad_norm": 0.679542325012005, "learning_rate": 1.910137875101379e-06, "loss": 0.5679, "step": 22475 }, { "epoch": 0.656214417097311, "grad_norm": 0.6867827176070618, "learning_rate": 1.909975669099757e-06, "loss": 0.6047, "step": 22476 }, { "epoch": 0.6562436133251583, "grad_norm": 0.7169518347890824, "learning_rate": 1.909813463098135e-06, "loss": 0.5998, "step": 22477 }, { "epoch": 0.6562728095530057, "grad_norm": 0.6993718573861751, "learning_rate": 1.909651257096513e-06, "loss": 0.6394, "step": 22478 }, { "epoch": 0.6563020057808531, "grad_norm": 0.7339653737385082, "learning_rate": 1.909489051094891e-06, "loss": 0.654, "step": 22479 }, { "epoch": 0.6563312020087004, "grad_norm": 0.6609102077235447, "learning_rate": 1.909326845093269e-06, "loss": 0.5754, "step": 22480 }, { "epoch": 0.6563603982365478, "grad_norm": 0.7272977653602348, "learning_rate": 1.9091646390916464e-06, "loss": 0.6806, "step": 22481 }, { "epoch": 0.6563895944643952, "grad_norm": 0.7449177196004786, "learning_rate": 1.9090024330900245e-06, "loss": 0.6624, "step": 22482 }, { "epoch": 0.6564187906922425, "grad_norm": 0.6441560207151443, "learning_rate": 1.9088402270884025e-06, "loss": 0.5274, "step": 22483 }, { "epoch": 0.6564479869200899, "grad_norm": 0.711450538181914, "learning_rate": 1.9086780210867805e-06, "loss": 0.6506, "step": 22484 }, { "epoch": 0.6564771831479372, "grad_norm": 0.736745941704697, "learning_rate": 1.908515815085158e-06, "loss": 0.6066, "step": 22485 }, { "epoch": 0.6565063793757846, "grad_norm": 0.7683648037241249, "learning_rate": 1.908353609083536e-06, "loss": 0.7306, "step": 22486 }, { "epoch": 0.656535575603632, "grad_norm": 0.684811934696951, "learning_rate": 1.9081914030819145e-06, "loss": 0.5897, "step": 22487 }, { "epoch": 0.6565647718314793, "grad_norm": 0.7333650540954976, "learning_rate": 1.908029197080292e-06, "loss": 0.6589, "step": 22488 }, { "epoch": 0.6565939680593267, "grad_norm": 0.7231017353591416, "learning_rate": 1.90786699107867e-06, "loss": 0.6203, "step": 22489 }, { "epoch": 0.656623164287174, "grad_norm": 0.696455558908718, "learning_rate": 1.907704785077048e-06, "loss": 0.5587, "step": 22490 }, { "epoch": 0.6566523605150214, "grad_norm": 0.713602576631657, "learning_rate": 1.907542579075426e-06, "loss": 0.5623, "step": 22491 }, { "epoch": 0.6566815567428688, "grad_norm": 0.7733073848465116, "learning_rate": 1.9073803730738037e-06, "loss": 0.6925, "step": 22492 }, { "epoch": 0.6567107529707162, "grad_norm": 0.6982760186443097, "learning_rate": 1.9072181670721817e-06, "loss": 0.6387, "step": 22493 }, { "epoch": 0.6567399491985636, "grad_norm": 0.7315594225721482, "learning_rate": 1.9070559610705597e-06, "loss": 0.6289, "step": 22494 }, { "epoch": 0.656769145426411, "grad_norm": 0.7082214644621831, "learning_rate": 1.9068937550689377e-06, "loss": 0.6423, "step": 22495 }, { "epoch": 0.6567983416542583, "grad_norm": 0.6933212317962288, "learning_rate": 1.9067315490673157e-06, "loss": 0.5218, "step": 22496 }, { "epoch": 0.6568275378821057, "grad_norm": 0.6959385609052715, "learning_rate": 1.9065693430656937e-06, "loss": 0.5974, "step": 22497 }, { "epoch": 0.656856734109953, "grad_norm": 0.7176706122253765, "learning_rate": 1.9064071370640715e-06, "loss": 0.6431, "step": 22498 }, { "epoch": 0.6568859303378004, "grad_norm": 0.6890859360141507, "learning_rate": 1.9062449310624495e-06, "loss": 0.6164, "step": 22499 }, { "epoch": 0.6569151265656478, "grad_norm": 0.7221752851476184, "learning_rate": 1.9060827250608275e-06, "loss": 0.6428, "step": 22500 }, { "epoch": 0.6569443227934951, "grad_norm": 0.733726939304985, "learning_rate": 1.9059205190592053e-06, "loss": 0.6626, "step": 22501 }, { "epoch": 0.6569735190213425, "grad_norm": 0.719648606867901, "learning_rate": 1.9057583130575833e-06, "loss": 0.6374, "step": 22502 }, { "epoch": 0.6570027152491899, "grad_norm": 0.683783769732669, "learning_rate": 1.9055961070559611e-06, "loss": 0.5769, "step": 22503 }, { "epoch": 0.6570319114770372, "grad_norm": 0.6801251868634149, "learning_rate": 1.9054339010543391e-06, "loss": 0.5483, "step": 22504 }, { "epoch": 0.6570611077048846, "grad_norm": 0.8099024470498932, "learning_rate": 1.905271695052717e-06, "loss": 0.719, "step": 22505 }, { "epoch": 0.6570903039327319, "grad_norm": 0.773108646772561, "learning_rate": 1.9051094890510951e-06, "loss": 0.6563, "step": 22506 }, { "epoch": 0.6571195001605793, "grad_norm": 0.7022539926444427, "learning_rate": 1.9049472830494731e-06, "loss": 0.5675, "step": 22507 }, { "epoch": 0.6571486963884267, "grad_norm": 0.7113322282358494, "learning_rate": 1.904785077047851e-06, "loss": 0.6132, "step": 22508 }, { "epoch": 0.657177892616274, "grad_norm": 0.7441757500286174, "learning_rate": 1.904622871046229e-06, "loss": 0.6336, "step": 22509 }, { "epoch": 0.6572070888441214, "grad_norm": 0.6994797861592185, "learning_rate": 1.9044606650446067e-06, "loss": 0.6064, "step": 22510 }, { "epoch": 0.6572362850719687, "grad_norm": 0.6541903973237833, "learning_rate": 1.9042984590429847e-06, "loss": 0.5631, "step": 22511 }, { "epoch": 0.6572654812998161, "grad_norm": 0.7247837496104057, "learning_rate": 1.9041362530413625e-06, "loss": 0.622, "step": 22512 }, { "epoch": 0.6572946775276635, "grad_norm": 0.7612736107681263, "learning_rate": 1.9039740470397405e-06, "loss": 0.6257, "step": 22513 }, { "epoch": 0.6573238737555108, "grad_norm": 0.7409644422955022, "learning_rate": 1.9038118410381183e-06, "loss": 0.5586, "step": 22514 }, { "epoch": 0.6573530699833582, "grad_norm": 0.7456014360855873, "learning_rate": 1.9036496350364966e-06, "loss": 0.6787, "step": 22515 }, { "epoch": 0.6573822662112055, "grad_norm": 0.7703014984776719, "learning_rate": 1.9034874290348746e-06, "loss": 0.709, "step": 22516 }, { "epoch": 0.6574114624390529, "grad_norm": 0.689353501592494, "learning_rate": 1.9033252230332524e-06, "loss": 0.5307, "step": 22517 }, { "epoch": 0.6574406586669003, "grad_norm": 0.7281033423666816, "learning_rate": 1.9031630170316304e-06, "loss": 0.5587, "step": 22518 }, { "epoch": 0.6574698548947476, "grad_norm": 0.7121865457010635, "learning_rate": 1.9030008110300084e-06, "loss": 0.6357, "step": 22519 }, { "epoch": 0.657499051122595, "grad_norm": 0.6796849389777657, "learning_rate": 1.9028386050283862e-06, "loss": 0.5839, "step": 22520 }, { "epoch": 0.6575282473504424, "grad_norm": 0.8312301546267201, "learning_rate": 1.9026763990267642e-06, "loss": 0.6961, "step": 22521 }, { "epoch": 0.6575574435782897, "grad_norm": 0.7747892012995982, "learning_rate": 1.902514193025142e-06, "loss": 0.7327, "step": 22522 }, { "epoch": 0.6575866398061371, "grad_norm": 0.7390676697753074, "learning_rate": 1.90235198702352e-06, "loss": 0.6336, "step": 22523 }, { "epoch": 0.6576158360339844, "grad_norm": 0.7391243130437264, "learning_rate": 1.9021897810218978e-06, "loss": 0.6339, "step": 22524 }, { "epoch": 0.6576450322618318, "grad_norm": 0.7392920020119188, "learning_rate": 1.902027575020276e-06, "loss": 0.6312, "step": 22525 }, { "epoch": 0.6576742284896792, "grad_norm": 0.6898336345275847, "learning_rate": 1.901865369018654e-06, "loss": 0.6264, "step": 22526 }, { "epoch": 0.6577034247175265, "grad_norm": 0.7038050170776564, "learning_rate": 1.9017031630170318e-06, "loss": 0.6311, "step": 22527 }, { "epoch": 0.6577326209453739, "grad_norm": 0.6785001230961841, "learning_rate": 1.9015409570154098e-06, "loss": 0.6014, "step": 22528 }, { "epoch": 0.6577618171732212, "grad_norm": 0.783237010666432, "learning_rate": 1.9013787510137876e-06, "loss": 0.7159, "step": 22529 }, { "epoch": 0.6577910134010686, "grad_norm": 0.7246211657999229, "learning_rate": 1.9012165450121656e-06, "loss": 0.6532, "step": 22530 }, { "epoch": 0.657820209628916, "grad_norm": 0.7042254485225343, "learning_rate": 1.9010543390105434e-06, "loss": 0.6321, "step": 22531 }, { "epoch": 0.6578494058567633, "grad_norm": 0.7389195780557664, "learning_rate": 1.9008921330089214e-06, "loss": 0.6922, "step": 22532 }, { "epoch": 0.6578786020846107, "grad_norm": 0.7517464718177587, "learning_rate": 1.9007299270072992e-06, "loss": 0.641, "step": 22533 }, { "epoch": 0.657907798312458, "grad_norm": 0.692043023677483, "learning_rate": 1.9005677210056774e-06, "loss": 0.6054, "step": 22534 }, { "epoch": 0.6579369945403054, "grad_norm": 0.8012249999459703, "learning_rate": 1.9004055150040554e-06, "loss": 0.6406, "step": 22535 }, { "epoch": 0.6579661907681528, "grad_norm": 0.8006180572911261, "learning_rate": 1.9002433090024332e-06, "loss": 0.6354, "step": 22536 }, { "epoch": 0.6579953869960001, "grad_norm": 0.7352625890380314, "learning_rate": 1.9000811030008112e-06, "loss": 0.5715, "step": 22537 }, { "epoch": 0.6580245832238475, "grad_norm": 0.7377485072512049, "learning_rate": 1.8999188969991892e-06, "loss": 0.6745, "step": 22538 }, { "epoch": 0.6580537794516949, "grad_norm": 0.782867017513567, "learning_rate": 1.899756690997567e-06, "loss": 0.6913, "step": 22539 }, { "epoch": 0.6580829756795422, "grad_norm": 0.6796490281619656, "learning_rate": 1.899594484995945e-06, "loss": 0.5587, "step": 22540 }, { "epoch": 0.6581121719073896, "grad_norm": 0.727295494596719, "learning_rate": 1.8994322789943228e-06, "loss": 0.6717, "step": 22541 }, { "epoch": 0.6581413681352369, "grad_norm": 0.7308498863682884, "learning_rate": 1.8992700729927008e-06, "loss": 0.6267, "step": 22542 }, { "epoch": 0.6581705643630843, "grad_norm": 1.3449996151705736, "learning_rate": 1.899107866991079e-06, "loss": 0.6018, "step": 22543 }, { "epoch": 0.6581997605909317, "grad_norm": 0.7359653974465299, "learning_rate": 1.8989456609894569e-06, "loss": 0.6307, "step": 22544 }, { "epoch": 0.658228956818779, "grad_norm": 0.8560518609259148, "learning_rate": 1.8987834549878349e-06, "loss": 0.6535, "step": 22545 }, { "epoch": 0.6582581530466264, "grad_norm": 0.6944502854350526, "learning_rate": 1.8986212489862127e-06, "loss": 0.5973, "step": 22546 }, { "epoch": 0.6582873492744737, "grad_norm": 0.7386866349661139, "learning_rate": 1.8984590429845907e-06, "loss": 0.6534, "step": 22547 }, { "epoch": 0.6583165455023211, "grad_norm": 0.6911659895192507, "learning_rate": 1.8982968369829685e-06, "loss": 0.6172, "step": 22548 }, { "epoch": 0.6583457417301685, "grad_norm": 0.7385944359643467, "learning_rate": 1.8981346309813465e-06, "loss": 0.6489, "step": 22549 }, { "epoch": 0.6583749379580158, "grad_norm": 0.7039420698209886, "learning_rate": 1.8979724249797243e-06, "loss": 0.6331, "step": 22550 }, { "epoch": 0.6584041341858632, "grad_norm": 0.7145247142600334, "learning_rate": 1.8978102189781023e-06, "loss": 0.611, "step": 22551 }, { "epoch": 0.6584333304137105, "grad_norm": 0.6379405625343053, "learning_rate": 1.89764801297648e-06, "loss": 0.5418, "step": 22552 }, { "epoch": 0.6584625266415579, "grad_norm": 0.6530684201488371, "learning_rate": 1.8974858069748583e-06, "loss": 0.5639, "step": 22553 }, { "epoch": 0.6584917228694053, "grad_norm": 0.7453052163769567, "learning_rate": 1.8973236009732363e-06, "loss": 0.6744, "step": 22554 }, { "epoch": 0.6585209190972526, "grad_norm": 0.7246012493745557, "learning_rate": 1.897161394971614e-06, "loss": 0.6602, "step": 22555 }, { "epoch": 0.6585501153251, "grad_norm": 0.7446227541881015, "learning_rate": 1.896999188969992e-06, "loss": 0.6714, "step": 22556 }, { "epoch": 0.6585793115529474, "grad_norm": 0.7289893466224353, "learning_rate": 1.89683698296837e-06, "loss": 0.6037, "step": 22557 }, { "epoch": 0.6586085077807947, "grad_norm": 0.709248326112674, "learning_rate": 1.8966747769667479e-06, "loss": 0.597, "step": 22558 }, { "epoch": 0.6586377040086421, "grad_norm": 0.7778882191878227, "learning_rate": 1.896512570965126e-06, "loss": 0.6661, "step": 22559 }, { "epoch": 0.6586669002364894, "grad_norm": 0.7153205138673325, "learning_rate": 1.8963503649635037e-06, "loss": 0.5449, "step": 22560 }, { "epoch": 0.6586960964643368, "grad_norm": 0.7541080132231459, "learning_rate": 1.8961881589618817e-06, "loss": 0.7075, "step": 22561 }, { "epoch": 0.6587252926921842, "grad_norm": 0.7158272004672213, "learning_rate": 1.89602595296026e-06, "loss": 0.6114, "step": 22562 }, { "epoch": 0.6587544889200315, "grad_norm": 0.7198184755388658, "learning_rate": 1.8958637469586377e-06, "loss": 0.6583, "step": 22563 }, { "epoch": 0.6587836851478789, "grad_norm": 0.7117172844231615, "learning_rate": 1.8957015409570157e-06, "loss": 0.6131, "step": 22564 }, { "epoch": 0.6588128813757262, "grad_norm": 0.7672623581895835, "learning_rate": 1.8955393349553935e-06, "loss": 0.73, "step": 22565 }, { "epoch": 0.6588420776035736, "grad_norm": 0.7024124106743064, "learning_rate": 1.8953771289537715e-06, "loss": 0.5817, "step": 22566 }, { "epoch": 0.658871273831421, "grad_norm": 0.7851370715274066, "learning_rate": 1.8952149229521493e-06, "loss": 0.6728, "step": 22567 }, { "epoch": 0.6589004700592683, "grad_norm": 0.6584067371779663, "learning_rate": 1.8950527169505273e-06, "loss": 0.5684, "step": 22568 }, { "epoch": 0.6589296662871157, "grad_norm": 0.7089310472063857, "learning_rate": 1.8948905109489051e-06, "loss": 0.5955, "step": 22569 }, { "epoch": 0.658958862514963, "grad_norm": 0.6891706506981411, "learning_rate": 1.8947283049472831e-06, "loss": 0.5805, "step": 22570 }, { "epoch": 0.6589880587428104, "grad_norm": 0.7831488573974484, "learning_rate": 1.894566098945661e-06, "loss": 0.7225, "step": 22571 }, { "epoch": 0.6590172549706578, "grad_norm": 0.7309021547956226, "learning_rate": 1.8944038929440391e-06, "loss": 0.6814, "step": 22572 }, { "epoch": 0.6590464511985051, "grad_norm": 0.7831948330572428, "learning_rate": 1.8942416869424171e-06, "loss": 0.6921, "step": 22573 }, { "epoch": 0.6590756474263525, "grad_norm": 0.7036366373514545, "learning_rate": 1.894079480940795e-06, "loss": 0.5972, "step": 22574 }, { "epoch": 0.6591048436541999, "grad_norm": 0.7427295849579059, "learning_rate": 1.893917274939173e-06, "loss": 0.6337, "step": 22575 }, { "epoch": 0.6591340398820472, "grad_norm": 0.7003216181851685, "learning_rate": 1.8937550689375507e-06, "loss": 0.6273, "step": 22576 }, { "epoch": 0.6591632361098946, "grad_norm": 0.7404680198258329, "learning_rate": 1.8935928629359287e-06, "loss": 0.6861, "step": 22577 }, { "epoch": 0.6591924323377419, "grad_norm": 0.763272156810131, "learning_rate": 1.8934306569343068e-06, "loss": 0.6356, "step": 22578 }, { "epoch": 0.6592216285655893, "grad_norm": 0.738779110082358, "learning_rate": 1.8932684509326845e-06, "loss": 0.6177, "step": 22579 }, { "epoch": 0.6592508247934367, "grad_norm": 0.7050183798220544, "learning_rate": 1.8931062449310626e-06, "loss": 0.6475, "step": 22580 }, { "epoch": 0.659280021021284, "grad_norm": 0.6958992369093712, "learning_rate": 1.8929440389294408e-06, "loss": 0.6411, "step": 22581 }, { "epoch": 0.6593092172491314, "grad_norm": 0.8840163572328371, "learning_rate": 1.8927818329278186e-06, "loss": 0.5977, "step": 22582 }, { "epoch": 0.6593384134769787, "grad_norm": 0.6471447438552494, "learning_rate": 1.8926196269261966e-06, "loss": 0.4671, "step": 22583 }, { "epoch": 0.6593676097048261, "grad_norm": 0.6640869659408176, "learning_rate": 1.8924574209245744e-06, "loss": 0.5435, "step": 22584 }, { "epoch": 0.6593968059326735, "grad_norm": 0.6835930010956853, "learning_rate": 1.8922952149229524e-06, "loss": 0.5763, "step": 22585 }, { "epoch": 0.6594260021605208, "grad_norm": 0.7846667722428778, "learning_rate": 1.8921330089213302e-06, "loss": 0.6967, "step": 22586 }, { "epoch": 0.6594551983883682, "grad_norm": 0.7886548343641657, "learning_rate": 1.8919708029197082e-06, "loss": 0.6672, "step": 22587 }, { "epoch": 0.6594843946162156, "grad_norm": 0.7361278603795571, "learning_rate": 1.891808596918086e-06, "loss": 0.6435, "step": 22588 }, { "epoch": 0.6595135908440629, "grad_norm": 0.7708151186383231, "learning_rate": 1.891646390916464e-06, "loss": 0.7076, "step": 22589 }, { "epoch": 0.6595427870719103, "grad_norm": 0.7583249866923893, "learning_rate": 1.8914841849148418e-06, "loss": 0.7666, "step": 22590 }, { "epoch": 0.6595719832997576, "grad_norm": 0.6943032459052201, "learning_rate": 1.89132197891322e-06, "loss": 0.6019, "step": 22591 }, { "epoch": 0.659601179527605, "grad_norm": 0.6904110282888324, "learning_rate": 1.891159772911598e-06, "loss": 0.5852, "step": 22592 }, { "epoch": 0.6596303757554524, "grad_norm": 0.6968851032481549, "learning_rate": 1.8909975669099758e-06, "loss": 0.5839, "step": 22593 }, { "epoch": 0.6596595719832997, "grad_norm": 0.7325062569799058, "learning_rate": 1.8908353609083538e-06, "loss": 0.6787, "step": 22594 }, { "epoch": 0.6596887682111471, "grad_norm": 0.7228276942380647, "learning_rate": 1.8906731549067316e-06, "loss": 0.6336, "step": 22595 }, { "epoch": 0.6597179644389944, "grad_norm": 0.7716265758120416, "learning_rate": 1.8905109489051096e-06, "loss": 0.6311, "step": 22596 }, { "epoch": 0.6597471606668418, "grad_norm": 0.7494376269992981, "learning_rate": 1.8903487429034876e-06, "loss": 0.6597, "step": 22597 }, { "epoch": 0.6597763568946892, "grad_norm": 0.6965818815142153, "learning_rate": 1.8901865369018654e-06, "loss": 0.6447, "step": 22598 }, { "epoch": 0.6598055531225365, "grad_norm": 0.8198813824907837, "learning_rate": 1.8900243309002434e-06, "loss": 0.709, "step": 22599 }, { "epoch": 0.6598347493503839, "grad_norm": 0.7485469437921333, "learning_rate": 1.8898621248986216e-06, "loss": 0.6511, "step": 22600 }, { "epoch": 0.6598639455782312, "grad_norm": 0.7534926069874472, "learning_rate": 1.8896999188969994e-06, "loss": 0.6451, "step": 22601 }, { "epoch": 0.6598931418060786, "grad_norm": 0.7759740479295147, "learning_rate": 1.8895377128953774e-06, "loss": 0.6496, "step": 22602 }, { "epoch": 0.659922338033926, "grad_norm": 0.719589330986395, "learning_rate": 1.8893755068937552e-06, "loss": 0.6238, "step": 22603 }, { "epoch": 0.6599515342617733, "grad_norm": 0.8952795419611226, "learning_rate": 1.8892133008921332e-06, "loss": 0.7025, "step": 22604 }, { "epoch": 0.6599807304896207, "grad_norm": 0.7160209007912246, "learning_rate": 1.889051094890511e-06, "loss": 0.6169, "step": 22605 }, { "epoch": 0.660009926717468, "grad_norm": 0.7822843793744746, "learning_rate": 1.888888888888889e-06, "loss": 0.6723, "step": 22606 }, { "epoch": 0.6600391229453154, "grad_norm": 0.7467162160630251, "learning_rate": 1.8887266828872668e-06, "loss": 0.6484, "step": 22607 }, { "epoch": 0.6600683191731628, "grad_norm": 0.7332721854747071, "learning_rate": 1.8885644768856448e-06, "loss": 0.6673, "step": 22608 }, { "epoch": 0.6600975154010101, "grad_norm": 0.75055802641826, "learning_rate": 1.8884022708840226e-06, "loss": 0.7177, "step": 22609 }, { "epoch": 0.6601267116288575, "grad_norm": 0.7510692450453057, "learning_rate": 1.8882400648824009e-06, "loss": 0.6926, "step": 22610 }, { "epoch": 0.6601559078567049, "grad_norm": 0.6778189660331894, "learning_rate": 1.8880778588807789e-06, "loss": 0.5718, "step": 22611 }, { "epoch": 0.6601851040845522, "grad_norm": 0.6699557502975974, "learning_rate": 1.8879156528791567e-06, "loss": 0.5772, "step": 22612 }, { "epoch": 0.6602143003123997, "grad_norm": 0.7628169406098219, "learning_rate": 1.8877534468775347e-06, "loss": 0.6935, "step": 22613 }, { "epoch": 0.660243496540247, "grad_norm": 0.7313393011101565, "learning_rate": 1.8875912408759125e-06, "loss": 0.655, "step": 22614 }, { "epoch": 0.6602726927680944, "grad_norm": 0.7533135969214717, "learning_rate": 1.8874290348742905e-06, "loss": 0.6798, "step": 22615 }, { "epoch": 0.6603018889959418, "grad_norm": 0.6742671775375715, "learning_rate": 1.8872668288726685e-06, "loss": 0.5514, "step": 22616 }, { "epoch": 0.6603310852237891, "grad_norm": 0.6940828077032367, "learning_rate": 1.8871046228710463e-06, "loss": 0.6058, "step": 22617 }, { "epoch": 0.6603602814516365, "grad_norm": 0.6844614404629178, "learning_rate": 1.8869424168694243e-06, "loss": 0.5973, "step": 22618 }, { "epoch": 0.6603894776794839, "grad_norm": 0.7272206678147296, "learning_rate": 1.8867802108678025e-06, "loss": 0.6568, "step": 22619 }, { "epoch": 0.6604186739073312, "grad_norm": 0.776777103551506, "learning_rate": 1.8866180048661803e-06, "loss": 0.6988, "step": 22620 }, { "epoch": 0.6604478701351786, "grad_norm": 0.6968093608168616, "learning_rate": 1.8864557988645583e-06, "loss": 0.5748, "step": 22621 }, { "epoch": 0.6604770663630259, "grad_norm": 0.7233724383388366, "learning_rate": 1.886293592862936e-06, "loss": 0.6197, "step": 22622 }, { "epoch": 0.6605062625908733, "grad_norm": 0.6986326860861926, "learning_rate": 1.886131386861314e-06, "loss": 0.6292, "step": 22623 }, { "epoch": 0.6605354588187207, "grad_norm": 0.7197272483776096, "learning_rate": 1.8859691808596919e-06, "loss": 0.6613, "step": 22624 }, { "epoch": 0.660564655046568, "grad_norm": 0.7628816714721751, "learning_rate": 1.88580697485807e-06, "loss": 0.6728, "step": 22625 }, { "epoch": 0.6605938512744154, "grad_norm": 0.6780327473247738, "learning_rate": 1.8856447688564477e-06, "loss": 0.5816, "step": 22626 }, { "epoch": 0.6606230475022628, "grad_norm": 0.7503773550052569, "learning_rate": 1.8854825628548257e-06, "loss": 0.6669, "step": 22627 }, { "epoch": 0.6606522437301101, "grad_norm": 0.7983898293187379, "learning_rate": 1.885320356853204e-06, "loss": 0.7428, "step": 22628 }, { "epoch": 0.6606814399579575, "grad_norm": 0.8244979946195481, "learning_rate": 1.8851581508515817e-06, "loss": 0.7217, "step": 22629 }, { "epoch": 0.6607106361858048, "grad_norm": 0.7521905329876747, "learning_rate": 1.8849959448499597e-06, "loss": 0.6139, "step": 22630 }, { "epoch": 0.6607398324136522, "grad_norm": 0.7080722090732267, "learning_rate": 1.8848337388483375e-06, "loss": 0.647, "step": 22631 }, { "epoch": 0.6607690286414996, "grad_norm": 0.6820684488564895, "learning_rate": 1.8846715328467155e-06, "loss": 0.6309, "step": 22632 }, { "epoch": 0.6607982248693469, "grad_norm": 0.7769394080691584, "learning_rate": 1.8845093268450933e-06, "loss": 0.6898, "step": 22633 }, { "epoch": 0.6608274210971943, "grad_norm": 1.5675850947188663, "learning_rate": 1.8843471208434713e-06, "loss": 0.6909, "step": 22634 }, { "epoch": 0.6608566173250416, "grad_norm": 0.7076149590511888, "learning_rate": 1.8841849148418493e-06, "loss": 0.6165, "step": 22635 }, { "epoch": 0.660885813552889, "grad_norm": 0.7826040102041648, "learning_rate": 1.8840227088402271e-06, "loss": 0.703, "step": 22636 }, { "epoch": 0.6609150097807364, "grad_norm": 0.7015416505283391, "learning_rate": 1.8838605028386051e-06, "loss": 0.5527, "step": 22637 }, { "epoch": 0.6609442060085837, "grad_norm": 0.6738039959192074, "learning_rate": 1.8836982968369831e-06, "loss": 0.5941, "step": 22638 }, { "epoch": 0.6609734022364311, "grad_norm": 0.7635499925403136, "learning_rate": 1.8835360908353611e-06, "loss": 0.7129, "step": 22639 }, { "epoch": 0.6610025984642784, "grad_norm": 0.7969077457916892, "learning_rate": 1.8833738848337392e-06, "loss": 0.698, "step": 22640 }, { "epoch": 0.6610317946921258, "grad_norm": 0.7126870656521213, "learning_rate": 1.883211678832117e-06, "loss": 0.6188, "step": 22641 }, { "epoch": 0.6610609909199732, "grad_norm": 0.730355717884751, "learning_rate": 1.883049472830495e-06, "loss": 0.5967, "step": 22642 }, { "epoch": 0.6610901871478205, "grad_norm": 0.7194449735042929, "learning_rate": 1.8828872668288727e-06, "loss": 0.622, "step": 22643 }, { "epoch": 0.6611193833756679, "grad_norm": 0.7535148080881979, "learning_rate": 1.8827250608272508e-06, "loss": 0.6665, "step": 22644 }, { "epoch": 0.6611485796035153, "grad_norm": 0.7158534298441294, "learning_rate": 1.8825628548256286e-06, "loss": 0.626, "step": 22645 }, { "epoch": 0.6611777758313626, "grad_norm": 0.746328751555331, "learning_rate": 1.8824006488240066e-06, "loss": 0.6851, "step": 22646 }, { "epoch": 0.66120697205921, "grad_norm": 0.7537445151266058, "learning_rate": 1.8822384428223848e-06, "loss": 0.6433, "step": 22647 }, { "epoch": 0.6612361682870573, "grad_norm": 0.6850333755575572, "learning_rate": 1.8820762368207626e-06, "loss": 0.5426, "step": 22648 }, { "epoch": 0.6612653645149047, "grad_norm": 0.7155666356890068, "learning_rate": 1.8819140308191406e-06, "loss": 0.6263, "step": 22649 }, { "epoch": 0.6612945607427521, "grad_norm": 0.7348323221260771, "learning_rate": 1.8817518248175184e-06, "loss": 0.6653, "step": 22650 }, { "epoch": 0.6613237569705994, "grad_norm": 0.7339781381527692, "learning_rate": 1.8815896188158964e-06, "loss": 0.6442, "step": 22651 }, { "epoch": 0.6613529531984468, "grad_norm": 0.7065206281429045, "learning_rate": 1.8814274128142742e-06, "loss": 0.602, "step": 22652 }, { "epoch": 0.6613821494262941, "grad_norm": 0.6944134589269405, "learning_rate": 1.8812652068126522e-06, "loss": 0.5777, "step": 22653 }, { "epoch": 0.6614113456541415, "grad_norm": 0.7169181259920876, "learning_rate": 1.8811030008110302e-06, "loss": 0.6615, "step": 22654 }, { "epoch": 0.6614405418819889, "grad_norm": 0.835134309308094, "learning_rate": 1.880940794809408e-06, "loss": 0.7485, "step": 22655 }, { "epoch": 0.6614697381098362, "grad_norm": 0.7061184244264787, "learning_rate": 1.880778588807786e-06, "loss": 0.6558, "step": 22656 }, { "epoch": 0.6614989343376836, "grad_norm": 0.6828492655156532, "learning_rate": 1.880616382806164e-06, "loss": 0.5938, "step": 22657 }, { "epoch": 0.661528130565531, "grad_norm": 0.7245422284689336, "learning_rate": 1.880454176804542e-06, "loss": 0.6767, "step": 22658 }, { "epoch": 0.6615573267933783, "grad_norm": 0.7202407679949933, "learning_rate": 1.88029197080292e-06, "loss": 0.6374, "step": 22659 }, { "epoch": 0.6615865230212257, "grad_norm": 0.7433768842534982, "learning_rate": 1.8801297648012978e-06, "loss": 0.6294, "step": 22660 }, { "epoch": 0.661615719249073, "grad_norm": 0.7675232126867737, "learning_rate": 1.8799675587996758e-06, "loss": 0.6989, "step": 22661 }, { "epoch": 0.6616449154769204, "grad_norm": 0.7649657027787244, "learning_rate": 1.8798053527980536e-06, "loss": 0.6692, "step": 22662 }, { "epoch": 0.6616741117047678, "grad_norm": 0.7902261249292853, "learning_rate": 1.8796431467964316e-06, "loss": 0.6997, "step": 22663 }, { "epoch": 0.6617033079326151, "grad_norm": 0.80103733482414, "learning_rate": 1.8794809407948094e-06, "loss": 0.751, "step": 22664 }, { "epoch": 0.6617325041604625, "grad_norm": 0.7137173310846583, "learning_rate": 1.8793187347931874e-06, "loss": 0.6328, "step": 22665 }, { "epoch": 0.6617617003883098, "grad_norm": 0.777940577938113, "learning_rate": 1.8791565287915656e-06, "loss": 0.695, "step": 22666 }, { "epoch": 0.6617908966161572, "grad_norm": 0.7308955351279449, "learning_rate": 1.8789943227899434e-06, "loss": 0.6837, "step": 22667 }, { "epoch": 0.6618200928440046, "grad_norm": 0.7125189291116788, "learning_rate": 1.8788321167883214e-06, "loss": 0.6099, "step": 22668 }, { "epoch": 0.6618492890718519, "grad_norm": 0.6679692585808811, "learning_rate": 1.8786699107866992e-06, "loss": 0.585, "step": 22669 }, { "epoch": 0.6618784852996993, "grad_norm": 0.8203504304327829, "learning_rate": 1.8785077047850772e-06, "loss": 0.5829, "step": 22670 }, { "epoch": 0.6619076815275466, "grad_norm": 0.6339353869797587, "learning_rate": 1.878345498783455e-06, "loss": 0.5197, "step": 22671 }, { "epoch": 0.661936877755394, "grad_norm": 0.6744407585448682, "learning_rate": 1.878183292781833e-06, "loss": 0.5677, "step": 22672 }, { "epoch": 0.6619660739832414, "grad_norm": 0.701975924824501, "learning_rate": 1.878021086780211e-06, "loss": 0.5977, "step": 22673 }, { "epoch": 0.6619952702110887, "grad_norm": 0.7559576580139047, "learning_rate": 1.8778588807785888e-06, "loss": 0.689, "step": 22674 }, { "epoch": 0.6620244664389361, "grad_norm": 0.6910482407781785, "learning_rate": 1.8776966747769668e-06, "loss": 0.6159, "step": 22675 }, { "epoch": 0.6620536626667834, "grad_norm": 0.7474881168952247, "learning_rate": 1.8775344687753449e-06, "loss": 0.621, "step": 22676 }, { "epoch": 0.6620828588946308, "grad_norm": 0.708790356188217, "learning_rate": 1.8773722627737229e-06, "loss": 0.6697, "step": 22677 }, { "epoch": 0.6621120551224782, "grad_norm": 0.7548450266680531, "learning_rate": 1.8772100567721009e-06, "loss": 0.6213, "step": 22678 }, { "epoch": 0.6621412513503255, "grad_norm": 0.6531153099288738, "learning_rate": 1.8770478507704787e-06, "loss": 0.5507, "step": 22679 }, { "epoch": 0.6621704475781729, "grad_norm": 0.7543273165923764, "learning_rate": 1.8768856447688567e-06, "loss": 0.6885, "step": 22680 }, { "epoch": 0.6621996438060203, "grad_norm": 0.7318237878307523, "learning_rate": 1.8767234387672345e-06, "loss": 0.6306, "step": 22681 }, { "epoch": 0.6622288400338676, "grad_norm": 0.7327829784351322, "learning_rate": 1.8765612327656125e-06, "loss": 0.7274, "step": 22682 }, { "epoch": 0.662258036261715, "grad_norm": 0.7973789414366987, "learning_rate": 1.8763990267639903e-06, "loss": 0.648, "step": 22683 }, { "epoch": 0.6622872324895623, "grad_norm": 0.6366588287393022, "learning_rate": 1.8762368207623683e-06, "loss": 0.5172, "step": 22684 }, { "epoch": 0.6623164287174097, "grad_norm": 0.8512091636631876, "learning_rate": 1.8760746147607465e-06, "loss": 0.7697, "step": 22685 }, { "epoch": 0.6623456249452571, "grad_norm": 0.6873161790471242, "learning_rate": 1.8759124087591243e-06, "loss": 0.5527, "step": 22686 }, { "epoch": 0.6623748211731044, "grad_norm": 0.7604182176802681, "learning_rate": 1.8757502027575023e-06, "loss": 0.6873, "step": 22687 }, { "epoch": 0.6624040174009518, "grad_norm": 0.7306480951731794, "learning_rate": 1.87558799675588e-06, "loss": 0.6294, "step": 22688 }, { "epoch": 0.6624332136287991, "grad_norm": 0.8249127773688806, "learning_rate": 1.875425790754258e-06, "loss": 0.7333, "step": 22689 }, { "epoch": 0.6624624098566465, "grad_norm": 0.7363187198530413, "learning_rate": 1.8752635847526359e-06, "loss": 0.6561, "step": 22690 }, { "epoch": 0.6624916060844939, "grad_norm": 0.7435349225091538, "learning_rate": 1.875101378751014e-06, "loss": 0.6831, "step": 22691 }, { "epoch": 0.6625208023123412, "grad_norm": 0.7081863986392432, "learning_rate": 1.874939172749392e-06, "loss": 0.6181, "step": 22692 }, { "epoch": 0.6625499985401886, "grad_norm": 0.7610569379183603, "learning_rate": 1.8747769667477697e-06, "loss": 0.6653, "step": 22693 }, { "epoch": 0.662579194768036, "grad_norm": 0.7447844384055595, "learning_rate": 1.874614760746148e-06, "loss": 0.6642, "step": 22694 }, { "epoch": 0.6626083909958833, "grad_norm": 0.7932123610042631, "learning_rate": 1.8744525547445257e-06, "loss": 0.7253, "step": 22695 }, { "epoch": 0.6626375872237307, "grad_norm": 0.7190898144757568, "learning_rate": 1.8742903487429037e-06, "loss": 0.6055, "step": 22696 }, { "epoch": 0.662666783451578, "grad_norm": 0.8072886871757049, "learning_rate": 1.8741281427412817e-06, "loss": 0.662, "step": 22697 }, { "epoch": 0.6626959796794254, "grad_norm": 0.7698526292168445, "learning_rate": 1.8739659367396595e-06, "loss": 0.6707, "step": 22698 }, { "epoch": 0.6627251759072728, "grad_norm": 0.7195177901382399, "learning_rate": 1.8738037307380375e-06, "loss": 0.6425, "step": 22699 }, { "epoch": 0.6627543721351201, "grad_norm": 0.7429523220033806, "learning_rate": 1.8736415247364153e-06, "loss": 0.6882, "step": 22700 }, { "epoch": 0.6627835683629675, "grad_norm": 0.8179969162120687, "learning_rate": 1.8734793187347933e-06, "loss": 0.714, "step": 22701 }, { "epoch": 0.6628127645908148, "grad_norm": 0.7158224888556937, "learning_rate": 1.8733171127331711e-06, "loss": 0.54, "step": 22702 }, { "epoch": 0.6628419608186622, "grad_norm": 0.7124058896561608, "learning_rate": 1.8731549067315491e-06, "loss": 0.6278, "step": 22703 }, { "epoch": 0.6628711570465096, "grad_norm": 0.6939353734383328, "learning_rate": 1.8729927007299274e-06, "loss": 0.5944, "step": 22704 }, { "epoch": 0.6629003532743569, "grad_norm": 0.7436523973902955, "learning_rate": 1.8728304947283051e-06, "loss": 0.6697, "step": 22705 }, { "epoch": 0.6629295495022043, "grad_norm": 0.7274582247278063, "learning_rate": 1.8726682887266832e-06, "loss": 0.6706, "step": 22706 }, { "epoch": 0.6629587457300516, "grad_norm": 0.7194951872487334, "learning_rate": 1.872506082725061e-06, "loss": 0.6584, "step": 22707 }, { "epoch": 0.662987941957899, "grad_norm": 0.8455304488820006, "learning_rate": 1.872343876723439e-06, "loss": 0.7175, "step": 22708 }, { "epoch": 0.6630171381857464, "grad_norm": 0.6512093656926805, "learning_rate": 1.8721816707218168e-06, "loss": 0.5352, "step": 22709 }, { "epoch": 0.6630463344135937, "grad_norm": 0.7058948514961064, "learning_rate": 1.8720194647201948e-06, "loss": 0.6479, "step": 22710 }, { "epoch": 0.6630755306414411, "grad_norm": 0.7755417912411589, "learning_rate": 1.8718572587185726e-06, "loss": 0.6871, "step": 22711 }, { "epoch": 0.6631047268692885, "grad_norm": 0.698782902478492, "learning_rate": 1.8716950527169506e-06, "loss": 0.6013, "step": 22712 }, { "epoch": 0.6631339230971358, "grad_norm": 0.7527134213230133, "learning_rate": 1.8715328467153288e-06, "loss": 0.7199, "step": 22713 }, { "epoch": 0.6631631193249832, "grad_norm": 0.7314892654387697, "learning_rate": 1.8713706407137066e-06, "loss": 0.6664, "step": 22714 }, { "epoch": 0.6631923155528305, "grad_norm": 0.7458410435320708, "learning_rate": 1.8712084347120846e-06, "loss": 0.6678, "step": 22715 }, { "epoch": 0.6632215117806779, "grad_norm": 0.7070844858329729, "learning_rate": 1.8710462287104626e-06, "loss": 0.6001, "step": 22716 }, { "epoch": 0.6632507080085253, "grad_norm": 0.7419270571230873, "learning_rate": 1.8708840227088404e-06, "loss": 0.6486, "step": 22717 }, { "epoch": 0.6632799042363726, "grad_norm": 0.6801842033247273, "learning_rate": 1.8707218167072184e-06, "loss": 0.5413, "step": 22718 }, { "epoch": 0.66330910046422, "grad_norm": 0.76196420580597, "learning_rate": 1.8705596107055962e-06, "loss": 0.6497, "step": 22719 }, { "epoch": 0.6633382966920673, "grad_norm": 0.7798914399910822, "learning_rate": 1.8703974047039742e-06, "loss": 0.7041, "step": 22720 }, { "epoch": 0.6633674929199147, "grad_norm": 0.8253228753006774, "learning_rate": 1.870235198702352e-06, "loss": 0.6481, "step": 22721 }, { "epoch": 0.6633966891477621, "grad_norm": 0.7251093995854784, "learning_rate": 1.87007299270073e-06, "loss": 0.6435, "step": 22722 }, { "epoch": 0.6634258853756094, "grad_norm": 0.6978135262783898, "learning_rate": 1.8699107866991082e-06, "loss": 0.6256, "step": 22723 }, { "epoch": 0.6634550816034568, "grad_norm": 0.6566400951797322, "learning_rate": 1.869748580697486e-06, "loss": 0.5406, "step": 22724 }, { "epoch": 0.6634842778313041, "grad_norm": 0.6666880196175575, "learning_rate": 1.869586374695864e-06, "loss": 0.6002, "step": 22725 }, { "epoch": 0.6635134740591515, "grad_norm": 0.7207003053470499, "learning_rate": 1.8694241686942418e-06, "loss": 0.6298, "step": 22726 }, { "epoch": 0.6635426702869989, "grad_norm": 0.6955270590218983, "learning_rate": 1.8692619626926198e-06, "loss": 0.5994, "step": 22727 }, { "epoch": 0.6635718665148462, "grad_norm": 0.708463187141419, "learning_rate": 1.8690997566909976e-06, "loss": 0.614, "step": 22728 }, { "epoch": 0.6636010627426936, "grad_norm": 0.811334230787808, "learning_rate": 1.8689375506893756e-06, "loss": 0.6592, "step": 22729 }, { "epoch": 0.663630258970541, "grad_norm": 0.6999056489582564, "learning_rate": 1.8687753446877534e-06, "loss": 0.6123, "step": 22730 }, { "epoch": 0.6636594551983883, "grad_norm": 0.7024615778666001, "learning_rate": 1.8686131386861314e-06, "loss": 0.5978, "step": 22731 }, { "epoch": 0.6636886514262357, "grad_norm": 0.6968093185807351, "learning_rate": 1.8684509326845096e-06, "loss": 0.6348, "step": 22732 }, { "epoch": 0.663717847654083, "grad_norm": 0.6890290416341947, "learning_rate": 1.8682887266828874e-06, "loss": 0.5938, "step": 22733 }, { "epoch": 0.6637470438819305, "grad_norm": 0.7119145166530009, "learning_rate": 1.8681265206812654e-06, "loss": 0.6289, "step": 22734 }, { "epoch": 0.6637762401097779, "grad_norm": 0.7218667674611688, "learning_rate": 1.8679643146796434e-06, "loss": 0.692, "step": 22735 }, { "epoch": 0.6638054363376252, "grad_norm": 0.776681860907956, "learning_rate": 1.8678021086780212e-06, "loss": 0.6576, "step": 22736 }, { "epoch": 0.6638346325654726, "grad_norm": 0.6804236282227701, "learning_rate": 1.8676399026763992e-06, "loss": 0.6236, "step": 22737 }, { "epoch": 0.66386382879332, "grad_norm": 0.7075059422463806, "learning_rate": 1.867477696674777e-06, "loss": 0.5915, "step": 22738 }, { "epoch": 0.6638930250211673, "grad_norm": 0.7139981587239488, "learning_rate": 1.867315490673155e-06, "loss": 0.6482, "step": 22739 }, { "epoch": 0.6639222212490147, "grad_norm": 0.6837272662798689, "learning_rate": 1.8671532846715328e-06, "loss": 0.5517, "step": 22740 }, { "epoch": 0.663951417476862, "grad_norm": 0.6802132781039073, "learning_rate": 1.8669910786699109e-06, "loss": 0.5862, "step": 22741 }, { "epoch": 0.6639806137047094, "grad_norm": 0.7363099493604991, "learning_rate": 1.866828872668289e-06, "loss": 0.6579, "step": 22742 }, { "epoch": 0.6640098099325568, "grad_norm": 0.7816837674450701, "learning_rate": 1.8666666666666669e-06, "loss": 0.6664, "step": 22743 }, { "epoch": 0.6640390061604041, "grad_norm": 0.781135368102808, "learning_rate": 1.8665044606650449e-06, "loss": 0.7565, "step": 22744 }, { "epoch": 0.6640682023882515, "grad_norm": 0.7561202924494502, "learning_rate": 1.8663422546634227e-06, "loss": 0.6184, "step": 22745 }, { "epoch": 0.6640973986160988, "grad_norm": 0.7291470019827028, "learning_rate": 1.8661800486618007e-06, "loss": 0.6683, "step": 22746 }, { "epoch": 0.6641265948439462, "grad_norm": 0.7342619626608865, "learning_rate": 1.8660178426601785e-06, "loss": 0.6563, "step": 22747 }, { "epoch": 0.6641557910717936, "grad_norm": 0.8356772032726987, "learning_rate": 1.8658556366585565e-06, "loss": 0.7492, "step": 22748 }, { "epoch": 0.6641849872996409, "grad_norm": 0.7187536545515082, "learning_rate": 1.8656934306569343e-06, "loss": 0.6083, "step": 22749 }, { "epoch": 0.6642141835274883, "grad_norm": 0.6860598082911058, "learning_rate": 1.8655312246553123e-06, "loss": 0.5641, "step": 22750 }, { "epoch": 0.6642433797553357, "grad_norm": 0.731902450842788, "learning_rate": 1.8653690186536905e-06, "loss": 0.6601, "step": 22751 }, { "epoch": 0.664272575983183, "grad_norm": 0.7146842145833657, "learning_rate": 1.8652068126520683e-06, "loss": 0.6469, "step": 22752 }, { "epoch": 0.6643017722110304, "grad_norm": 0.7342223939693981, "learning_rate": 1.8650446066504463e-06, "loss": 0.6881, "step": 22753 }, { "epoch": 0.6643309684388777, "grad_norm": 0.7744818111380601, "learning_rate": 1.8648824006488243e-06, "loss": 0.6425, "step": 22754 }, { "epoch": 0.6643601646667251, "grad_norm": 0.7033751914011194, "learning_rate": 1.864720194647202e-06, "loss": 0.6067, "step": 22755 }, { "epoch": 0.6643893608945725, "grad_norm": 0.7611294844782905, "learning_rate": 1.8645579886455801e-06, "loss": 0.6789, "step": 22756 }, { "epoch": 0.6644185571224198, "grad_norm": 0.7334277519054667, "learning_rate": 1.864395782643958e-06, "loss": 0.5986, "step": 22757 }, { "epoch": 0.6644477533502672, "grad_norm": 0.7114737299109652, "learning_rate": 1.864233576642336e-06, "loss": 0.5987, "step": 22758 }, { "epoch": 0.6644769495781145, "grad_norm": 0.6591514926028403, "learning_rate": 1.8640713706407137e-06, "loss": 0.544, "step": 22759 }, { "epoch": 0.6645061458059619, "grad_norm": 0.7481163666411458, "learning_rate": 1.8639091646390917e-06, "loss": 0.6853, "step": 22760 }, { "epoch": 0.6645353420338093, "grad_norm": 0.7229245414194432, "learning_rate": 1.86374695863747e-06, "loss": 0.6822, "step": 22761 }, { "epoch": 0.6645645382616566, "grad_norm": 0.6617474140944686, "learning_rate": 1.8635847526358477e-06, "loss": 0.5607, "step": 22762 }, { "epoch": 0.664593734489504, "grad_norm": 0.7596324890706053, "learning_rate": 1.8634225466342257e-06, "loss": 0.7003, "step": 22763 }, { "epoch": 0.6646229307173513, "grad_norm": 0.7313420636851471, "learning_rate": 1.8632603406326035e-06, "loss": 0.6193, "step": 22764 }, { "epoch": 0.6646521269451987, "grad_norm": 0.751494466954803, "learning_rate": 1.8630981346309815e-06, "loss": 0.7223, "step": 22765 }, { "epoch": 0.6646813231730461, "grad_norm": 0.7140626164009165, "learning_rate": 1.8629359286293593e-06, "loss": 0.6525, "step": 22766 }, { "epoch": 0.6647105194008934, "grad_norm": 0.819367622273486, "learning_rate": 1.8627737226277373e-06, "loss": 0.7996, "step": 22767 }, { "epoch": 0.6647397156287408, "grad_norm": 0.7188947589292093, "learning_rate": 1.8626115166261151e-06, "loss": 0.6573, "step": 22768 }, { "epoch": 0.6647689118565882, "grad_norm": 0.7422686935853995, "learning_rate": 1.8624493106244931e-06, "loss": 0.5971, "step": 22769 }, { "epoch": 0.6647981080844355, "grad_norm": 0.7865441886627305, "learning_rate": 1.8622871046228714e-06, "loss": 0.7029, "step": 22770 }, { "epoch": 0.6648273043122829, "grad_norm": 0.7500993378669416, "learning_rate": 1.8621248986212491e-06, "loss": 0.6578, "step": 22771 }, { "epoch": 0.6648565005401302, "grad_norm": 0.6871167589582579, "learning_rate": 1.8619626926196272e-06, "loss": 0.5683, "step": 22772 }, { "epoch": 0.6648856967679776, "grad_norm": 0.7202035504371105, "learning_rate": 1.861800486618005e-06, "loss": 0.6564, "step": 22773 }, { "epoch": 0.664914892995825, "grad_norm": 0.7227406023790558, "learning_rate": 1.861638280616383e-06, "loss": 0.6269, "step": 22774 }, { "epoch": 0.6649440892236723, "grad_norm": 0.7592825416349016, "learning_rate": 1.861476074614761e-06, "loss": 0.7088, "step": 22775 }, { "epoch": 0.6649732854515197, "grad_norm": 0.6731447999150157, "learning_rate": 1.8613138686131388e-06, "loss": 0.5552, "step": 22776 }, { "epoch": 0.665002481679367, "grad_norm": 0.6584115819469151, "learning_rate": 1.8611516626115168e-06, "loss": 0.5539, "step": 22777 }, { "epoch": 0.6650316779072144, "grad_norm": 0.6939040360323412, "learning_rate": 1.8609894566098946e-06, "loss": 0.5984, "step": 22778 }, { "epoch": 0.6650608741350618, "grad_norm": 0.7084203435089039, "learning_rate": 1.8608272506082728e-06, "loss": 0.5707, "step": 22779 }, { "epoch": 0.6650900703629091, "grad_norm": 0.7375532328944956, "learning_rate": 1.8606650446066508e-06, "loss": 0.6613, "step": 22780 }, { "epoch": 0.6651192665907565, "grad_norm": 0.6428892889763987, "learning_rate": 1.8605028386050286e-06, "loss": 0.5257, "step": 22781 }, { "epoch": 0.6651484628186038, "grad_norm": 0.7084404506212932, "learning_rate": 1.8603406326034066e-06, "loss": 0.6462, "step": 22782 }, { "epoch": 0.6651776590464512, "grad_norm": 0.6776571157631889, "learning_rate": 1.8601784266017844e-06, "loss": 0.621, "step": 22783 }, { "epoch": 0.6652068552742986, "grad_norm": 0.6720585282295762, "learning_rate": 1.8600162206001624e-06, "loss": 0.5386, "step": 22784 }, { "epoch": 0.6652360515021459, "grad_norm": 0.744819654598823, "learning_rate": 1.8598540145985402e-06, "loss": 0.6776, "step": 22785 }, { "epoch": 0.6652652477299933, "grad_norm": 0.7344117954608781, "learning_rate": 1.8596918085969182e-06, "loss": 0.6857, "step": 22786 }, { "epoch": 0.6652944439578407, "grad_norm": 0.7195882371276391, "learning_rate": 1.859529602595296e-06, "loss": 0.5946, "step": 22787 }, { "epoch": 0.665323640185688, "grad_norm": 0.7658347665081672, "learning_rate": 1.859367396593674e-06, "loss": 0.6996, "step": 22788 }, { "epoch": 0.6653528364135354, "grad_norm": 0.7561919137185199, "learning_rate": 1.8592051905920522e-06, "loss": 0.6804, "step": 22789 }, { "epoch": 0.6653820326413827, "grad_norm": 0.6894191464043518, "learning_rate": 1.85904298459043e-06, "loss": 0.5985, "step": 22790 }, { "epoch": 0.6654112288692301, "grad_norm": 0.6834082956265289, "learning_rate": 1.858880778588808e-06, "loss": 0.5729, "step": 22791 }, { "epoch": 0.6654404250970775, "grad_norm": 0.6790097861785007, "learning_rate": 1.8587185725871858e-06, "loss": 0.5938, "step": 22792 }, { "epoch": 0.6654696213249248, "grad_norm": 0.7791795372371013, "learning_rate": 1.8585563665855638e-06, "loss": 0.6221, "step": 22793 }, { "epoch": 0.6654988175527722, "grad_norm": 0.6938003605687094, "learning_rate": 1.8583941605839418e-06, "loss": 0.613, "step": 22794 }, { "epoch": 0.6655280137806195, "grad_norm": 0.7091261478319084, "learning_rate": 1.8582319545823196e-06, "loss": 0.6268, "step": 22795 }, { "epoch": 0.6655572100084669, "grad_norm": 0.6737629222720382, "learning_rate": 1.8580697485806976e-06, "loss": 0.5849, "step": 22796 }, { "epoch": 0.6655864062363143, "grad_norm": 0.7514589019980832, "learning_rate": 1.8579075425790754e-06, "loss": 0.6775, "step": 22797 }, { "epoch": 0.6656156024641616, "grad_norm": 0.7485599858676625, "learning_rate": 1.8577453365774536e-06, "loss": 0.68, "step": 22798 }, { "epoch": 0.665644798692009, "grad_norm": 0.683250178373716, "learning_rate": 1.8575831305758316e-06, "loss": 0.5403, "step": 22799 }, { "epoch": 0.6656739949198563, "grad_norm": 0.6541901309456948, "learning_rate": 1.8574209245742094e-06, "loss": 0.4959, "step": 22800 }, { "epoch": 0.6657031911477037, "grad_norm": 0.7199716996117129, "learning_rate": 1.8572587185725874e-06, "loss": 0.6525, "step": 22801 }, { "epoch": 0.6657323873755511, "grad_norm": 0.6949522097064327, "learning_rate": 1.8570965125709652e-06, "loss": 0.5591, "step": 22802 }, { "epoch": 0.6657615836033984, "grad_norm": 0.7248566026542838, "learning_rate": 1.8569343065693432e-06, "loss": 0.6521, "step": 22803 }, { "epoch": 0.6657907798312458, "grad_norm": 0.7262240301755691, "learning_rate": 1.856772100567721e-06, "loss": 0.6877, "step": 22804 }, { "epoch": 0.6658199760590932, "grad_norm": 0.713712318156784, "learning_rate": 1.856609894566099e-06, "loss": 0.6321, "step": 22805 }, { "epoch": 0.6658491722869405, "grad_norm": 0.7243477256424614, "learning_rate": 1.8564476885644768e-06, "loss": 0.6287, "step": 22806 }, { "epoch": 0.6658783685147879, "grad_norm": 0.7189378629539286, "learning_rate": 1.8562854825628549e-06, "loss": 0.656, "step": 22807 }, { "epoch": 0.6659075647426352, "grad_norm": 0.660601807313098, "learning_rate": 1.856123276561233e-06, "loss": 0.5501, "step": 22808 }, { "epoch": 0.6659367609704826, "grad_norm": 0.6231977448969406, "learning_rate": 1.8559610705596109e-06, "loss": 0.4828, "step": 22809 }, { "epoch": 0.66596595719833, "grad_norm": 0.7141951248939313, "learning_rate": 1.8557988645579889e-06, "loss": 0.6113, "step": 22810 }, { "epoch": 0.6659951534261773, "grad_norm": 0.7363519834519989, "learning_rate": 1.8556366585563667e-06, "loss": 0.6682, "step": 22811 }, { "epoch": 0.6660243496540247, "grad_norm": 0.7464475240626641, "learning_rate": 1.8554744525547447e-06, "loss": 0.6628, "step": 22812 }, { "epoch": 0.666053545881872, "grad_norm": 0.7503484574007558, "learning_rate": 1.8553122465531227e-06, "loss": 0.6423, "step": 22813 }, { "epoch": 0.6660827421097194, "grad_norm": 0.6831327311099095, "learning_rate": 1.8551500405515005e-06, "loss": 0.6014, "step": 22814 }, { "epoch": 0.6661119383375668, "grad_norm": 0.7336095017372342, "learning_rate": 1.8549878345498785e-06, "loss": 0.6318, "step": 22815 }, { "epoch": 0.6661411345654141, "grad_norm": 0.6862209591706525, "learning_rate": 1.8548256285482563e-06, "loss": 0.6277, "step": 22816 }, { "epoch": 0.6661703307932615, "grad_norm": 0.721879706848305, "learning_rate": 1.8546634225466345e-06, "loss": 0.6692, "step": 22817 }, { "epoch": 0.6661995270211089, "grad_norm": 0.6742924186261924, "learning_rate": 1.8545012165450125e-06, "loss": 0.5644, "step": 22818 }, { "epoch": 0.6662287232489562, "grad_norm": 0.6970494047773464, "learning_rate": 1.8543390105433903e-06, "loss": 0.5554, "step": 22819 }, { "epoch": 0.6662579194768036, "grad_norm": 0.6875439790063432, "learning_rate": 1.8541768045417683e-06, "loss": 0.5934, "step": 22820 }, { "epoch": 0.6662871157046509, "grad_norm": 0.7182065382508807, "learning_rate": 1.854014598540146e-06, "loss": 0.5981, "step": 22821 }, { "epoch": 0.6663163119324983, "grad_norm": 0.7100531344426139, "learning_rate": 1.8538523925385241e-06, "loss": 0.5207, "step": 22822 }, { "epoch": 0.6663455081603457, "grad_norm": 0.7736136194589538, "learning_rate": 1.853690186536902e-06, "loss": 0.6615, "step": 22823 }, { "epoch": 0.666374704388193, "grad_norm": 0.7474435645722911, "learning_rate": 1.85352798053528e-06, "loss": 0.6712, "step": 22824 }, { "epoch": 0.6664039006160404, "grad_norm": 0.790250793283511, "learning_rate": 1.8533657745336577e-06, "loss": 0.6881, "step": 22825 }, { "epoch": 0.6664330968438877, "grad_norm": 0.6999586258427801, "learning_rate": 1.8532035685320357e-06, "loss": 0.5682, "step": 22826 }, { "epoch": 0.6664622930717351, "grad_norm": 0.8573778629303316, "learning_rate": 1.853041362530414e-06, "loss": 0.7048, "step": 22827 }, { "epoch": 0.6664914892995825, "grad_norm": 0.7367440266996469, "learning_rate": 1.8528791565287917e-06, "loss": 0.6353, "step": 22828 }, { "epoch": 0.6665206855274298, "grad_norm": 0.7401617780354857, "learning_rate": 1.8527169505271697e-06, "loss": 0.678, "step": 22829 }, { "epoch": 0.6665498817552772, "grad_norm": 0.6958281014993231, "learning_rate": 1.8525547445255475e-06, "loss": 0.6166, "step": 22830 }, { "epoch": 0.6665790779831245, "grad_norm": 0.80278388277434, "learning_rate": 1.8523925385239255e-06, "loss": 0.7621, "step": 22831 }, { "epoch": 0.6666082742109719, "grad_norm": 0.762528437595265, "learning_rate": 1.8522303325223035e-06, "loss": 0.6398, "step": 22832 }, { "epoch": 0.6666374704388193, "grad_norm": 0.7999096666319424, "learning_rate": 1.8520681265206813e-06, "loss": 0.6943, "step": 22833 }, { "epoch": 0.6666666666666666, "grad_norm": 0.6882151890213324, "learning_rate": 1.8519059205190593e-06, "loss": 0.6331, "step": 22834 }, { "epoch": 0.666695862894514, "grad_norm": 0.8072352593937109, "learning_rate": 1.8517437145174371e-06, "loss": 0.7112, "step": 22835 }, { "epoch": 0.6667250591223614, "grad_norm": 0.7118153361936783, "learning_rate": 1.8515815085158154e-06, "loss": 0.5955, "step": 22836 }, { "epoch": 0.6667542553502087, "grad_norm": 0.6689300137044669, "learning_rate": 1.8514193025141934e-06, "loss": 0.5565, "step": 22837 }, { "epoch": 0.6667834515780561, "grad_norm": 0.7182119851507, "learning_rate": 1.8512570965125712e-06, "loss": 0.5527, "step": 22838 }, { "epoch": 0.6668126478059034, "grad_norm": 0.7378415165184017, "learning_rate": 1.8510948905109492e-06, "loss": 0.6505, "step": 22839 }, { "epoch": 0.6668418440337508, "grad_norm": 0.7210481276043801, "learning_rate": 1.850932684509327e-06, "loss": 0.6233, "step": 22840 }, { "epoch": 0.6668710402615982, "grad_norm": 0.7471549852075755, "learning_rate": 1.850770478507705e-06, "loss": 0.6665, "step": 22841 }, { "epoch": 0.6669002364894455, "grad_norm": 0.7262331943822358, "learning_rate": 1.8506082725060828e-06, "loss": 0.6491, "step": 22842 }, { "epoch": 0.6669294327172929, "grad_norm": 0.7268262130695886, "learning_rate": 1.8504460665044608e-06, "loss": 0.6231, "step": 22843 }, { "epoch": 0.6669586289451402, "grad_norm": 0.775959744248038, "learning_rate": 1.8502838605028386e-06, "loss": 0.7628, "step": 22844 }, { "epoch": 0.6669878251729876, "grad_norm": 0.8787620676163089, "learning_rate": 1.8501216545012168e-06, "loss": 0.673, "step": 22845 }, { "epoch": 0.667017021400835, "grad_norm": 0.7414666077727325, "learning_rate": 1.8499594484995948e-06, "loss": 0.718, "step": 22846 }, { "epoch": 0.6670462176286823, "grad_norm": 0.7829194256357161, "learning_rate": 1.8497972424979726e-06, "loss": 0.7598, "step": 22847 }, { "epoch": 0.6670754138565297, "grad_norm": 0.7177902035406651, "learning_rate": 1.8496350364963506e-06, "loss": 0.6423, "step": 22848 }, { "epoch": 0.667104610084377, "grad_norm": 0.6965734465426072, "learning_rate": 1.8494728304947284e-06, "loss": 0.6442, "step": 22849 }, { "epoch": 0.6671338063122244, "grad_norm": 0.7813883359407198, "learning_rate": 1.8493106244931064e-06, "loss": 0.7693, "step": 22850 }, { "epoch": 0.6671630025400718, "grad_norm": 0.6444797589219604, "learning_rate": 1.8491484184914844e-06, "loss": 0.5134, "step": 22851 }, { "epoch": 0.6671921987679191, "grad_norm": 0.8057657414656487, "learning_rate": 1.8489862124898622e-06, "loss": 0.6988, "step": 22852 }, { "epoch": 0.6672213949957665, "grad_norm": 0.7395405904693396, "learning_rate": 1.8488240064882402e-06, "loss": 0.6477, "step": 22853 }, { "epoch": 0.6672505912236139, "grad_norm": 0.7690281597488288, "learning_rate": 1.848661800486618e-06, "loss": 0.6957, "step": 22854 }, { "epoch": 0.6672797874514613, "grad_norm": 0.7072962197559058, "learning_rate": 1.8484995944849962e-06, "loss": 0.6247, "step": 22855 }, { "epoch": 0.6673089836793087, "grad_norm": 0.7199106230489144, "learning_rate": 1.8483373884833742e-06, "loss": 0.6558, "step": 22856 }, { "epoch": 0.667338179907156, "grad_norm": 0.7540206844905893, "learning_rate": 1.848175182481752e-06, "loss": 0.7154, "step": 22857 }, { "epoch": 0.6673673761350034, "grad_norm": 0.7695695172878896, "learning_rate": 1.84801297648013e-06, "loss": 0.6422, "step": 22858 }, { "epoch": 0.6673965723628508, "grad_norm": 0.7309343620410407, "learning_rate": 1.8478507704785078e-06, "loss": 0.6408, "step": 22859 }, { "epoch": 0.6674257685906981, "grad_norm": 0.7442397976957768, "learning_rate": 1.8476885644768858e-06, "loss": 0.6634, "step": 22860 }, { "epoch": 0.6674549648185455, "grad_norm": 0.7616873127737456, "learning_rate": 1.8475263584752636e-06, "loss": 0.6583, "step": 22861 }, { "epoch": 0.6674841610463929, "grad_norm": 0.7472019562077542, "learning_rate": 1.8473641524736416e-06, "loss": 0.58, "step": 22862 }, { "epoch": 0.6675133572742402, "grad_norm": 0.7631210921180847, "learning_rate": 1.8472019464720194e-06, "loss": 0.6812, "step": 22863 }, { "epoch": 0.6675425535020876, "grad_norm": 0.8040148130389402, "learning_rate": 1.8470397404703976e-06, "loss": 0.728, "step": 22864 }, { "epoch": 0.6675717497299349, "grad_norm": 0.6743667313960512, "learning_rate": 1.8468775344687756e-06, "loss": 0.5843, "step": 22865 }, { "epoch": 0.6676009459577823, "grad_norm": 0.7457088014513861, "learning_rate": 1.8467153284671534e-06, "loss": 0.6311, "step": 22866 }, { "epoch": 0.6676301421856297, "grad_norm": 0.7230920525271648, "learning_rate": 1.8465531224655314e-06, "loss": 0.6288, "step": 22867 }, { "epoch": 0.667659338413477, "grad_norm": 0.7093452763633903, "learning_rate": 1.8463909164639092e-06, "loss": 0.6406, "step": 22868 }, { "epoch": 0.6676885346413244, "grad_norm": 0.7382628982004346, "learning_rate": 1.8462287104622873e-06, "loss": 0.651, "step": 22869 }, { "epoch": 0.6677177308691717, "grad_norm": 0.700356502883836, "learning_rate": 1.8460665044606653e-06, "loss": 0.6098, "step": 22870 }, { "epoch": 0.6677469270970191, "grad_norm": 0.7075150267564052, "learning_rate": 1.845904298459043e-06, "loss": 0.6192, "step": 22871 }, { "epoch": 0.6677761233248665, "grad_norm": 0.6916168466099205, "learning_rate": 1.845742092457421e-06, "loss": 0.5875, "step": 22872 }, { "epoch": 0.6678053195527138, "grad_norm": 0.7018397154047523, "learning_rate": 1.8455798864557989e-06, "loss": 0.6625, "step": 22873 }, { "epoch": 0.6678345157805612, "grad_norm": 0.737435049440399, "learning_rate": 1.845417680454177e-06, "loss": 0.6578, "step": 22874 }, { "epoch": 0.6678637120084085, "grad_norm": 0.7342468539195937, "learning_rate": 1.845255474452555e-06, "loss": 0.663, "step": 22875 }, { "epoch": 0.6678929082362559, "grad_norm": 0.7620642137474061, "learning_rate": 1.8450932684509329e-06, "loss": 0.6785, "step": 22876 }, { "epoch": 0.6679221044641033, "grad_norm": 0.678967775541063, "learning_rate": 1.8449310624493109e-06, "loss": 0.5755, "step": 22877 }, { "epoch": 0.6679513006919506, "grad_norm": 0.6831929648061581, "learning_rate": 1.8447688564476887e-06, "loss": 0.554, "step": 22878 }, { "epoch": 0.667980496919798, "grad_norm": 0.6422712139736083, "learning_rate": 1.8446066504460667e-06, "loss": 0.5293, "step": 22879 }, { "epoch": 0.6680096931476454, "grad_norm": 0.7287304980613346, "learning_rate": 1.8444444444444445e-06, "loss": 0.6286, "step": 22880 }, { "epoch": 0.6680388893754927, "grad_norm": 0.6963413289284428, "learning_rate": 1.8442822384428225e-06, "loss": 0.6214, "step": 22881 }, { "epoch": 0.6680680856033401, "grad_norm": 0.755336623387219, "learning_rate": 1.8441200324412003e-06, "loss": 0.7189, "step": 22882 }, { "epoch": 0.6680972818311874, "grad_norm": 0.6996551644128359, "learning_rate": 1.8439578264395785e-06, "loss": 0.6511, "step": 22883 }, { "epoch": 0.6681264780590348, "grad_norm": 0.7206743076496669, "learning_rate": 1.8437956204379565e-06, "loss": 0.6197, "step": 22884 }, { "epoch": 0.6681556742868822, "grad_norm": 0.7042774678144582, "learning_rate": 1.8436334144363343e-06, "loss": 0.6045, "step": 22885 }, { "epoch": 0.6681848705147295, "grad_norm": 0.7323110531807978, "learning_rate": 1.8434712084347123e-06, "loss": 0.6517, "step": 22886 }, { "epoch": 0.6682140667425769, "grad_norm": 0.7990956138770375, "learning_rate": 1.84330900243309e-06, "loss": 0.7249, "step": 22887 }, { "epoch": 0.6682432629704242, "grad_norm": 0.7168969301199175, "learning_rate": 1.8431467964314681e-06, "loss": 0.6509, "step": 22888 }, { "epoch": 0.6682724591982716, "grad_norm": 0.8246330039368762, "learning_rate": 1.842984590429846e-06, "loss": 0.673, "step": 22889 }, { "epoch": 0.668301655426119, "grad_norm": 0.7521419575571632, "learning_rate": 1.842822384428224e-06, "loss": 0.6288, "step": 22890 }, { "epoch": 0.6683308516539663, "grad_norm": 0.7242337500734641, "learning_rate": 1.842660178426602e-06, "loss": 0.6469, "step": 22891 }, { "epoch": 0.6683600478818137, "grad_norm": 0.7094177368151039, "learning_rate": 1.8424979724249797e-06, "loss": 0.652, "step": 22892 }, { "epoch": 0.668389244109661, "grad_norm": 0.7255885754681854, "learning_rate": 1.842335766423358e-06, "loss": 0.6207, "step": 22893 }, { "epoch": 0.6684184403375084, "grad_norm": 0.7245542165716036, "learning_rate": 1.842173560421736e-06, "loss": 0.6541, "step": 22894 }, { "epoch": 0.6684476365653558, "grad_norm": 0.641650614173242, "learning_rate": 1.8420113544201137e-06, "loss": 0.5187, "step": 22895 }, { "epoch": 0.6684768327932031, "grad_norm": 0.7053391151689506, "learning_rate": 1.8418491484184917e-06, "loss": 0.5941, "step": 22896 }, { "epoch": 0.6685060290210505, "grad_norm": 0.7112899066765351, "learning_rate": 1.8416869424168695e-06, "loss": 0.6243, "step": 22897 }, { "epoch": 0.6685352252488979, "grad_norm": 0.7796420685169055, "learning_rate": 1.8415247364152475e-06, "loss": 0.7043, "step": 22898 }, { "epoch": 0.6685644214767452, "grad_norm": 0.6523872004274821, "learning_rate": 1.8413625304136253e-06, "loss": 0.5707, "step": 22899 }, { "epoch": 0.6685936177045926, "grad_norm": 0.7180160550891851, "learning_rate": 1.8412003244120033e-06, "loss": 0.619, "step": 22900 }, { "epoch": 0.6686228139324399, "grad_norm": 0.7233089549605348, "learning_rate": 1.8410381184103811e-06, "loss": 0.6174, "step": 22901 }, { "epoch": 0.6686520101602873, "grad_norm": 0.7151320832947513, "learning_rate": 1.8408759124087594e-06, "loss": 0.5749, "step": 22902 }, { "epoch": 0.6686812063881347, "grad_norm": 0.6552923109729504, "learning_rate": 1.8407137064071374e-06, "loss": 0.5141, "step": 22903 }, { "epoch": 0.668710402615982, "grad_norm": 0.70578106372563, "learning_rate": 1.8405515004055152e-06, "loss": 0.6264, "step": 22904 }, { "epoch": 0.6687395988438294, "grad_norm": 0.68343378231984, "learning_rate": 1.8403892944038932e-06, "loss": 0.5438, "step": 22905 }, { "epoch": 0.6687687950716767, "grad_norm": 0.7170333268038032, "learning_rate": 1.840227088402271e-06, "loss": 0.6478, "step": 22906 }, { "epoch": 0.6687979912995241, "grad_norm": 0.7604228153338186, "learning_rate": 1.840064882400649e-06, "loss": 0.6759, "step": 22907 }, { "epoch": 0.6688271875273715, "grad_norm": 0.7521377149975433, "learning_rate": 1.8399026763990268e-06, "loss": 0.6785, "step": 22908 }, { "epoch": 0.6688563837552188, "grad_norm": 0.7653732577298347, "learning_rate": 1.8397404703974048e-06, "loss": 0.6499, "step": 22909 }, { "epoch": 0.6688855799830662, "grad_norm": 0.7105567005945279, "learning_rate": 1.8395782643957828e-06, "loss": 0.6024, "step": 22910 }, { "epoch": 0.6689147762109136, "grad_norm": 0.7699322074052523, "learning_rate": 1.8394160583941606e-06, "loss": 0.6917, "step": 22911 }, { "epoch": 0.6689439724387609, "grad_norm": 0.8556583818882788, "learning_rate": 1.8392538523925388e-06, "loss": 0.737, "step": 22912 }, { "epoch": 0.6689731686666083, "grad_norm": 0.682695681326543, "learning_rate": 1.8390916463909168e-06, "loss": 0.591, "step": 22913 }, { "epoch": 0.6690023648944556, "grad_norm": 0.7172914332680416, "learning_rate": 1.8389294403892946e-06, "loss": 0.6116, "step": 22914 }, { "epoch": 0.669031561122303, "grad_norm": 0.730655165853112, "learning_rate": 1.8387672343876726e-06, "loss": 0.5999, "step": 22915 }, { "epoch": 0.6690607573501504, "grad_norm": 0.7474766425931095, "learning_rate": 1.8386050283860504e-06, "loss": 0.673, "step": 22916 }, { "epoch": 0.6690899535779977, "grad_norm": 0.7148436863174393, "learning_rate": 1.8384428223844284e-06, "loss": 0.6009, "step": 22917 }, { "epoch": 0.6691191498058451, "grad_norm": 0.7655043629046877, "learning_rate": 1.8382806163828062e-06, "loss": 0.6789, "step": 22918 }, { "epoch": 0.6691483460336924, "grad_norm": 0.7113332241995809, "learning_rate": 1.8381184103811842e-06, "loss": 0.6469, "step": 22919 }, { "epoch": 0.6691775422615398, "grad_norm": 0.6702854510984942, "learning_rate": 1.837956204379562e-06, "loss": 0.5244, "step": 22920 }, { "epoch": 0.6692067384893872, "grad_norm": 0.7110959112275077, "learning_rate": 1.8377939983779402e-06, "loss": 0.6317, "step": 22921 }, { "epoch": 0.6692359347172345, "grad_norm": 0.725834295366961, "learning_rate": 1.8376317923763182e-06, "loss": 0.6142, "step": 22922 }, { "epoch": 0.6692651309450819, "grad_norm": 0.7679017791441831, "learning_rate": 1.837469586374696e-06, "loss": 0.6838, "step": 22923 }, { "epoch": 0.6692943271729292, "grad_norm": 0.7581253480656763, "learning_rate": 1.837307380373074e-06, "loss": 0.6263, "step": 22924 }, { "epoch": 0.6693235234007766, "grad_norm": 0.7161341241036351, "learning_rate": 1.8371451743714518e-06, "loss": 0.6595, "step": 22925 }, { "epoch": 0.669352719628624, "grad_norm": 0.7499688890749857, "learning_rate": 1.8369829683698298e-06, "loss": 0.7041, "step": 22926 }, { "epoch": 0.6693819158564713, "grad_norm": 0.7014790266799754, "learning_rate": 1.8368207623682076e-06, "loss": 0.5895, "step": 22927 }, { "epoch": 0.6694111120843187, "grad_norm": 0.7243303031276596, "learning_rate": 1.8366585563665856e-06, "loss": 0.6857, "step": 22928 }, { "epoch": 0.669440308312166, "grad_norm": 0.6817587640200462, "learning_rate": 1.8364963503649636e-06, "loss": 0.5561, "step": 22929 }, { "epoch": 0.6694695045400134, "grad_norm": 0.7097425365315465, "learning_rate": 1.8363341443633416e-06, "loss": 0.638, "step": 22930 }, { "epoch": 0.6694987007678608, "grad_norm": 0.6930740919054612, "learning_rate": 1.8361719383617196e-06, "loss": 0.6047, "step": 22931 }, { "epoch": 0.6695278969957081, "grad_norm": 0.7160647004861136, "learning_rate": 1.8360097323600977e-06, "loss": 0.6248, "step": 22932 }, { "epoch": 0.6695570932235555, "grad_norm": 0.6975286696019231, "learning_rate": 1.8358475263584755e-06, "loss": 0.5098, "step": 22933 }, { "epoch": 0.6695862894514029, "grad_norm": 0.7164664116837461, "learning_rate": 1.8356853203568535e-06, "loss": 0.6653, "step": 22934 }, { "epoch": 0.6696154856792502, "grad_norm": 0.6400692618244663, "learning_rate": 1.8355231143552313e-06, "loss": 0.5083, "step": 22935 }, { "epoch": 0.6696446819070976, "grad_norm": 0.6995183775555752, "learning_rate": 1.8353609083536093e-06, "loss": 0.6105, "step": 22936 }, { "epoch": 0.6696738781349449, "grad_norm": 0.7918931796888566, "learning_rate": 1.835198702351987e-06, "loss": 0.7788, "step": 22937 }, { "epoch": 0.6697030743627923, "grad_norm": 0.7632396896936884, "learning_rate": 1.835036496350365e-06, "loss": 0.7263, "step": 22938 }, { "epoch": 0.6697322705906397, "grad_norm": 0.6641979617937848, "learning_rate": 1.8348742903487429e-06, "loss": 0.5389, "step": 22939 }, { "epoch": 0.669761466818487, "grad_norm": 0.7098487426217388, "learning_rate": 1.834712084347121e-06, "loss": 0.6277, "step": 22940 }, { "epoch": 0.6697906630463344, "grad_norm": 0.861998750571976, "learning_rate": 1.834549878345499e-06, "loss": 0.6844, "step": 22941 }, { "epoch": 0.6698198592741818, "grad_norm": 0.7219612653382765, "learning_rate": 1.8343876723438769e-06, "loss": 0.6279, "step": 22942 }, { "epoch": 0.6698490555020291, "grad_norm": 0.7153352202479124, "learning_rate": 1.8342254663422549e-06, "loss": 0.6118, "step": 22943 }, { "epoch": 0.6698782517298765, "grad_norm": 0.6659987481005362, "learning_rate": 1.8340632603406327e-06, "loss": 0.5654, "step": 22944 }, { "epoch": 0.6699074479577238, "grad_norm": 0.7555060665403834, "learning_rate": 1.8339010543390107e-06, "loss": 0.7417, "step": 22945 }, { "epoch": 0.6699366441855712, "grad_norm": 0.7187703934616185, "learning_rate": 1.8337388483373885e-06, "loss": 0.6354, "step": 22946 }, { "epoch": 0.6699658404134186, "grad_norm": 0.6710999855961861, "learning_rate": 1.8335766423357665e-06, "loss": 0.5416, "step": 22947 }, { "epoch": 0.6699950366412659, "grad_norm": 0.7598002006094786, "learning_rate": 1.8334144363341445e-06, "loss": 0.6979, "step": 22948 }, { "epoch": 0.6700242328691133, "grad_norm": 0.6983779708453635, "learning_rate": 1.8332522303325225e-06, "loss": 0.5827, "step": 22949 }, { "epoch": 0.6700534290969606, "grad_norm": 0.6768771318978746, "learning_rate": 1.8330900243309005e-06, "loss": 0.5614, "step": 22950 }, { "epoch": 0.670082625324808, "grad_norm": 0.7049079559312058, "learning_rate": 1.8329278183292783e-06, "loss": 0.6457, "step": 22951 }, { "epoch": 0.6701118215526554, "grad_norm": 0.6678904287661007, "learning_rate": 1.8327656123276563e-06, "loss": 0.5773, "step": 22952 }, { "epoch": 0.6701410177805027, "grad_norm": 0.6464021550189997, "learning_rate": 1.8326034063260343e-06, "loss": 0.5489, "step": 22953 }, { "epoch": 0.6701702140083501, "grad_norm": 0.7372963865788369, "learning_rate": 1.8324412003244121e-06, "loss": 0.6798, "step": 22954 }, { "epoch": 0.6701994102361974, "grad_norm": 0.7489640894030166, "learning_rate": 1.8322789943227901e-06, "loss": 0.6708, "step": 22955 }, { "epoch": 0.6702286064640448, "grad_norm": 0.7346184101913423, "learning_rate": 1.832116788321168e-06, "loss": 0.6661, "step": 22956 }, { "epoch": 0.6702578026918922, "grad_norm": 0.7204146325589331, "learning_rate": 1.831954582319546e-06, "loss": 0.6263, "step": 22957 }, { "epoch": 0.6702869989197395, "grad_norm": 0.7024863953060279, "learning_rate": 1.8317923763179237e-06, "loss": 0.6055, "step": 22958 }, { "epoch": 0.6703161951475869, "grad_norm": 0.7830364473025353, "learning_rate": 1.831630170316302e-06, "loss": 0.6619, "step": 22959 }, { "epoch": 0.6703453913754343, "grad_norm": 0.6967331038930115, "learning_rate": 1.83146796431468e-06, "loss": 0.5966, "step": 22960 }, { "epoch": 0.6703745876032816, "grad_norm": 0.7189328215301528, "learning_rate": 1.8313057583130577e-06, "loss": 0.5619, "step": 22961 }, { "epoch": 0.670403783831129, "grad_norm": 0.7240170839389722, "learning_rate": 1.8311435523114357e-06, "loss": 0.6201, "step": 22962 }, { "epoch": 0.6704329800589763, "grad_norm": 0.696106448834946, "learning_rate": 1.8309813463098135e-06, "loss": 0.6038, "step": 22963 }, { "epoch": 0.6704621762868237, "grad_norm": 0.7109240095243267, "learning_rate": 1.8308191403081915e-06, "loss": 0.5894, "step": 22964 }, { "epoch": 0.6704913725146711, "grad_norm": 0.775527033393862, "learning_rate": 1.8306569343065693e-06, "loss": 0.6953, "step": 22965 }, { "epoch": 0.6705205687425184, "grad_norm": 0.6993744885887492, "learning_rate": 1.8304947283049473e-06, "loss": 0.6497, "step": 22966 }, { "epoch": 0.6705497649703658, "grad_norm": 0.7000123432669023, "learning_rate": 1.8303325223033254e-06, "loss": 0.5872, "step": 22967 }, { "epoch": 0.6705789611982131, "grad_norm": 0.7384955387764724, "learning_rate": 1.8301703163017034e-06, "loss": 0.6603, "step": 22968 }, { "epoch": 0.6706081574260605, "grad_norm": 0.7682054696077246, "learning_rate": 1.8300081103000814e-06, "loss": 0.6416, "step": 22969 }, { "epoch": 0.6706373536539079, "grad_norm": 0.6551902567066796, "learning_rate": 1.8298459042984592e-06, "loss": 0.536, "step": 22970 }, { "epoch": 0.6706665498817552, "grad_norm": 0.7139749306677486, "learning_rate": 1.8296836982968372e-06, "loss": 0.6144, "step": 22971 }, { "epoch": 0.6706957461096026, "grad_norm": 0.684196618094806, "learning_rate": 1.8295214922952152e-06, "loss": 0.5831, "step": 22972 }, { "epoch": 0.67072494233745, "grad_norm": 0.7179771354687549, "learning_rate": 1.829359286293593e-06, "loss": 0.5887, "step": 22973 }, { "epoch": 0.6707541385652973, "grad_norm": 0.727769007932277, "learning_rate": 1.829197080291971e-06, "loss": 0.6804, "step": 22974 }, { "epoch": 0.6707833347931448, "grad_norm": 0.7162837539165982, "learning_rate": 1.8290348742903488e-06, "loss": 0.6526, "step": 22975 }, { "epoch": 0.6708125310209921, "grad_norm": 0.7789907449895753, "learning_rate": 1.8288726682887268e-06, "loss": 0.6923, "step": 22976 }, { "epoch": 0.6708417272488395, "grad_norm": 0.7779949757956801, "learning_rate": 1.8287104622871046e-06, "loss": 0.7244, "step": 22977 }, { "epoch": 0.6708709234766869, "grad_norm": 0.7330590003762216, "learning_rate": 1.8285482562854828e-06, "loss": 0.6677, "step": 22978 }, { "epoch": 0.6709001197045342, "grad_norm": 0.6807607340838118, "learning_rate": 1.8283860502838608e-06, "loss": 0.5625, "step": 22979 }, { "epoch": 0.6709293159323816, "grad_norm": 0.7113312616693181, "learning_rate": 1.8282238442822386e-06, "loss": 0.6004, "step": 22980 }, { "epoch": 0.670958512160229, "grad_norm": 0.7148546548195853, "learning_rate": 1.8280616382806166e-06, "loss": 0.6309, "step": 22981 }, { "epoch": 0.6709877083880763, "grad_norm": 0.8165618184976101, "learning_rate": 1.8278994322789944e-06, "loss": 0.723, "step": 22982 }, { "epoch": 0.6710169046159237, "grad_norm": 0.7544806119133998, "learning_rate": 1.8277372262773724e-06, "loss": 0.6937, "step": 22983 }, { "epoch": 0.671046100843771, "grad_norm": 0.7217081893348917, "learning_rate": 1.8275750202757502e-06, "loss": 0.6632, "step": 22984 }, { "epoch": 0.6710752970716184, "grad_norm": 0.9017884725375463, "learning_rate": 1.8274128142741282e-06, "loss": 0.6716, "step": 22985 }, { "epoch": 0.6711044932994658, "grad_norm": 0.7057794591904076, "learning_rate": 1.8272506082725062e-06, "loss": 0.5952, "step": 22986 }, { "epoch": 0.6711336895273131, "grad_norm": 0.724516801366736, "learning_rate": 1.8270884022708842e-06, "loss": 0.6386, "step": 22987 }, { "epoch": 0.6711628857551605, "grad_norm": 0.7036264708603767, "learning_rate": 1.8269261962692622e-06, "loss": 0.6185, "step": 22988 }, { "epoch": 0.6711920819830078, "grad_norm": 0.7663541543801295, "learning_rate": 1.82676399026764e-06, "loss": 0.656, "step": 22989 }, { "epoch": 0.6712212782108552, "grad_norm": 0.7537181156007127, "learning_rate": 1.826601784266018e-06, "loss": 0.6925, "step": 22990 }, { "epoch": 0.6712504744387026, "grad_norm": 0.7479665804329628, "learning_rate": 1.826439578264396e-06, "loss": 0.6788, "step": 22991 }, { "epoch": 0.6712796706665499, "grad_norm": 0.6966341060101227, "learning_rate": 1.8262773722627738e-06, "loss": 0.6, "step": 22992 }, { "epoch": 0.6713088668943973, "grad_norm": 0.7536386771481539, "learning_rate": 1.8261151662611518e-06, "loss": 0.6728, "step": 22993 }, { "epoch": 0.6713380631222446, "grad_norm": 0.8379947185206278, "learning_rate": 1.8259529602595296e-06, "loss": 0.6507, "step": 22994 }, { "epoch": 0.671367259350092, "grad_norm": 0.7084217915730447, "learning_rate": 1.8257907542579076e-06, "loss": 0.6189, "step": 22995 }, { "epoch": 0.6713964555779394, "grad_norm": 0.7175115558483723, "learning_rate": 1.8256285482562854e-06, "loss": 0.6491, "step": 22996 }, { "epoch": 0.6714256518057867, "grad_norm": 0.7419193221659965, "learning_rate": 1.8254663422546637e-06, "loss": 0.6907, "step": 22997 }, { "epoch": 0.6714548480336341, "grad_norm": 0.7431320829151329, "learning_rate": 1.8253041362530417e-06, "loss": 0.6783, "step": 22998 }, { "epoch": 0.6714840442614814, "grad_norm": 0.6720046619282227, "learning_rate": 1.8251419302514195e-06, "loss": 0.5913, "step": 22999 }, { "epoch": 0.6715132404893288, "grad_norm": 0.7507117969518493, "learning_rate": 1.8249797242497975e-06, "loss": 0.657, "step": 23000 }, { "epoch": 0.6715424367171762, "grad_norm": 0.7059667933657847, "learning_rate": 1.8248175182481753e-06, "loss": 0.5991, "step": 23001 }, { "epoch": 0.6715716329450235, "grad_norm": 0.7664665532571477, "learning_rate": 1.8246553122465533e-06, "loss": 0.68, "step": 23002 }, { "epoch": 0.6716008291728709, "grad_norm": 0.7090418933608386, "learning_rate": 1.824493106244931e-06, "loss": 0.6272, "step": 23003 }, { "epoch": 0.6716300254007183, "grad_norm": 0.709258183054679, "learning_rate": 1.824330900243309e-06, "loss": 0.6465, "step": 23004 }, { "epoch": 0.6716592216285656, "grad_norm": 0.7286892742797133, "learning_rate": 1.824168694241687e-06, "loss": 0.6548, "step": 23005 }, { "epoch": 0.671688417856413, "grad_norm": 0.7102521075017314, "learning_rate": 1.824006488240065e-06, "loss": 0.6559, "step": 23006 }, { "epoch": 0.6717176140842603, "grad_norm": 0.715623495871481, "learning_rate": 1.823844282238443e-06, "loss": 0.6616, "step": 23007 }, { "epoch": 0.6717468103121077, "grad_norm": 0.7320041484875763, "learning_rate": 1.8236820762368209e-06, "loss": 0.6947, "step": 23008 }, { "epoch": 0.6717760065399551, "grad_norm": 0.7656398836527077, "learning_rate": 1.8235198702351989e-06, "loss": 0.7124, "step": 23009 }, { "epoch": 0.6718052027678024, "grad_norm": 0.6486012131031117, "learning_rate": 1.8233576642335769e-06, "loss": 0.473, "step": 23010 }, { "epoch": 0.6718343989956498, "grad_norm": 0.7667345789505841, "learning_rate": 1.8231954582319547e-06, "loss": 0.6551, "step": 23011 }, { "epoch": 0.6718635952234971, "grad_norm": 0.7434540252202043, "learning_rate": 1.8230332522303327e-06, "loss": 0.7003, "step": 23012 }, { "epoch": 0.6718927914513445, "grad_norm": 0.7614400649498309, "learning_rate": 1.8228710462287105e-06, "loss": 0.7413, "step": 23013 }, { "epoch": 0.6719219876791919, "grad_norm": 0.7301814268099563, "learning_rate": 1.8227088402270885e-06, "loss": 0.6977, "step": 23014 }, { "epoch": 0.6719511839070392, "grad_norm": 0.73066932832935, "learning_rate": 1.8225466342254667e-06, "loss": 0.6353, "step": 23015 }, { "epoch": 0.6719803801348866, "grad_norm": 0.700707531273361, "learning_rate": 1.8223844282238445e-06, "loss": 0.5879, "step": 23016 }, { "epoch": 0.672009576362734, "grad_norm": 0.6741792961750995, "learning_rate": 1.8222222222222225e-06, "loss": 0.566, "step": 23017 }, { "epoch": 0.6720387725905813, "grad_norm": 0.7563357509746226, "learning_rate": 1.8220600162206003e-06, "loss": 0.5904, "step": 23018 }, { "epoch": 0.6720679688184287, "grad_norm": 0.759298302915877, "learning_rate": 1.8218978102189783e-06, "loss": 0.6828, "step": 23019 }, { "epoch": 0.672097165046276, "grad_norm": 0.7193042198458832, "learning_rate": 1.8217356042173561e-06, "loss": 0.6445, "step": 23020 }, { "epoch": 0.6721263612741234, "grad_norm": 0.7422156350209399, "learning_rate": 1.8215733982157341e-06, "loss": 0.6764, "step": 23021 }, { "epoch": 0.6721555575019708, "grad_norm": 0.6831697555838423, "learning_rate": 1.821411192214112e-06, "loss": 0.5926, "step": 23022 }, { "epoch": 0.6721847537298181, "grad_norm": 0.7086550387610026, "learning_rate": 1.82124898621249e-06, "loss": 0.5666, "step": 23023 }, { "epoch": 0.6722139499576655, "grad_norm": 0.7032963590829804, "learning_rate": 1.8210867802108677e-06, "loss": 0.6199, "step": 23024 }, { "epoch": 0.6722431461855128, "grad_norm": 0.7429103115550426, "learning_rate": 1.820924574209246e-06, "loss": 0.6575, "step": 23025 }, { "epoch": 0.6722723424133602, "grad_norm": 0.6751942229019136, "learning_rate": 1.820762368207624e-06, "loss": 0.5746, "step": 23026 }, { "epoch": 0.6723015386412076, "grad_norm": 0.711171752997716, "learning_rate": 1.8206001622060017e-06, "loss": 0.6022, "step": 23027 }, { "epoch": 0.6723307348690549, "grad_norm": 0.7051033653095282, "learning_rate": 1.8204379562043797e-06, "loss": 0.62, "step": 23028 }, { "epoch": 0.6723599310969023, "grad_norm": 0.7316965089079895, "learning_rate": 1.8202757502027578e-06, "loss": 0.663, "step": 23029 }, { "epoch": 0.6723891273247496, "grad_norm": 0.7153823155749691, "learning_rate": 1.8201135442011355e-06, "loss": 0.6355, "step": 23030 }, { "epoch": 0.672418323552597, "grad_norm": 0.8253970015429787, "learning_rate": 1.8199513381995136e-06, "loss": 0.6632, "step": 23031 }, { "epoch": 0.6724475197804444, "grad_norm": 0.6668106394379407, "learning_rate": 1.8197891321978913e-06, "loss": 0.5488, "step": 23032 }, { "epoch": 0.6724767160082917, "grad_norm": 0.8106507914781859, "learning_rate": 1.8196269261962694e-06, "loss": 0.6765, "step": 23033 }, { "epoch": 0.6725059122361391, "grad_norm": 0.6656566568728681, "learning_rate": 1.8194647201946476e-06, "loss": 0.559, "step": 23034 }, { "epoch": 0.6725351084639865, "grad_norm": 0.7438661302305203, "learning_rate": 1.8193025141930254e-06, "loss": 0.697, "step": 23035 }, { "epoch": 0.6725643046918338, "grad_norm": 0.737349592174546, "learning_rate": 1.8191403081914034e-06, "loss": 0.6537, "step": 23036 }, { "epoch": 0.6725935009196812, "grad_norm": 0.7131857147204624, "learning_rate": 1.8189781021897812e-06, "loss": 0.5995, "step": 23037 }, { "epoch": 0.6726226971475285, "grad_norm": 0.7240882640853323, "learning_rate": 1.8188158961881592e-06, "loss": 0.6455, "step": 23038 }, { "epoch": 0.6726518933753759, "grad_norm": 0.6822155965914686, "learning_rate": 1.818653690186537e-06, "loss": 0.6064, "step": 23039 }, { "epoch": 0.6726810896032233, "grad_norm": 0.7567960187392465, "learning_rate": 1.818491484184915e-06, "loss": 0.6084, "step": 23040 }, { "epoch": 0.6727102858310706, "grad_norm": 0.7957824244359749, "learning_rate": 1.8183292781832928e-06, "loss": 0.699, "step": 23041 }, { "epoch": 0.672739482058918, "grad_norm": 0.7275715127295049, "learning_rate": 1.8181670721816708e-06, "loss": 0.5946, "step": 23042 }, { "epoch": 0.6727686782867653, "grad_norm": 0.6612218269295188, "learning_rate": 1.8180048661800486e-06, "loss": 0.5362, "step": 23043 }, { "epoch": 0.6727978745146127, "grad_norm": 0.759426560320239, "learning_rate": 1.8178426601784268e-06, "loss": 0.6776, "step": 23044 }, { "epoch": 0.6728270707424601, "grad_norm": 0.7663722839367908, "learning_rate": 1.8176804541768048e-06, "loss": 0.6974, "step": 23045 }, { "epoch": 0.6728562669703074, "grad_norm": 0.6862157680517532, "learning_rate": 1.8175182481751826e-06, "loss": 0.5485, "step": 23046 }, { "epoch": 0.6728854631981548, "grad_norm": 0.6673086809010191, "learning_rate": 1.8173560421735606e-06, "loss": 0.542, "step": 23047 }, { "epoch": 0.6729146594260021, "grad_norm": 0.7098644411840133, "learning_rate": 1.8171938361719386e-06, "loss": 0.6272, "step": 23048 }, { "epoch": 0.6729438556538495, "grad_norm": 0.7338833003862637, "learning_rate": 1.8170316301703164e-06, "loss": 0.6652, "step": 23049 }, { "epoch": 0.6729730518816969, "grad_norm": 0.7117471368844472, "learning_rate": 1.8168694241686944e-06, "loss": 0.6105, "step": 23050 }, { "epoch": 0.6730022481095442, "grad_norm": 0.7104376488686267, "learning_rate": 1.8167072181670722e-06, "loss": 0.6415, "step": 23051 }, { "epoch": 0.6730314443373916, "grad_norm": 0.7487754923017131, "learning_rate": 1.8165450121654502e-06, "loss": 0.6432, "step": 23052 }, { "epoch": 0.673060640565239, "grad_norm": 0.6999317053453786, "learning_rate": 1.8163828061638284e-06, "loss": 0.6115, "step": 23053 }, { "epoch": 0.6730898367930863, "grad_norm": 0.6925380271280828, "learning_rate": 1.8162206001622062e-06, "loss": 0.6307, "step": 23054 }, { "epoch": 0.6731190330209337, "grad_norm": 0.7971315849266356, "learning_rate": 1.8160583941605842e-06, "loss": 0.6699, "step": 23055 }, { "epoch": 0.673148229248781, "grad_norm": 0.7043140402948812, "learning_rate": 1.815896188158962e-06, "loss": 0.6301, "step": 23056 }, { "epoch": 0.6731774254766284, "grad_norm": 0.7034901691802069, "learning_rate": 1.81573398215734e-06, "loss": 0.6, "step": 23057 }, { "epoch": 0.6732066217044758, "grad_norm": 0.7165372875396607, "learning_rate": 1.8155717761557178e-06, "loss": 0.644, "step": 23058 }, { "epoch": 0.6732358179323231, "grad_norm": 0.7218549837014575, "learning_rate": 1.8154095701540958e-06, "loss": 0.6484, "step": 23059 }, { "epoch": 0.6732650141601705, "grad_norm": 0.7004616618215143, "learning_rate": 1.8152473641524736e-06, "loss": 0.5994, "step": 23060 }, { "epoch": 0.6732942103880178, "grad_norm": 0.6291226044036313, "learning_rate": 1.8150851581508516e-06, "loss": 0.5035, "step": 23061 }, { "epoch": 0.6733234066158652, "grad_norm": 0.732046463087729, "learning_rate": 1.8149229521492294e-06, "loss": 0.6861, "step": 23062 }, { "epoch": 0.6733526028437126, "grad_norm": 0.7261287438758471, "learning_rate": 1.8147607461476077e-06, "loss": 0.6127, "step": 23063 }, { "epoch": 0.6733817990715599, "grad_norm": 0.7150688810928016, "learning_rate": 1.8145985401459857e-06, "loss": 0.6049, "step": 23064 }, { "epoch": 0.6734109952994073, "grad_norm": 0.6986524092211722, "learning_rate": 1.8144363341443635e-06, "loss": 0.5876, "step": 23065 }, { "epoch": 0.6734401915272547, "grad_norm": 0.7032808174332437, "learning_rate": 1.8142741281427415e-06, "loss": 0.6401, "step": 23066 }, { "epoch": 0.673469387755102, "grad_norm": 0.6714467598599391, "learning_rate": 1.8141119221411195e-06, "loss": 0.5372, "step": 23067 }, { "epoch": 0.6734985839829494, "grad_norm": 0.7163453405646086, "learning_rate": 1.8139497161394973e-06, "loss": 0.6354, "step": 23068 }, { "epoch": 0.6735277802107967, "grad_norm": 0.7262228502908877, "learning_rate": 1.8137875101378753e-06, "loss": 0.588, "step": 23069 }, { "epoch": 0.6735569764386441, "grad_norm": 0.7953330929357435, "learning_rate": 1.813625304136253e-06, "loss": 0.7327, "step": 23070 }, { "epoch": 0.6735861726664915, "grad_norm": 0.9033518018123594, "learning_rate": 1.813463098134631e-06, "loss": 0.6854, "step": 23071 }, { "epoch": 0.6736153688943388, "grad_norm": 0.6994486727096051, "learning_rate": 1.8133008921330093e-06, "loss": 0.5993, "step": 23072 }, { "epoch": 0.6736445651221862, "grad_norm": 0.737003140547863, "learning_rate": 1.813138686131387e-06, "loss": 0.628, "step": 23073 }, { "epoch": 0.6736737613500335, "grad_norm": 0.7573165197151929, "learning_rate": 1.812976480129765e-06, "loss": 0.6027, "step": 23074 }, { "epoch": 0.6737029575778809, "grad_norm": 0.7233332402986381, "learning_rate": 1.8128142741281429e-06, "loss": 0.6198, "step": 23075 }, { "epoch": 0.6737321538057283, "grad_norm": 0.7229785853728313, "learning_rate": 1.8126520681265209e-06, "loss": 0.6184, "step": 23076 }, { "epoch": 0.6737613500335756, "grad_norm": 0.726404820150189, "learning_rate": 1.8124898621248987e-06, "loss": 0.6091, "step": 23077 }, { "epoch": 0.673790546261423, "grad_norm": 0.6587925909477471, "learning_rate": 1.8123276561232767e-06, "loss": 0.5377, "step": 23078 }, { "epoch": 0.6738197424892703, "grad_norm": 0.7625877080304689, "learning_rate": 1.8121654501216545e-06, "loss": 0.7007, "step": 23079 }, { "epoch": 0.6738489387171177, "grad_norm": 0.6968532135998498, "learning_rate": 1.8120032441200325e-06, "loss": 0.6069, "step": 23080 }, { "epoch": 0.6738781349449651, "grad_norm": 0.7543530052765443, "learning_rate": 1.8118410381184107e-06, "loss": 0.6388, "step": 23081 }, { "epoch": 0.6739073311728124, "grad_norm": 0.7551650404180726, "learning_rate": 1.8116788321167885e-06, "loss": 0.6476, "step": 23082 }, { "epoch": 0.6739365274006598, "grad_norm": 0.7180533791145534, "learning_rate": 1.8115166261151665e-06, "loss": 0.5785, "step": 23083 }, { "epoch": 0.6739657236285072, "grad_norm": 0.713710325544415, "learning_rate": 1.8113544201135443e-06, "loss": 0.5918, "step": 23084 }, { "epoch": 0.6739949198563545, "grad_norm": 0.7492412712641577, "learning_rate": 1.8111922141119223e-06, "loss": 0.6383, "step": 23085 }, { "epoch": 0.6740241160842019, "grad_norm": 0.7919207804173185, "learning_rate": 1.8110300081103001e-06, "loss": 0.6573, "step": 23086 }, { "epoch": 0.6740533123120492, "grad_norm": 0.7938488142708426, "learning_rate": 1.8108678021086781e-06, "loss": 0.6978, "step": 23087 }, { "epoch": 0.6740825085398966, "grad_norm": 0.7520156414048936, "learning_rate": 1.8107055961070561e-06, "loss": 0.6219, "step": 23088 }, { "epoch": 0.674111704767744, "grad_norm": 0.6696412531142931, "learning_rate": 1.810543390105434e-06, "loss": 0.5295, "step": 23089 }, { "epoch": 0.6741409009955913, "grad_norm": 0.7288503545192611, "learning_rate": 1.810381184103812e-06, "loss": 0.6443, "step": 23090 }, { "epoch": 0.6741700972234387, "grad_norm": 0.7649872896081876, "learning_rate": 1.8102189781021901e-06, "loss": 0.6808, "step": 23091 }, { "epoch": 0.674199293451286, "grad_norm": 0.6960219416420117, "learning_rate": 1.810056772100568e-06, "loss": 0.608, "step": 23092 }, { "epoch": 0.6742284896791334, "grad_norm": 0.7678378431773653, "learning_rate": 1.809894566098946e-06, "loss": 0.7266, "step": 23093 }, { "epoch": 0.6742576859069808, "grad_norm": 0.7392113951397324, "learning_rate": 1.8097323600973237e-06, "loss": 0.6849, "step": 23094 }, { "epoch": 0.6742868821348281, "grad_norm": 0.6773818795595646, "learning_rate": 1.8095701540957018e-06, "loss": 0.5548, "step": 23095 }, { "epoch": 0.6743160783626756, "grad_norm": 0.7154019714248895, "learning_rate": 1.8094079480940795e-06, "loss": 0.6437, "step": 23096 }, { "epoch": 0.674345274590523, "grad_norm": 0.7678733389796644, "learning_rate": 1.8092457420924576e-06, "loss": 0.685, "step": 23097 }, { "epoch": 0.6743744708183703, "grad_norm": 0.7291192735150904, "learning_rate": 1.8090835360908353e-06, "loss": 0.5888, "step": 23098 }, { "epoch": 0.6744036670462177, "grad_norm": 0.724038766172499, "learning_rate": 1.8089213300892134e-06, "loss": 0.6181, "step": 23099 }, { "epoch": 0.674432863274065, "grad_norm": 0.736773214780886, "learning_rate": 1.8087591240875916e-06, "loss": 0.6618, "step": 23100 }, { "epoch": 0.6744620595019124, "grad_norm": 0.7077813069587594, "learning_rate": 1.8085969180859694e-06, "loss": 0.6404, "step": 23101 }, { "epoch": 0.6744912557297598, "grad_norm": 0.7524269159399903, "learning_rate": 1.8084347120843474e-06, "loss": 0.6822, "step": 23102 }, { "epoch": 0.6745204519576071, "grad_norm": 0.7589903250730763, "learning_rate": 1.8082725060827252e-06, "loss": 0.6797, "step": 23103 }, { "epoch": 0.6745496481854545, "grad_norm": 0.739603391913392, "learning_rate": 1.8081103000811032e-06, "loss": 0.6811, "step": 23104 }, { "epoch": 0.6745788444133018, "grad_norm": 0.7387243684328613, "learning_rate": 1.807948094079481e-06, "loss": 0.6616, "step": 23105 }, { "epoch": 0.6746080406411492, "grad_norm": 0.7409604337087341, "learning_rate": 1.807785888077859e-06, "loss": 0.5403, "step": 23106 }, { "epoch": 0.6746372368689966, "grad_norm": 0.7617876528934773, "learning_rate": 1.807623682076237e-06, "loss": 0.6978, "step": 23107 }, { "epoch": 0.6746664330968439, "grad_norm": 0.7043072898170343, "learning_rate": 1.8074614760746148e-06, "loss": 0.6117, "step": 23108 }, { "epoch": 0.6746956293246913, "grad_norm": 0.770513928609114, "learning_rate": 1.8072992700729928e-06, "loss": 0.7087, "step": 23109 }, { "epoch": 0.6747248255525387, "grad_norm": 0.6476282223351042, "learning_rate": 1.807137064071371e-06, "loss": 0.5536, "step": 23110 }, { "epoch": 0.674754021780386, "grad_norm": 0.7225111599757361, "learning_rate": 1.8069748580697488e-06, "loss": 0.5817, "step": 23111 }, { "epoch": 0.6747832180082334, "grad_norm": 0.7529374335231888, "learning_rate": 1.8068126520681268e-06, "loss": 0.6393, "step": 23112 }, { "epoch": 0.6748124142360807, "grad_norm": 0.6936385515833313, "learning_rate": 1.8066504460665046e-06, "loss": 0.5737, "step": 23113 }, { "epoch": 0.6748416104639281, "grad_norm": 0.7061317584585317, "learning_rate": 1.8064882400648826e-06, "loss": 0.6166, "step": 23114 }, { "epoch": 0.6748708066917755, "grad_norm": 0.7366462971739228, "learning_rate": 1.8063260340632604e-06, "loss": 0.6796, "step": 23115 }, { "epoch": 0.6749000029196228, "grad_norm": 0.718231264452097, "learning_rate": 1.8061638280616384e-06, "loss": 0.5861, "step": 23116 }, { "epoch": 0.6749291991474702, "grad_norm": 0.6938153567226827, "learning_rate": 1.8060016220600162e-06, "loss": 0.645, "step": 23117 }, { "epoch": 0.6749583953753175, "grad_norm": 0.7231693051695488, "learning_rate": 1.8058394160583942e-06, "loss": 0.5999, "step": 23118 }, { "epoch": 0.6749875916031649, "grad_norm": 0.7506988328192682, "learning_rate": 1.8056772100567724e-06, "loss": 0.6887, "step": 23119 }, { "epoch": 0.6750167878310123, "grad_norm": 0.7204253493978207, "learning_rate": 1.8055150040551502e-06, "loss": 0.5685, "step": 23120 }, { "epoch": 0.6750459840588596, "grad_norm": 0.699406161352004, "learning_rate": 1.8053527980535282e-06, "loss": 0.5555, "step": 23121 }, { "epoch": 0.675075180286707, "grad_norm": 0.7726376933428358, "learning_rate": 1.805190592051906e-06, "loss": 0.7075, "step": 23122 }, { "epoch": 0.6751043765145543, "grad_norm": 0.763195333729123, "learning_rate": 1.805028386050284e-06, "loss": 0.6392, "step": 23123 }, { "epoch": 0.6751335727424017, "grad_norm": 0.6790276490431897, "learning_rate": 1.8048661800486618e-06, "loss": 0.5819, "step": 23124 }, { "epoch": 0.6751627689702491, "grad_norm": 0.7541394404096824, "learning_rate": 1.8047039740470398e-06, "loss": 0.6422, "step": 23125 }, { "epoch": 0.6751919651980964, "grad_norm": 0.7715620338788, "learning_rate": 1.8045417680454178e-06, "loss": 0.701, "step": 23126 }, { "epoch": 0.6752211614259438, "grad_norm": 0.749155050647346, "learning_rate": 1.8043795620437956e-06, "loss": 0.6317, "step": 23127 }, { "epoch": 0.6752503576537912, "grad_norm": 0.8031502410224105, "learning_rate": 1.8042173560421736e-06, "loss": 0.7528, "step": 23128 }, { "epoch": 0.6752795538816385, "grad_norm": 0.702087419383255, "learning_rate": 1.8040551500405519e-06, "loss": 0.6395, "step": 23129 }, { "epoch": 0.6753087501094859, "grad_norm": 0.70113797445039, "learning_rate": 1.8038929440389297e-06, "loss": 0.5923, "step": 23130 }, { "epoch": 0.6753379463373332, "grad_norm": 0.7511311712787185, "learning_rate": 1.8037307380373077e-06, "loss": 0.58, "step": 23131 }, { "epoch": 0.6753671425651806, "grad_norm": 0.7336150435187573, "learning_rate": 1.8035685320356855e-06, "loss": 0.5959, "step": 23132 }, { "epoch": 0.675396338793028, "grad_norm": 0.6805325037009018, "learning_rate": 1.8034063260340635e-06, "loss": 0.5931, "step": 23133 }, { "epoch": 0.6754255350208753, "grad_norm": 0.7094006142168924, "learning_rate": 1.8032441200324413e-06, "loss": 0.6287, "step": 23134 }, { "epoch": 0.6754547312487227, "grad_norm": 0.6811299123698901, "learning_rate": 1.8030819140308193e-06, "loss": 0.5442, "step": 23135 }, { "epoch": 0.67548392747657, "grad_norm": 0.7608505077189353, "learning_rate": 1.802919708029197e-06, "loss": 0.7052, "step": 23136 }, { "epoch": 0.6755131237044174, "grad_norm": 0.6755295665576478, "learning_rate": 1.802757502027575e-06, "loss": 0.5727, "step": 23137 }, { "epoch": 0.6755423199322648, "grad_norm": 0.6822764005180558, "learning_rate": 1.8025952960259533e-06, "loss": 0.5385, "step": 23138 }, { "epoch": 0.6755715161601121, "grad_norm": 0.7374552032862514, "learning_rate": 1.802433090024331e-06, "loss": 0.635, "step": 23139 }, { "epoch": 0.6756007123879595, "grad_norm": 0.7485607456061726, "learning_rate": 1.802270884022709e-06, "loss": 0.6419, "step": 23140 }, { "epoch": 0.6756299086158069, "grad_norm": 0.676527113176251, "learning_rate": 1.8021086780210869e-06, "loss": 0.5697, "step": 23141 }, { "epoch": 0.6756591048436542, "grad_norm": 0.6924896163205495, "learning_rate": 1.801946472019465e-06, "loss": 0.6254, "step": 23142 }, { "epoch": 0.6756883010715016, "grad_norm": 0.7547483785498473, "learning_rate": 1.8017842660178427e-06, "loss": 0.685, "step": 23143 }, { "epoch": 0.6757174972993489, "grad_norm": 0.732363507769251, "learning_rate": 1.8016220600162207e-06, "loss": 0.6239, "step": 23144 }, { "epoch": 0.6757466935271963, "grad_norm": 0.7501241146110054, "learning_rate": 1.8014598540145987e-06, "loss": 0.6387, "step": 23145 }, { "epoch": 0.6757758897550437, "grad_norm": 0.7808969000422763, "learning_rate": 1.8012976480129765e-06, "loss": 0.6947, "step": 23146 }, { "epoch": 0.675805085982891, "grad_norm": 0.6980429888396742, "learning_rate": 1.8011354420113545e-06, "loss": 0.5894, "step": 23147 }, { "epoch": 0.6758342822107384, "grad_norm": 0.7344786460921232, "learning_rate": 1.8009732360097325e-06, "loss": 0.6432, "step": 23148 }, { "epoch": 0.6758634784385857, "grad_norm": 0.6965661900877196, "learning_rate": 1.8008110300081105e-06, "loss": 0.5766, "step": 23149 }, { "epoch": 0.6758926746664331, "grad_norm": 0.6832296551219177, "learning_rate": 1.8006488240064885e-06, "loss": 0.5564, "step": 23150 }, { "epoch": 0.6759218708942805, "grad_norm": 0.6884355493934616, "learning_rate": 1.8004866180048663e-06, "loss": 0.6135, "step": 23151 }, { "epoch": 0.6759510671221278, "grad_norm": 0.7102911612149073, "learning_rate": 1.8003244120032443e-06, "loss": 0.6166, "step": 23152 }, { "epoch": 0.6759802633499752, "grad_norm": 0.723505530841304, "learning_rate": 1.8001622060016221e-06, "loss": 0.6183, "step": 23153 }, { "epoch": 0.6760094595778225, "grad_norm": 0.7867709844646336, "learning_rate": 1.8000000000000001e-06, "loss": 0.7625, "step": 23154 }, { "epoch": 0.6760386558056699, "grad_norm": 0.7274157777817385, "learning_rate": 1.799837793998378e-06, "loss": 0.6652, "step": 23155 }, { "epoch": 0.6760678520335173, "grad_norm": 0.7066996142824072, "learning_rate": 1.799675587996756e-06, "loss": 0.5813, "step": 23156 }, { "epoch": 0.6760970482613646, "grad_norm": 0.742684885632223, "learning_rate": 1.7995133819951342e-06, "loss": 0.6433, "step": 23157 }, { "epoch": 0.676126244489212, "grad_norm": 0.7452229808353705, "learning_rate": 1.799351175993512e-06, "loss": 0.6518, "step": 23158 }, { "epoch": 0.6761554407170594, "grad_norm": 0.6975657666907709, "learning_rate": 1.79918896999189e-06, "loss": 0.5812, "step": 23159 }, { "epoch": 0.6761846369449067, "grad_norm": 0.7051227534400224, "learning_rate": 1.7990267639902677e-06, "loss": 0.6212, "step": 23160 }, { "epoch": 0.6762138331727541, "grad_norm": 0.7345860612398745, "learning_rate": 1.7988645579886458e-06, "loss": 0.6694, "step": 23161 }, { "epoch": 0.6762430294006014, "grad_norm": 0.7167405996165899, "learning_rate": 1.7987023519870235e-06, "loss": 0.6728, "step": 23162 }, { "epoch": 0.6762722256284488, "grad_norm": 0.749028202506036, "learning_rate": 1.7985401459854016e-06, "loss": 0.6527, "step": 23163 }, { "epoch": 0.6763014218562962, "grad_norm": 0.7131373802290011, "learning_rate": 1.7983779399837796e-06, "loss": 0.6144, "step": 23164 }, { "epoch": 0.6763306180841435, "grad_norm": 0.7197857040454079, "learning_rate": 1.7982157339821574e-06, "loss": 0.6377, "step": 23165 }, { "epoch": 0.6763598143119909, "grad_norm": 0.7972943125823495, "learning_rate": 1.7980535279805356e-06, "loss": 0.694, "step": 23166 }, { "epoch": 0.6763890105398382, "grad_norm": 0.7293971379503479, "learning_rate": 1.7978913219789134e-06, "loss": 0.6813, "step": 23167 }, { "epoch": 0.6764182067676856, "grad_norm": 0.7727196109370883, "learning_rate": 1.7977291159772914e-06, "loss": 0.6828, "step": 23168 }, { "epoch": 0.676447402995533, "grad_norm": 0.7172810365318036, "learning_rate": 1.7975669099756694e-06, "loss": 0.6322, "step": 23169 }, { "epoch": 0.6764765992233803, "grad_norm": 0.6771026187526644, "learning_rate": 1.7974047039740472e-06, "loss": 0.5415, "step": 23170 }, { "epoch": 0.6765057954512277, "grad_norm": 0.7754262352412125, "learning_rate": 1.7972424979724252e-06, "loss": 0.6552, "step": 23171 }, { "epoch": 0.676534991679075, "grad_norm": 0.7607965088955616, "learning_rate": 1.797080291970803e-06, "loss": 0.5937, "step": 23172 }, { "epoch": 0.6765641879069224, "grad_norm": 0.7643110950053469, "learning_rate": 1.796918085969181e-06, "loss": 0.6719, "step": 23173 }, { "epoch": 0.6765933841347698, "grad_norm": 0.7622517030150198, "learning_rate": 1.7967558799675588e-06, "loss": 0.6407, "step": 23174 }, { "epoch": 0.6766225803626171, "grad_norm": 0.7080384264383024, "learning_rate": 1.7965936739659368e-06, "loss": 0.6292, "step": 23175 }, { "epoch": 0.6766517765904645, "grad_norm": 0.8053025010518093, "learning_rate": 1.796431467964315e-06, "loss": 0.7042, "step": 23176 }, { "epoch": 0.6766809728183119, "grad_norm": 0.7219605048368147, "learning_rate": 1.7962692619626928e-06, "loss": 0.619, "step": 23177 }, { "epoch": 0.6767101690461592, "grad_norm": 0.6807428561193436, "learning_rate": 1.7961070559610708e-06, "loss": 0.5372, "step": 23178 }, { "epoch": 0.6767393652740066, "grad_norm": 0.7125039778478981, "learning_rate": 1.7959448499594486e-06, "loss": 0.6086, "step": 23179 }, { "epoch": 0.6767685615018539, "grad_norm": 0.6937258482281666, "learning_rate": 1.7957826439578266e-06, "loss": 0.6307, "step": 23180 }, { "epoch": 0.6767977577297013, "grad_norm": 0.7368250894879819, "learning_rate": 1.7956204379562044e-06, "loss": 0.6181, "step": 23181 }, { "epoch": 0.6768269539575487, "grad_norm": 0.7428400645680392, "learning_rate": 1.7954582319545824e-06, "loss": 0.6268, "step": 23182 }, { "epoch": 0.676856150185396, "grad_norm": 0.8156725864715045, "learning_rate": 1.7952960259529604e-06, "loss": 0.7792, "step": 23183 }, { "epoch": 0.6768853464132434, "grad_norm": 0.6900665862433445, "learning_rate": 1.7951338199513382e-06, "loss": 0.5677, "step": 23184 }, { "epoch": 0.6769145426410907, "grad_norm": 0.6749156711474439, "learning_rate": 1.7949716139497164e-06, "loss": 0.5717, "step": 23185 }, { "epoch": 0.6769437388689381, "grad_norm": 0.7321264180178443, "learning_rate": 1.7948094079480942e-06, "loss": 0.6326, "step": 23186 }, { "epoch": 0.6769729350967855, "grad_norm": 0.7043716307034051, "learning_rate": 1.7946472019464722e-06, "loss": 0.5276, "step": 23187 }, { "epoch": 0.6770021313246328, "grad_norm": 0.7005411474811597, "learning_rate": 1.7944849959448502e-06, "loss": 0.5653, "step": 23188 }, { "epoch": 0.6770313275524802, "grad_norm": 1.0215629579693077, "learning_rate": 1.794322789943228e-06, "loss": 0.7155, "step": 23189 }, { "epoch": 0.6770605237803276, "grad_norm": 0.7335561514246045, "learning_rate": 1.794160583941606e-06, "loss": 0.6096, "step": 23190 }, { "epoch": 0.6770897200081749, "grad_norm": 0.6843126600498578, "learning_rate": 1.7939983779399838e-06, "loss": 0.5825, "step": 23191 }, { "epoch": 0.6771189162360223, "grad_norm": 1.0188503935152835, "learning_rate": 1.7938361719383618e-06, "loss": 0.6338, "step": 23192 }, { "epoch": 0.6771481124638696, "grad_norm": 0.7495432700638559, "learning_rate": 1.7936739659367396e-06, "loss": 0.7322, "step": 23193 }, { "epoch": 0.677177308691717, "grad_norm": 0.7615220279157949, "learning_rate": 1.7935117599351176e-06, "loss": 0.6456, "step": 23194 }, { "epoch": 0.6772065049195644, "grad_norm": 0.7172960415054019, "learning_rate": 1.7933495539334959e-06, "loss": 0.6426, "step": 23195 }, { "epoch": 0.6772357011474117, "grad_norm": 0.7129927751113301, "learning_rate": 1.7931873479318737e-06, "loss": 0.6427, "step": 23196 }, { "epoch": 0.6772648973752591, "grad_norm": 0.7683666575821687, "learning_rate": 1.7930251419302517e-06, "loss": 0.6553, "step": 23197 }, { "epoch": 0.6772940936031064, "grad_norm": 0.7608850703473784, "learning_rate": 1.7928629359286295e-06, "loss": 0.7115, "step": 23198 }, { "epoch": 0.6773232898309538, "grad_norm": 0.715545677910819, "learning_rate": 1.7927007299270075e-06, "loss": 0.6522, "step": 23199 }, { "epoch": 0.6773524860588012, "grad_norm": 0.700141866793243, "learning_rate": 1.7925385239253853e-06, "loss": 0.6805, "step": 23200 }, { "epoch": 0.6773816822866485, "grad_norm": 0.8052119150072368, "learning_rate": 1.7923763179237633e-06, "loss": 0.6689, "step": 23201 }, { "epoch": 0.6774108785144959, "grad_norm": 0.7777511533246045, "learning_rate": 1.7922141119221413e-06, "loss": 0.6634, "step": 23202 }, { "epoch": 0.6774400747423432, "grad_norm": 0.6743046289367995, "learning_rate": 1.792051905920519e-06, "loss": 0.5641, "step": 23203 }, { "epoch": 0.6774692709701906, "grad_norm": 0.7095567203576332, "learning_rate": 1.7918896999188973e-06, "loss": 0.5822, "step": 23204 }, { "epoch": 0.677498467198038, "grad_norm": 0.6938758695392061, "learning_rate": 1.791727493917275e-06, "loss": 0.5869, "step": 23205 }, { "epoch": 0.6775276634258853, "grad_norm": 0.7043027395925964, "learning_rate": 1.791565287915653e-06, "loss": 0.5695, "step": 23206 }, { "epoch": 0.6775568596537327, "grad_norm": 0.7358339691877831, "learning_rate": 1.791403081914031e-06, "loss": 0.6625, "step": 23207 }, { "epoch": 0.67758605588158, "grad_norm": 0.7247055690324096, "learning_rate": 1.791240875912409e-06, "loss": 0.638, "step": 23208 }, { "epoch": 0.6776152521094274, "grad_norm": 0.7340351050384362, "learning_rate": 1.791078669910787e-06, "loss": 0.6038, "step": 23209 }, { "epoch": 0.6776444483372748, "grad_norm": 0.71602569545503, "learning_rate": 1.7909164639091647e-06, "loss": 0.625, "step": 23210 }, { "epoch": 0.6776736445651221, "grad_norm": 0.7848403264134906, "learning_rate": 1.7907542579075427e-06, "loss": 0.6902, "step": 23211 }, { "epoch": 0.6777028407929695, "grad_norm": 0.7294758665820175, "learning_rate": 1.7905920519059205e-06, "loss": 0.5946, "step": 23212 }, { "epoch": 0.6777320370208169, "grad_norm": 0.6584225356628305, "learning_rate": 1.7904298459042985e-06, "loss": 0.5546, "step": 23213 }, { "epoch": 0.6777612332486642, "grad_norm": 0.7137401538201424, "learning_rate": 1.7902676399026767e-06, "loss": 0.5919, "step": 23214 }, { "epoch": 0.6777904294765116, "grad_norm": 0.6881231216476985, "learning_rate": 1.7901054339010545e-06, "loss": 0.5569, "step": 23215 }, { "epoch": 0.677819625704359, "grad_norm": 0.813544073540921, "learning_rate": 1.7899432278994325e-06, "loss": 0.6674, "step": 23216 }, { "epoch": 0.6778488219322064, "grad_norm": 0.6987268365129194, "learning_rate": 1.7897810218978103e-06, "loss": 0.613, "step": 23217 }, { "epoch": 0.6778780181600538, "grad_norm": 0.7123405406432372, "learning_rate": 1.7896188158961883e-06, "loss": 0.6268, "step": 23218 }, { "epoch": 0.6779072143879011, "grad_norm": 0.7471598270164673, "learning_rate": 1.7894566098945661e-06, "loss": 0.6525, "step": 23219 }, { "epoch": 0.6779364106157485, "grad_norm": 0.7380427730428608, "learning_rate": 1.7892944038929441e-06, "loss": 0.6955, "step": 23220 }, { "epoch": 0.6779656068435959, "grad_norm": 0.6646256586984446, "learning_rate": 1.789132197891322e-06, "loss": 0.5588, "step": 23221 }, { "epoch": 0.6779948030714432, "grad_norm": 0.698591150906444, "learning_rate": 1.7889699918897e-06, "loss": 0.6225, "step": 23222 }, { "epoch": 0.6780239992992906, "grad_norm": 1.1853013790372473, "learning_rate": 1.7888077858880782e-06, "loss": 0.7097, "step": 23223 }, { "epoch": 0.6780531955271379, "grad_norm": 0.7141745748642369, "learning_rate": 1.788645579886456e-06, "loss": 0.5949, "step": 23224 }, { "epoch": 0.6780823917549853, "grad_norm": 0.7382233030982429, "learning_rate": 1.788483373884834e-06, "loss": 0.6599, "step": 23225 }, { "epoch": 0.6781115879828327, "grad_norm": 0.7523391771876021, "learning_rate": 1.788321167883212e-06, "loss": 0.7364, "step": 23226 }, { "epoch": 0.67814078421068, "grad_norm": 0.7418916178119604, "learning_rate": 1.7881589618815898e-06, "loss": 0.6731, "step": 23227 }, { "epoch": 0.6781699804385274, "grad_norm": 0.7013622337604745, "learning_rate": 1.7879967558799678e-06, "loss": 0.6061, "step": 23228 }, { "epoch": 0.6781991766663747, "grad_norm": 0.794654117451574, "learning_rate": 1.7878345498783456e-06, "loss": 0.676, "step": 23229 }, { "epoch": 0.6782283728942221, "grad_norm": 0.7014201384590599, "learning_rate": 1.7876723438767236e-06, "loss": 0.6183, "step": 23230 }, { "epoch": 0.6782575691220695, "grad_norm": 0.7200301456634874, "learning_rate": 1.7875101378751014e-06, "loss": 0.6483, "step": 23231 }, { "epoch": 0.6782867653499168, "grad_norm": 0.7478144763471646, "learning_rate": 1.7873479318734796e-06, "loss": 0.6472, "step": 23232 }, { "epoch": 0.6783159615777642, "grad_norm": 0.8127915591578606, "learning_rate": 1.7871857258718576e-06, "loss": 0.6905, "step": 23233 }, { "epoch": 0.6783451578056116, "grad_norm": 0.7538571507240567, "learning_rate": 1.7870235198702354e-06, "loss": 0.7149, "step": 23234 }, { "epoch": 0.6783743540334589, "grad_norm": 0.714852242646395, "learning_rate": 1.7868613138686134e-06, "loss": 0.6469, "step": 23235 }, { "epoch": 0.6784035502613063, "grad_norm": 0.6638836047939036, "learning_rate": 1.7866991078669912e-06, "loss": 0.5774, "step": 23236 }, { "epoch": 0.6784327464891536, "grad_norm": 0.7412068242061177, "learning_rate": 1.7865369018653692e-06, "loss": 0.6735, "step": 23237 }, { "epoch": 0.678461942717001, "grad_norm": 0.7021901983391273, "learning_rate": 1.786374695863747e-06, "loss": 0.6162, "step": 23238 }, { "epoch": 0.6784911389448484, "grad_norm": 0.6976950384952467, "learning_rate": 1.786212489862125e-06, "loss": 0.6266, "step": 23239 }, { "epoch": 0.6785203351726957, "grad_norm": 0.7427845896355, "learning_rate": 1.7860502838605028e-06, "loss": 0.6975, "step": 23240 }, { "epoch": 0.6785495314005431, "grad_norm": 0.6746292888077895, "learning_rate": 1.7858880778588808e-06, "loss": 0.5719, "step": 23241 }, { "epoch": 0.6785787276283904, "grad_norm": 0.7478924174281024, "learning_rate": 1.785725871857259e-06, "loss": 0.6229, "step": 23242 }, { "epoch": 0.6786079238562378, "grad_norm": 0.6821585613656969, "learning_rate": 1.7855636658556368e-06, "loss": 0.6052, "step": 23243 }, { "epoch": 0.6786371200840852, "grad_norm": 0.7043554960289022, "learning_rate": 1.7854014598540148e-06, "loss": 0.6123, "step": 23244 }, { "epoch": 0.6786663163119325, "grad_norm": 0.768844658571989, "learning_rate": 1.7852392538523928e-06, "loss": 0.7361, "step": 23245 }, { "epoch": 0.6786955125397799, "grad_norm": 0.6909337578395933, "learning_rate": 1.7850770478507706e-06, "loss": 0.5842, "step": 23246 }, { "epoch": 0.6787247087676272, "grad_norm": 0.650569953345666, "learning_rate": 1.7849148418491486e-06, "loss": 0.5296, "step": 23247 }, { "epoch": 0.6787539049954746, "grad_norm": 0.7311716339504869, "learning_rate": 1.7847526358475264e-06, "loss": 0.6251, "step": 23248 }, { "epoch": 0.678783101223322, "grad_norm": 0.7358604612747197, "learning_rate": 1.7845904298459044e-06, "loss": 0.632, "step": 23249 }, { "epoch": 0.6788122974511693, "grad_norm": 0.7635740802092668, "learning_rate": 1.7844282238442822e-06, "loss": 0.7018, "step": 23250 }, { "epoch": 0.6788414936790167, "grad_norm": 0.7508268174663941, "learning_rate": 1.7842660178426604e-06, "loss": 0.6987, "step": 23251 }, { "epoch": 0.678870689906864, "grad_norm": 0.7038540641185907, "learning_rate": 1.7841038118410384e-06, "loss": 0.6215, "step": 23252 }, { "epoch": 0.6788998861347114, "grad_norm": 0.7275834345545535, "learning_rate": 1.7839416058394162e-06, "loss": 0.6632, "step": 23253 }, { "epoch": 0.6789290823625588, "grad_norm": 0.7132104310171913, "learning_rate": 1.7837793998377942e-06, "loss": 0.6176, "step": 23254 }, { "epoch": 0.6789582785904061, "grad_norm": 0.7440261548652257, "learning_rate": 1.783617193836172e-06, "loss": 0.6574, "step": 23255 }, { "epoch": 0.6789874748182535, "grad_norm": 0.7312607660822102, "learning_rate": 1.78345498783455e-06, "loss": 0.6463, "step": 23256 }, { "epoch": 0.6790166710461009, "grad_norm": 0.6949949508671293, "learning_rate": 1.7832927818329278e-06, "loss": 0.6527, "step": 23257 }, { "epoch": 0.6790458672739482, "grad_norm": 0.7386282001307267, "learning_rate": 1.7831305758313058e-06, "loss": 0.7091, "step": 23258 }, { "epoch": 0.6790750635017956, "grad_norm": 0.6808030937002496, "learning_rate": 1.7829683698296836e-06, "loss": 0.6151, "step": 23259 }, { "epoch": 0.679104259729643, "grad_norm": 0.7543388272592726, "learning_rate": 1.7828061638280617e-06, "loss": 0.6569, "step": 23260 }, { "epoch": 0.6791334559574903, "grad_norm": 0.7431401275392148, "learning_rate": 1.7826439578264399e-06, "loss": 0.6681, "step": 23261 }, { "epoch": 0.6791626521853377, "grad_norm": 0.731063320606027, "learning_rate": 1.7824817518248177e-06, "loss": 0.6444, "step": 23262 }, { "epoch": 0.679191848413185, "grad_norm": 0.6994632076602846, "learning_rate": 1.7823195458231957e-06, "loss": 0.6003, "step": 23263 }, { "epoch": 0.6792210446410324, "grad_norm": 0.7042581487937136, "learning_rate": 1.7821573398215737e-06, "loss": 0.6051, "step": 23264 }, { "epoch": 0.6792502408688798, "grad_norm": 0.6788841761527749, "learning_rate": 1.7819951338199515e-06, "loss": 0.5928, "step": 23265 }, { "epoch": 0.6792794370967271, "grad_norm": 0.7654160282645676, "learning_rate": 1.7818329278183295e-06, "loss": 0.6465, "step": 23266 }, { "epoch": 0.6793086333245745, "grad_norm": 0.7012820865578694, "learning_rate": 1.7816707218167073e-06, "loss": 0.6163, "step": 23267 }, { "epoch": 0.6793378295524218, "grad_norm": 0.7947692101146767, "learning_rate": 1.7815085158150853e-06, "loss": 0.7509, "step": 23268 }, { "epoch": 0.6793670257802692, "grad_norm": 0.772738295937363, "learning_rate": 1.781346309813463e-06, "loss": 0.6641, "step": 23269 }, { "epoch": 0.6793962220081166, "grad_norm": 0.7456350876841645, "learning_rate": 1.7811841038118413e-06, "loss": 0.6514, "step": 23270 }, { "epoch": 0.6794254182359639, "grad_norm": 0.6995040614022472, "learning_rate": 1.7810218978102193e-06, "loss": 0.6571, "step": 23271 }, { "epoch": 0.6794546144638113, "grad_norm": 0.7531425415798624, "learning_rate": 1.780859691808597e-06, "loss": 0.6343, "step": 23272 }, { "epoch": 0.6794838106916586, "grad_norm": 0.7103610871439666, "learning_rate": 1.780697485806975e-06, "loss": 0.64, "step": 23273 }, { "epoch": 0.679513006919506, "grad_norm": 0.7234355584604351, "learning_rate": 1.780535279805353e-06, "loss": 0.678, "step": 23274 }, { "epoch": 0.6795422031473534, "grad_norm": 0.7200939266262988, "learning_rate": 1.780373073803731e-06, "loss": 0.6304, "step": 23275 }, { "epoch": 0.6795713993752007, "grad_norm": 0.7625346326897663, "learning_rate": 1.7802108678021087e-06, "loss": 0.6715, "step": 23276 }, { "epoch": 0.6796005956030481, "grad_norm": 0.7162068534973632, "learning_rate": 1.7800486618004867e-06, "loss": 0.6247, "step": 23277 }, { "epoch": 0.6796297918308954, "grad_norm": 0.706001710749837, "learning_rate": 1.7798864557988645e-06, "loss": 0.6166, "step": 23278 }, { "epoch": 0.6796589880587428, "grad_norm": 0.7315489613330483, "learning_rate": 1.7797242497972425e-06, "loss": 0.6753, "step": 23279 }, { "epoch": 0.6796881842865902, "grad_norm": 0.7527001432052073, "learning_rate": 1.7795620437956207e-06, "loss": 0.6907, "step": 23280 }, { "epoch": 0.6797173805144375, "grad_norm": 0.6949494419029939, "learning_rate": 1.7793998377939985e-06, "loss": 0.6234, "step": 23281 }, { "epoch": 0.6797465767422849, "grad_norm": 0.7543324339185856, "learning_rate": 1.7792376317923765e-06, "loss": 0.6777, "step": 23282 }, { "epoch": 0.6797757729701323, "grad_norm": 0.7673844684482618, "learning_rate": 1.7790754257907543e-06, "loss": 0.6419, "step": 23283 }, { "epoch": 0.6798049691979796, "grad_norm": 0.6960946529910031, "learning_rate": 1.7789132197891323e-06, "loss": 0.5835, "step": 23284 }, { "epoch": 0.679834165425827, "grad_norm": 0.6234980672566778, "learning_rate": 1.7787510137875103e-06, "loss": 0.4966, "step": 23285 }, { "epoch": 0.6798633616536743, "grad_norm": 0.7376851454537344, "learning_rate": 1.7785888077858881e-06, "loss": 0.6014, "step": 23286 }, { "epoch": 0.6798925578815217, "grad_norm": 0.7342685062395551, "learning_rate": 1.7784266017842661e-06, "loss": 0.5851, "step": 23287 }, { "epoch": 0.6799217541093691, "grad_norm": 0.6651978622974527, "learning_rate": 1.778264395782644e-06, "loss": 0.5473, "step": 23288 }, { "epoch": 0.6799509503372164, "grad_norm": 0.8318353745267132, "learning_rate": 1.7781021897810222e-06, "loss": 0.6483, "step": 23289 }, { "epoch": 0.6799801465650638, "grad_norm": 0.7261310561315286, "learning_rate": 1.7779399837794002e-06, "loss": 0.6207, "step": 23290 }, { "epoch": 0.6800093427929111, "grad_norm": 0.7406178317328389, "learning_rate": 1.777777777777778e-06, "loss": 0.6753, "step": 23291 }, { "epoch": 0.6800385390207585, "grad_norm": 0.7287950503085, "learning_rate": 1.777615571776156e-06, "loss": 0.6439, "step": 23292 }, { "epoch": 0.6800677352486059, "grad_norm": 0.7272634068643739, "learning_rate": 1.7774533657745338e-06, "loss": 0.6449, "step": 23293 }, { "epoch": 0.6800969314764532, "grad_norm": 0.7350167501428932, "learning_rate": 1.7772911597729118e-06, "loss": 0.6527, "step": 23294 }, { "epoch": 0.6801261277043006, "grad_norm": 0.7009357527620986, "learning_rate": 1.7771289537712896e-06, "loss": 0.5898, "step": 23295 }, { "epoch": 0.680155323932148, "grad_norm": 0.7858220967282424, "learning_rate": 1.7769667477696676e-06, "loss": 0.6897, "step": 23296 }, { "epoch": 0.6801845201599953, "grad_norm": 0.6815366451302337, "learning_rate": 1.7768045417680454e-06, "loss": 0.5887, "step": 23297 }, { "epoch": 0.6802137163878427, "grad_norm": 0.6999562387718048, "learning_rate": 1.7766423357664234e-06, "loss": 0.5684, "step": 23298 }, { "epoch": 0.68024291261569, "grad_norm": 0.7228146239388352, "learning_rate": 1.7764801297648016e-06, "loss": 0.6251, "step": 23299 }, { "epoch": 0.6802721088435374, "grad_norm": 0.7791917169087639, "learning_rate": 1.7763179237631794e-06, "loss": 0.7991, "step": 23300 }, { "epoch": 0.6803013050713848, "grad_norm": 0.731023582719007, "learning_rate": 1.7761557177615574e-06, "loss": 0.6444, "step": 23301 }, { "epoch": 0.6803305012992321, "grad_norm": 0.8007468992306124, "learning_rate": 1.7759935117599352e-06, "loss": 0.6534, "step": 23302 }, { "epoch": 0.6803596975270795, "grad_norm": 0.7082375043069227, "learning_rate": 1.7758313057583132e-06, "loss": 0.6403, "step": 23303 }, { "epoch": 0.6803888937549268, "grad_norm": 0.6937990987123057, "learning_rate": 1.7756690997566912e-06, "loss": 0.6124, "step": 23304 }, { "epoch": 0.6804180899827742, "grad_norm": 0.7857765424547045, "learning_rate": 1.775506893755069e-06, "loss": 0.7693, "step": 23305 }, { "epoch": 0.6804472862106216, "grad_norm": 0.7087212893372151, "learning_rate": 1.775344687753447e-06, "loss": 0.5817, "step": 23306 }, { "epoch": 0.6804764824384689, "grad_norm": 0.7601999522576923, "learning_rate": 1.7751824817518248e-06, "loss": 0.7186, "step": 23307 }, { "epoch": 0.6805056786663163, "grad_norm": 0.7297983511501283, "learning_rate": 1.775020275750203e-06, "loss": 0.6249, "step": 23308 }, { "epoch": 0.6805348748941636, "grad_norm": 0.7145734524455856, "learning_rate": 1.774858069748581e-06, "loss": 0.6416, "step": 23309 }, { "epoch": 0.680564071122011, "grad_norm": 0.711482896860735, "learning_rate": 1.7746958637469588e-06, "loss": 0.6479, "step": 23310 }, { "epoch": 0.6805932673498584, "grad_norm": 0.7032924116704086, "learning_rate": 1.7745336577453368e-06, "loss": 0.6471, "step": 23311 }, { "epoch": 0.6806224635777057, "grad_norm": 0.7179742190482572, "learning_rate": 1.7743714517437146e-06, "loss": 0.613, "step": 23312 }, { "epoch": 0.6806516598055531, "grad_norm": 0.6710873441279771, "learning_rate": 1.7742092457420926e-06, "loss": 0.5665, "step": 23313 }, { "epoch": 0.6806808560334004, "grad_norm": 0.6769593568826435, "learning_rate": 1.7740470397404704e-06, "loss": 0.584, "step": 23314 }, { "epoch": 0.6807100522612478, "grad_norm": 0.6841951853043492, "learning_rate": 1.7738848337388484e-06, "loss": 0.5848, "step": 23315 }, { "epoch": 0.6807392484890952, "grad_norm": 0.6916464131672143, "learning_rate": 1.7737226277372262e-06, "loss": 0.5849, "step": 23316 }, { "epoch": 0.6807684447169425, "grad_norm": 0.7745875009740337, "learning_rate": 1.7735604217356044e-06, "loss": 0.6598, "step": 23317 }, { "epoch": 0.6807976409447899, "grad_norm": 0.7309226765792616, "learning_rate": 1.7733982157339824e-06, "loss": 0.6648, "step": 23318 }, { "epoch": 0.6808268371726373, "grad_norm": 0.7106734291990953, "learning_rate": 1.7732360097323602e-06, "loss": 0.6658, "step": 23319 }, { "epoch": 0.6808560334004846, "grad_norm": 0.7610043948778418, "learning_rate": 1.7730738037307382e-06, "loss": 0.6922, "step": 23320 }, { "epoch": 0.680885229628332, "grad_norm": 0.7018498843047315, "learning_rate": 1.772911597729116e-06, "loss": 0.6139, "step": 23321 }, { "epoch": 0.6809144258561793, "grad_norm": 0.771673652339358, "learning_rate": 1.772749391727494e-06, "loss": 0.744, "step": 23322 }, { "epoch": 0.6809436220840267, "grad_norm": 0.6744168798741949, "learning_rate": 1.772587185725872e-06, "loss": 0.593, "step": 23323 }, { "epoch": 0.6809728183118741, "grad_norm": 0.7266916215150238, "learning_rate": 1.7724249797242499e-06, "loss": 0.6514, "step": 23324 }, { "epoch": 0.6810020145397214, "grad_norm": 0.6512072936346455, "learning_rate": 1.7722627737226279e-06, "loss": 0.559, "step": 23325 }, { "epoch": 0.6810312107675688, "grad_norm": 0.7176162796905483, "learning_rate": 1.7721005677210057e-06, "loss": 0.6396, "step": 23326 }, { "epoch": 0.6810604069954161, "grad_norm": 0.7883878720456005, "learning_rate": 1.7719383617193839e-06, "loss": 0.6673, "step": 23327 }, { "epoch": 0.6810896032232635, "grad_norm": 0.734515020103606, "learning_rate": 1.7717761557177619e-06, "loss": 0.6907, "step": 23328 }, { "epoch": 0.6811187994511109, "grad_norm": 0.7190843575687157, "learning_rate": 1.7716139497161397e-06, "loss": 0.6264, "step": 23329 }, { "epoch": 0.6811479956789582, "grad_norm": 0.7250134932489535, "learning_rate": 1.7714517437145177e-06, "loss": 0.6367, "step": 23330 }, { "epoch": 0.6811771919068056, "grad_norm": 0.7477155385231842, "learning_rate": 1.7712895377128955e-06, "loss": 0.6708, "step": 23331 }, { "epoch": 0.681206388134653, "grad_norm": 0.7440591973939654, "learning_rate": 1.7711273317112735e-06, "loss": 0.6496, "step": 23332 }, { "epoch": 0.6812355843625003, "grad_norm": 0.7837636376694423, "learning_rate": 1.7709651257096513e-06, "loss": 0.6819, "step": 23333 }, { "epoch": 0.6812647805903477, "grad_norm": 0.713822650708618, "learning_rate": 1.7708029197080293e-06, "loss": 0.6404, "step": 23334 }, { "epoch": 0.681293976818195, "grad_norm": 0.7469804402057364, "learning_rate": 1.770640713706407e-06, "loss": 0.7158, "step": 23335 }, { "epoch": 0.6813231730460424, "grad_norm": 0.740920550183663, "learning_rate": 1.7704785077047853e-06, "loss": 0.6066, "step": 23336 }, { "epoch": 0.6813523692738899, "grad_norm": 0.7178566801806212, "learning_rate": 1.7703163017031633e-06, "loss": 0.6114, "step": 23337 }, { "epoch": 0.6813815655017372, "grad_norm": 0.6807275072580947, "learning_rate": 1.770154095701541e-06, "loss": 0.5637, "step": 23338 }, { "epoch": 0.6814107617295846, "grad_norm": 0.7443166140418999, "learning_rate": 1.7699918896999191e-06, "loss": 0.6658, "step": 23339 }, { "epoch": 0.681439957957432, "grad_norm": 0.7164964022323886, "learning_rate": 1.769829683698297e-06, "loss": 0.6135, "step": 23340 }, { "epoch": 0.6814691541852793, "grad_norm": 0.719454968569747, "learning_rate": 1.769667477696675e-06, "loss": 0.5871, "step": 23341 }, { "epoch": 0.6814983504131267, "grad_norm": 0.7622577201019638, "learning_rate": 1.769505271695053e-06, "loss": 0.6356, "step": 23342 }, { "epoch": 0.681527546640974, "grad_norm": 0.7702319481385792, "learning_rate": 1.7693430656934307e-06, "loss": 0.7252, "step": 23343 }, { "epoch": 0.6815567428688214, "grad_norm": 0.7196382782531815, "learning_rate": 1.7691808596918087e-06, "loss": 0.63, "step": 23344 }, { "epoch": 0.6815859390966688, "grad_norm": 0.712575590474553, "learning_rate": 1.7690186536901865e-06, "loss": 0.7048, "step": 23345 }, { "epoch": 0.6816151353245161, "grad_norm": 0.7188379020012791, "learning_rate": 1.7688564476885647e-06, "loss": 0.6034, "step": 23346 }, { "epoch": 0.6816443315523635, "grad_norm": 0.7585574964750222, "learning_rate": 1.7686942416869427e-06, "loss": 0.6316, "step": 23347 }, { "epoch": 0.6816735277802108, "grad_norm": 0.6858869912924798, "learning_rate": 1.7685320356853205e-06, "loss": 0.6266, "step": 23348 }, { "epoch": 0.6817027240080582, "grad_norm": 0.6870683417080365, "learning_rate": 1.7683698296836985e-06, "loss": 0.6065, "step": 23349 }, { "epoch": 0.6817319202359056, "grad_norm": 0.7736417201795068, "learning_rate": 1.7682076236820763e-06, "loss": 0.7411, "step": 23350 }, { "epoch": 0.6817611164637529, "grad_norm": 0.7550050002370601, "learning_rate": 1.7680454176804543e-06, "loss": 0.7546, "step": 23351 }, { "epoch": 0.6817903126916003, "grad_norm": 0.7506863096971021, "learning_rate": 1.7678832116788321e-06, "loss": 0.749, "step": 23352 }, { "epoch": 0.6818195089194476, "grad_norm": 0.7278578192840347, "learning_rate": 1.7677210056772101e-06, "loss": 0.6299, "step": 23353 }, { "epoch": 0.681848705147295, "grad_norm": 0.7709664796772077, "learning_rate": 1.767558799675588e-06, "loss": 0.6903, "step": 23354 }, { "epoch": 0.6818779013751424, "grad_norm": 0.7021102239120601, "learning_rate": 1.7673965936739662e-06, "loss": 0.5813, "step": 23355 }, { "epoch": 0.6819070976029897, "grad_norm": 0.7235349059254532, "learning_rate": 1.7672343876723442e-06, "loss": 0.6748, "step": 23356 }, { "epoch": 0.6819362938308371, "grad_norm": 0.6760572515605907, "learning_rate": 1.767072181670722e-06, "loss": 0.5783, "step": 23357 }, { "epoch": 0.6819654900586845, "grad_norm": 0.7446555622495826, "learning_rate": 1.7669099756691e-06, "loss": 0.6825, "step": 23358 }, { "epoch": 0.6819946862865318, "grad_norm": 0.6633942804948046, "learning_rate": 1.7667477696674778e-06, "loss": 0.5439, "step": 23359 }, { "epoch": 0.6820238825143792, "grad_norm": 0.7280150076450655, "learning_rate": 1.7665855636658558e-06, "loss": 0.5864, "step": 23360 }, { "epoch": 0.6820530787422265, "grad_norm": 0.7424688248418174, "learning_rate": 1.7664233576642338e-06, "loss": 0.6568, "step": 23361 }, { "epoch": 0.6820822749700739, "grad_norm": 0.7039997119307583, "learning_rate": 1.7662611516626116e-06, "loss": 0.6011, "step": 23362 }, { "epoch": 0.6821114711979213, "grad_norm": 0.7234642576238755, "learning_rate": 1.7660989456609896e-06, "loss": 0.611, "step": 23363 }, { "epoch": 0.6821406674257686, "grad_norm": 0.7187551484352463, "learning_rate": 1.7659367396593674e-06, "loss": 0.6401, "step": 23364 }, { "epoch": 0.682169863653616, "grad_norm": 0.7108374831555698, "learning_rate": 1.7657745336577456e-06, "loss": 0.636, "step": 23365 }, { "epoch": 0.6821990598814633, "grad_norm": 0.7336175582394753, "learning_rate": 1.7656123276561236e-06, "loss": 0.6036, "step": 23366 }, { "epoch": 0.6822282561093107, "grad_norm": 0.670909588308565, "learning_rate": 1.7654501216545014e-06, "loss": 0.5379, "step": 23367 }, { "epoch": 0.6822574523371581, "grad_norm": 0.7408091126794577, "learning_rate": 1.7652879156528794e-06, "loss": 0.6647, "step": 23368 }, { "epoch": 0.6822866485650054, "grad_norm": 0.707798445553763, "learning_rate": 1.7651257096512572e-06, "loss": 0.6349, "step": 23369 }, { "epoch": 0.6823158447928528, "grad_norm": 0.7512069177061969, "learning_rate": 1.7649635036496352e-06, "loss": 0.6602, "step": 23370 }, { "epoch": 0.6823450410207001, "grad_norm": 0.6791020445625506, "learning_rate": 1.764801297648013e-06, "loss": 0.5814, "step": 23371 }, { "epoch": 0.6823742372485475, "grad_norm": 0.7879013489902328, "learning_rate": 1.764639091646391e-06, "loss": 0.7707, "step": 23372 }, { "epoch": 0.6824034334763949, "grad_norm": 0.775931070596528, "learning_rate": 1.7644768856447688e-06, "loss": 0.7216, "step": 23373 }, { "epoch": 0.6824326297042422, "grad_norm": 0.7043470105783662, "learning_rate": 1.764314679643147e-06, "loss": 0.614, "step": 23374 }, { "epoch": 0.6824618259320896, "grad_norm": 0.750857296084344, "learning_rate": 1.764152473641525e-06, "loss": 0.7151, "step": 23375 }, { "epoch": 0.682491022159937, "grad_norm": 0.7321779248796456, "learning_rate": 1.7639902676399028e-06, "loss": 0.6387, "step": 23376 }, { "epoch": 0.6825202183877843, "grad_norm": 0.7290961807641012, "learning_rate": 1.7638280616382808e-06, "loss": 0.6232, "step": 23377 }, { "epoch": 0.6825494146156317, "grad_norm": 0.7342297663416876, "learning_rate": 1.7636658556366586e-06, "loss": 0.6253, "step": 23378 }, { "epoch": 0.682578610843479, "grad_norm": 0.7418891132331316, "learning_rate": 1.7635036496350366e-06, "loss": 0.6822, "step": 23379 }, { "epoch": 0.6826078070713264, "grad_norm": 0.7221558969873244, "learning_rate": 1.7633414436334146e-06, "loss": 0.659, "step": 23380 }, { "epoch": 0.6826370032991738, "grad_norm": 0.7363027488708352, "learning_rate": 1.7631792376317924e-06, "loss": 0.6655, "step": 23381 }, { "epoch": 0.6826661995270211, "grad_norm": 0.6849844787097821, "learning_rate": 1.7630170316301704e-06, "loss": 0.5644, "step": 23382 }, { "epoch": 0.6826953957548685, "grad_norm": 0.7268385851238582, "learning_rate": 1.7628548256285484e-06, "loss": 0.5927, "step": 23383 }, { "epoch": 0.6827245919827158, "grad_norm": 0.6690871817228665, "learning_rate": 1.7626926196269264e-06, "loss": 0.5665, "step": 23384 }, { "epoch": 0.6827537882105632, "grad_norm": 0.7678398528318665, "learning_rate": 1.7625304136253045e-06, "loss": 0.5999, "step": 23385 }, { "epoch": 0.6827829844384106, "grad_norm": 0.7285250865941629, "learning_rate": 1.7623682076236822e-06, "loss": 0.628, "step": 23386 }, { "epoch": 0.6828121806662579, "grad_norm": 0.7113287996481329, "learning_rate": 1.7622060016220603e-06, "loss": 0.6612, "step": 23387 }, { "epoch": 0.6828413768941053, "grad_norm": 0.7074825696077631, "learning_rate": 1.762043795620438e-06, "loss": 0.6471, "step": 23388 }, { "epoch": 0.6828705731219527, "grad_norm": 0.7149898073216436, "learning_rate": 1.761881589618816e-06, "loss": 0.6108, "step": 23389 }, { "epoch": 0.6828997693498, "grad_norm": 0.7879732437053553, "learning_rate": 1.7617193836171939e-06, "loss": 0.7293, "step": 23390 }, { "epoch": 0.6829289655776474, "grad_norm": 0.787190837504781, "learning_rate": 1.7615571776155719e-06, "loss": 0.688, "step": 23391 }, { "epoch": 0.6829581618054947, "grad_norm": 0.7564754115808605, "learning_rate": 1.7613949716139497e-06, "loss": 0.6714, "step": 23392 }, { "epoch": 0.6829873580333421, "grad_norm": 0.7888148392805474, "learning_rate": 1.7612327656123279e-06, "loss": 0.7629, "step": 23393 }, { "epoch": 0.6830165542611895, "grad_norm": 0.7440604508038157, "learning_rate": 1.7610705596107059e-06, "loss": 0.6442, "step": 23394 }, { "epoch": 0.6830457504890368, "grad_norm": 0.6697059222688223, "learning_rate": 1.7609083536090837e-06, "loss": 0.5349, "step": 23395 }, { "epoch": 0.6830749467168842, "grad_norm": 0.6689625938997779, "learning_rate": 1.7607461476074617e-06, "loss": 0.5651, "step": 23396 }, { "epoch": 0.6831041429447315, "grad_norm": 0.7511686602160244, "learning_rate": 1.7605839416058395e-06, "loss": 0.6178, "step": 23397 }, { "epoch": 0.6831333391725789, "grad_norm": 0.7650505666731358, "learning_rate": 1.7604217356042175e-06, "loss": 0.7639, "step": 23398 }, { "epoch": 0.6831625354004263, "grad_norm": 0.6939281177882671, "learning_rate": 1.7602595296025955e-06, "loss": 0.5731, "step": 23399 }, { "epoch": 0.6831917316282736, "grad_norm": 0.7136285543748532, "learning_rate": 1.7600973236009733e-06, "loss": 0.6138, "step": 23400 }, { "epoch": 0.683220927856121, "grad_norm": 0.7207728311766863, "learning_rate": 1.7599351175993513e-06, "loss": 0.6395, "step": 23401 }, { "epoch": 0.6832501240839683, "grad_norm": 0.7338807444740266, "learning_rate": 1.7597729115977293e-06, "loss": 0.6393, "step": 23402 }, { "epoch": 0.6832793203118157, "grad_norm": 0.7653626152351053, "learning_rate": 1.7596107055961073e-06, "loss": 0.6684, "step": 23403 }, { "epoch": 0.6833085165396631, "grad_norm": 0.6986972612608106, "learning_rate": 1.7594484995944853e-06, "loss": 0.6259, "step": 23404 }, { "epoch": 0.6833377127675104, "grad_norm": 0.7416443108681485, "learning_rate": 1.7592862935928631e-06, "loss": 0.6693, "step": 23405 }, { "epoch": 0.6833669089953578, "grad_norm": 0.7566957335608447, "learning_rate": 1.7591240875912411e-06, "loss": 0.7026, "step": 23406 }, { "epoch": 0.6833961052232052, "grad_norm": 0.7606536176849736, "learning_rate": 1.758961881589619e-06, "loss": 0.6706, "step": 23407 }, { "epoch": 0.6834253014510525, "grad_norm": 0.8108324101564278, "learning_rate": 1.758799675587997e-06, "loss": 0.6932, "step": 23408 }, { "epoch": 0.6834544976788999, "grad_norm": 0.7320631304044257, "learning_rate": 1.7586374695863747e-06, "loss": 0.6262, "step": 23409 }, { "epoch": 0.6834836939067472, "grad_norm": 0.6792861349043285, "learning_rate": 1.7584752635847527e-06, "loss": 0.5818, "step": 23410 }, { "epoch": 0.6835128901345946, "grad_norm": 0.774648949098008, "learning_rate": 1.7583130575831305e-06, "loss": 0.748, "step": 23411 }, { "epoch": 0.683542086362442, "grad_norm": 0.7311369601834627, "learning_rate": 1.7581508515815087e-06, "loss": 0.6364, "step": 23412 }, { "epoch": 0.6835712825902893, "grad_norm": 0.7116174231032324, "learning_rate": 1.7579886455798867e-06, "loss": 0.6054, "step": 23413 }, { "epoch": 0.6836004788181367, "grad_norm": 0.7394817986963987, "learning_rate": 1.7578264395782645e-06, "loss": 0.6739, "step": 23414 }, { "epoch": 0.683629675045984, "grad_norm": 0.7944798799778368, "learning_rate": 1.7576642335766425e-06, "loss": 0.661, "step": 23415 }, { "epoch": 0.6836588712738314, "grad_norm": 0.7515039851821053, "learning_rate": 1.7575020275750203e-06, "loss": 0.7534, "step": 23416 }, { "epoch": 0.6836880675016788, "grad_norm": 0.7054541635736734, "learning_rate": 1.7573398215733983e-06, "loss": 0.6507, "step": 23417 }, { "epoch": 0.6837172637295261, "grad_norm": 0.6616122136037275, "learning_rate": 1.7571776155717761e-06, "loss": 0.551, "step": 23418 }, { "epoch": 0.6837464599573735, "grad_norm": 0.687750107764941, "learning_rate": 1.7570154095701541e-06, "loss": 0.5815, "step": 23419 }, { "epoch": 0.6837756561852208, "grad_norm": 0.7477523261368038, "learning_rate": 1.7568532035685322e-06, "loss": 0.6661, "step": 23420 }, { "epoch": 0.6838048524130682, "grad_norm": 0.763188950164323, "learning_rate": 1.7566909975669102e-06, "loss": 0.6167, "step": 23421 }, { "epoch": 0.6838340486409156, "grad_norm": 0.7452417472113385, "learning_rate": 1.7565287915652882e-06, "loss": 0.6824, "step": 23422 }, { "epoch": 0.6838632448687629, "grad_norm": 0.6867446082093672, "learning_rate": 1.7563665855636662e-06, "loss": 0.5722, "step": 23423 }, { "epoch": 0.6838924410966103, "grad_norm": 0.7181208582578965, "learning_rate": 1.756204379562044e-06, "loss": 0.6781, "step": 23424 }, { "epoch": 0.6839216373244577, "grad_norm": 0.7318475587238924, "learning_rate": 1.756042173560422e-06, "loss": 0.6667, "step": 23425 }, { "epoch": 0.683950833552305, "grad_norm": 0.7390632162885397, "learning_rate": 1.7558799675587998e-06, "loss": 0.6971, "step": 23426 }, { "epoch": 0.6839800297801524, "grad_norm": 0.6858582484521267, "learning_rate": 1.7557177615571778e-06, "loss": 0.5859, "step": 23427 }, { "epoch": 0.6840092260079997, "grad_norm": 0.7443509346328763, "learning_rate": 1.7555555555555556e-06, "loss": 0.7296, "step": 23428 }, { "epoch": 0.6840384222358471, "grad_norm": 0.761124983054152, "learning_rate": 1.7553933495539336e-06, "loss": 0.7014, "step": 23429 }, { "epoch": 0.6840676184636945, "grad_norm": 0.7202248707583706, "learning_rate": 1.7552311435523114e-06, "loss": 0.6274, "step": 23430 }, { "epoch": 0.6840968146915418, "grad_norm": 0.725907982341253, "learning_rate": 1.7550689375506896e-06, "loss": 0.6218, "step": 23431 }, { "epoch": 0.6841260109193892, "grad_norm": 0.712419775348732, "learning_rate": 1.7549067315490676e-06, "loss": 0.5873, "step": 23432 }, { "epoch": 0.6841552071472365, "grad_norm": 0.7286048850183751, "learning_rate": 1.7547445255474454e-06, "loss": 0.6847, "step": 23433 }, { "epoch": 0.6841844033750839, "grad_norm": 0.6664085243601926, "learning_rate": 1.7545823195458234e-06, "loss": 0.5677, "step": 23434 }, { "epoch": 0.6842135996029313, "grad_norm": 0.6896825879838775, "learning_rate": 1.7544201135442012e-06, "loss": 0.5896, "step": 23435 }, { "epoch": 0.6842427958307786, "grad_norm": 0.6619481479641048, "learning_rate": 1.7542579075425792e-06, "loss": 0.5993, "step": 23436 }, { "epoch": 0.684271992058626, "grad_norm": 0.708978417413372, "learning_rate": 1.754095701540957e-06, "loss": 0.6176, "step": 23437 }, { "epoch": 0.6843011882864733, "grad_norm": 0.7616105472425786, "learning_rate": 1.753933495539335e-06, "loss": 0.5935, "step": 23438 }, { "epoch": 0.6843303845143207, "grad_norm": 1.0455428550415597, "learning_rate": 1.753771289537713e-06, "loss": 0.8121, "step": 23439 }, { "epoch": 0.6843595807421681, "grad_norm": 0.7350551820821898, "learning_rate": 1.753609083536091e-06, "loss": 0.7086, "step": 23440 }, { "epoch": 0.6843887769700154, "grad_norm": 0.7443297253460932, "learning_rate": 1.753446877534469e-06, "loss": 0.6214, "step": 23441 }, { "epoch": 0.6844179731978628, "grad_norm": 0.7192806685243962, "learning_rate": 1.753284671532847e-06, "loss": 0.6436, "step": 23442 }, { "epoch": 0.6844471694257102, "grad_norm": 0.670191895638022, "learning_rate": 1.7531224655312248e-06, "loss": 0.5428, "step": 23443 }, { "epoch": 0.6844763656535575, "grad_norm": 0.6936051708271783, "learning_rate": 1.7529602595296028e-06, "loss": 0.5945, "step": 23444 }, { "epoch": 0.6845055618814049, "grad_norm": 0.7098276541543956, "learning_rate": 1.7527980535279806e-06, "loss": 0.6128, "step": 23445 }, { "epoch": 0.6845347581092522, "grad_norm": 0.7386247732516993, "learning_rate": 1.7526358475263586e-06, "loss": 0.641, "step": 23446 }, { "epoch": 0.6845639543370996, "grad_norm": 0.7555186925805333, "learning_rate": 1.7524736415247364e-06, "loss": 0.6916, "step": 23447 }, { "epoch": 0.684593150564947, "grad_norm": 0.7608589287186857, "learning_rate": 1.7523114355231144e-06, "loss": 0.6811, "step": 23448 }, { "epoch": 0.6846223467927943, "grad_norm": 0.7076962901386641, "learning_rate": 1.7521492295214922e-06, "loss": 0.5874, "step": 23449 }, { "epoch": 0.6846515430206417, "grad_norm": 0.7101775430615276, "learning_rate": 1.7519870235198704e-06, "loss": 0.6302, "step": 23450 }, { "epoch": 0.684680739248489, "grad_norm": 0.6999932285802832, "learning_rate": 1.7518248175182485e-06, "loss": 0.6379, "step": 23451 }, { "epoch": 0.6847099354763364, "grad_norm": 0.7966735666307851, "learning_rate": 1.7516626115166263e-06, "loss": 0.6777, "step": 23452 }, { "epoch": 0.6847391317041838, "grad_norm": 0.6639751977401344, "learning_rate": 1.7515004055150043e-06, "loss": 0.5704, "step": 23453 }, { "epoch": 0.6847683279320311, "grad_norm": 0.7618502421142193, "learning_rate": 1.751338199513382e-06, "loss": 0.7191, "step": 23454 }, { "epoch": 0.6847975241598785, "grad_norm": 0.6991493995940082, "learning_rate": 1.75117599351176e-06, "loss": 0.6223, "step": 23455 }, { "epoch": 0.6848267203877259, "grad_norm": 0.7913964331971852, "learning_rate": 1.7510137875101379e-06, "loss": 0.6579, "step": 23456 }, { "epoch": 0.6848559166155732, "grad_norm": 0.7571535325255787, "learning_rate": 1.7508515815085159e-06, "loss": 0.6821, "step": 23457 }, { "epoch": 0.6848851128434207, "grad_norm": 0.7257626933527811, "learning_rate": 1.7506893755068939e-06, "loss": 0.6124, "step": 23458 }, { "epoch": 0.684914309071268, "grad_norm": 0.7399397657430432, "learning_rate": 1.7505271695052719e-06, "loss": 0.6476, "step": 23459 }, { "epoch": 0.6849435052991154, "grad_norm": 0.7913304097765426, "learning_rate": 1.7503649635036499e-06, "loss": 0.6575, "step": 23460 }, { "epoch": 0.6849727015269628, "grad_norm": 0.7316192078241864, "learning_rate": 1.7502027575020277e-06, "loss": 0.6, "step": 23461 }, { "epoch": 0.6850018977548101, "grad_norm": 0.7860980255360278, "learning_rate": 1.7500405515004057e-06, "loss": 0.7068, "step": 23462 }, { "epoch": 0.6850310939826575, "grad_norm": 0.7275043357224402, "learning_rate": 1.7498783454987837e-06, "loss": 0.646, "step": 23463 }, { "epoch": 0.6850602902105049, "grad_norm": 0.7629719641819719, "learning_rate": 1.7497161394971615e-06, "loss": 0.6741, "step": 23464 }, { "epoch": 0.6850894864383522, "grad_norm": 0.7293750883048836, "learning_rate": 1.7495539334955395e-06, "loss": 0.6744, "step": 23465 }, { "epoch": 0.6851186826661996, "grad_norm": 0.711207100660293, "learning_rate": 1.7493917274939173e-06, "loss": 0.6021, "step": 23466 }, { "epoch": 0.6851478788940469, "grad_norm": 0.7256768488830843, "learning_rate": 1.7492295214922953e-06, "loss": 0.6787, "step": 23467 }, { "epoch": 0.6851770751218943, "grad_norm": 0.6891324882627754, "learning_rate": 1.7490673154906735e-06, "loss": 0.6396, "step": 23468 }, { "epoch": 0.6852062713497417, "grad_norm": 0.7289343975396256, "learning_rate": 1.7489051094890513e-06, "loss": 0.6379, "step": 23469 }, { "epoch": 0.685235467577589, "grad_norm": 0.7378338260075471, "learning_rate": 1.7487429034874293e-06, "loss": 0.6614, "step": 23470 }, { "epoch": 0.6852646638054364, "grad_norm": 0.7319522061490376, "learning_rate": 1.7485806974858071e-06, "loss": 0.6859, "step": 23471 }, { "epoch": 0.6852938600332837, "grad_norm": 0.7545245141580215, "learning_rate": 1.7484184914841851e-06, "loss": 0.7553, "step": 23472 }, { "epoch": 0.6853230562611311, "grad_norm": 0.7260403370360377, "learning_rate": 1.748256285482563e-06, "loss": 0.6491, "step": 23473 }, { "epoch": 0.6853522524889785, "grad_norm": 0.7223711212986573, "learning_rate": 1.748094079480941e-06, "loss": 0.6082, "step": 23474 }, { "epoch": 0.6853814487168258, "grad_norm": 0.6677114051310321, "learning_rate": 1.7479318734793187e-06, "loss": 0.5511, "step": 23475 }, { "epoch": 0.6854106449446732, "grad_norm": 0.7187797627711238, "learning_rate": 1.7477696674776967e-06, "loss": 0.6442, "step": 23476 }, { "epoch": 0.6854398411725205, "grad_norm": 0.748704531468454, "learning_rate": 1.7476074614760747e-06, "loss": 0.6447, "step": 23477 }, { "epoch": 0.6854690374003679, "grad_norm": 0.7783763331240846, "learning_rate": 1.7474452554744527e-06, "loss": 0.7071, "step": 23478 }, { "epoch": 0.6854982336282153, "grad_norm": 0.6741070253746622, "learning_rate": 1.7472830494728307e-06, "loss": 0.587, "step": 23479 }, { "epoch": 0.6855274298560626, "grad_norm": 0.7098979416892987, "learning_rate": 1.7471208434712085e-06, "loss": 0.5928, "step": 23480 }, { "epoch": 0.68555662608391, "grad_norm": 0.7117521251086193, "learning_rate": 1.7469586374695865e-06, "loss": 0.5873, "step": 23481 }, { "epoch": 0.6855858223117574, "grad_norm": 0.745605771393611, "learning_rate": 1.7467964314679645e-06, "loss": 0.674, "step": 23482 }, { "epoch": 0.6856150185396047, "grad_norm": 0.742532783779435, "learning_rate": 1.7466342254663423e-06, "loss": 0.6867, "step": 23483 }, { "epoch": 0.6856442147674521, "grad_norm": 0.6782767297331329, "learning_rate": 1.7464720194647204e-06, "loss": 0.5949, "step": 23484 }, { "epoch": 0.6856734109952994, "grad_norm": 0.6982028644117019, "learning_rate": 1.7463098134630981e-06, "loss": 0.5909, "step": 23485 }, { "epoch": 0.6857026072231468, "grad_norm": 0.675584156313341, "learning_rate": 1.7461476074614762e-06, "loss": 0.568, "step": 23486 }, { "epoch": 0.6857318034509942, "grad_norm": 0.7064985120482696, "learning_rate": 1.7459854014598544e-06, "loss": 0.5945, "step": 23487 }, { "epoch": 0.6857609996788415, "grad_norm": 0.7964046976232891, "learning_rate": 1.7458231954582322e-06, "loss": 0.6772, "step": 23488 }, { "epoch": 0.6857901959066889, "grad_norm": 0.7732604855314581, "learning_rate": 1.7456609894566102e-06, "loss": 0.6565, "step": 23489 }, { "epoch": 0.6858193921345362, "grad_norm": 0.7254587682765891, "learning_rate": 1.745498783454988e-06, "loss": 0.5875, "step": 23490 }, { "epoch": 0.6858485883623836, "grad_norm": 0.7306982953975485, "learning_rate": 1.745336577453366e-06, "loss": 0.6316, "step": 23491 }, { "epoch": 0.685877784590231, "grad_norm": 0.7358501208153075, "learning_rate": 1.7451743714517438e-06, "loss": 0.6413, "step": 23492 }, { "epoch": 0.6859069808180783, "grad_norm": 0.7001092867518435, "learning_rate": 1.7450121654501218e-06, "loss": 0.6411, "step": 23493 }, { "epoch": 0.6859361770459257, "grad_norm": 0.7414930200056334, "learning_rate": 1.7448499594484996e-06, "loss": 0.6589, "step": 23494 }, { "epoch": 0.685965373273773, "grad_norm": 0.6752625984482449, "learning_rate": 1.7446877534468776e-06, "loss": 0.5939, "step": 23495 }, { "epoch": 0.6859945695016204, "grad_norm": 0.7443398046690775, "learning_rate": 1.7445255474452556e-06, "loss": 0.6369, "step": 23496 }, { "epoch": 0.6860237657294678, "grad_norm": 0.7494756245561681, "learning_rate": 1.7443633414436336e-06, "loss": 0.6582, "step": 23497 }, { "epoch": 0.6860529619573151, "grad_norm": 0.6740629486621756, "learning_rate": 1.7442011354420116e-06, "loss": 0.5688, "step": 23498 }, { "epoch": 0.6860821581851625, "grad_norm": 0.7852076888409455, "learning_rate": 1.7440389294403894e-06, "loss": 0.6959, "step": 23499 }, { "epoch": 0.6861113544130099, "grad_norm": 0.7284799598179796, "learning_rate": 1.7438767234387674e-06, "loss": 0.6129, "step": 23500 }, { "epoch": 0.6861405506408572, "grad_norm": 0.7126846438516896, "learning_rate": 1.7437145174371454e-06, "loss": 0.6078, "step": 23501 }, { "epoch": 0.6861697468687046, "grad_norm": 0.7793323358365942, "learning_rate": 1.7435523114355232e-06, "loss": 0.64, "step": 23502 }, { "epoch": 0.6861989430965519, "grad_norm": 0.6596231602128072, "learning_rate": 1.7433901054339012e-06, "loss": 0.4948, "step": 23503 }, { "epoch": 0.6862281393243993, "grad_norm": 0.6906826003048768, "learning_rate": 1.743227899432279e-06, "loss": 0.6276, "step": 23504 }, { "epoch": 0.6862573355522467, "grad_norm": 0.7536418922155335, "learning_rate": 1.743065693430657e-06, "loss": 0.7086, "step": 23505 }, { "epoch": 0.686286531780094, "grad_norm": 0.7630184994417128, "learning_rate": 1.7429034874290352e-06, "loss": 0.7013, "step": 23506 }, { "epoch": 0.6863157280079414, "grad_norm": 0.7451209094035999, "learning_rate": 1.742741281427413e-06, "loss": 0.6648, "step": 23507 }, { "epoch": 0.6863449242357887, "grad_norm": 0.79599615355172, "learning_rate": 1.742579075425791e-06, "loss": 0.7012, "step": 23508 }, { "epoch": 0.6863741204636361, "grad_norm": 0.7453019756976398, "learning_rate": 1.7424168694241688e-06, "loss": 0.6925, "step": 23509 }, { "epoch": 0.6864033166914835, "grad_norm": 0.7122251236266838, "learning_rate": 1.7422546634225468e-06, "loss": 0.6186, "step": 23510 }, { "epoch": 0.6864325129193308, "grad_norm": 0.6891704747617491, "learning_rate": 1.7420924574209246e-06, "loss": 0.6214, "step": 23511 }, { "epoch": 0.6864617091471782, "grad_norm": 0.7252661692948975, "learning_rate": 1.7419302514193026e-06, "loss": 0.6623, "step": 23512 }, { "epoch": 0.6864909053750256, "grad_norm": 0.7197143568602563, "learning_rate": 1.7417680454176804e-06, "loss": 0.5822, "step": 23513 }, { "epoch": 0.6865201016028729, "grad_norm": 0.6930650087195768, "learning_rate": 1.7416058394160584e-06, "loss": 0.5925, "step": 23514 }, { "epoch": 0.6865492978307203, "grad_norm": 0.7099238060242117, "learning_rate": 1.7414436334144364e-06, "loss": 0.6091, "step": 23515 }, { "epoch": 0.6865784940585676, "grad_norm": 0.7026651903227893, "learning_rate": 1.7412814274128145e-06, "loss": 0.5691, "step": 23516 }, { "epoch": 0.686607690286415, "grad_norm": 0.774027409404974, "learning_rate": 1.7411192214111925e-06, "loss": 0.7553, "step": 23517 }, { "epoch": 0.6866368865142624, "grad_norm": 0.7023093923744489, "learning_rate": 1.7409570154095703e-06, "loss": 0.6523, "step": 23518 }, { "epoch": 0.6866660827421097, "grad_norm": 0.7360777317710414, "learning_rate": 1.7407948094079483e-06, "loss": 0.6255, "step": 23519 }, { "epoch": 0.6866952789699571, "grad_norm": 0.7066473406169822, "learning_rate": 1.7406326034063263e-06, "loss": 0.606, "step": 23520 }, { "epoch": 0.6867244751978044, "grad_norm": 0.7527060357585191, "learning_rate": 1.740470397404704e-06, "loss": 0.6827, "step": 23521 }, { "epoch": 0.6867536714256518, "grad_norm": 0.7108359948372629, "learning_rate": 1.740308191403082e-06, "loss": 0.5883, "step": 23522 }, { "epoch": 0.6867828676534992, "grad_norm": 0.7898293197167648, "learning_rate": 1.7401459854014599e-06, "loss": 0.7325, "step": 23523 }, { "epoch": 0.6868120638813465, "grad_norm": 0.745627425467236, "learning_rate": 1.7399837793998379e-06, "loss": 0.6954, "step": 23524 }, { "epoch": 0.6868412601091939, "grad_norm": 0.7268881148712947, "learning_rate": 1.739821573398216e-06, "loss": 0.6772, "step": 23525 }, { "epoch": 0.6868704563370412, "grad_norm": 0.7440262932308341, "learning_rate": 1.7396593673965939e-06, "loss": 0.6712, "step": 23526 }, { "epoch": 0.6868996525648886, "grad_norm": 0.7288201274622329, "learning_rate": 1.7394971613949719e-06, "loss": 0.6445, "step": 23527 }, { "epoch": 0.686928848792736, "grad_norm": 0.7848159656279564, "learning_rate": 1.7393349553933497e-06, "loss": 0.6752, "step": 23528 }, { "epoch": 0.6869580450205833, "grad_norm": 0.7001027775691425, "learning_rate": 1.7391727493917277e-06, "loss": 0.6108, "step": 23529 }, { "epoch": 0.6869872412484307, "grad_norm": 0.7242793514194028, "learning_rate": 1.7390105433901055e-06, "loss": 0.6495, "step": 23530 }, { "epoch": 0.687016437476278, "grad_norm": 0.706108600103279, "learning_rate": 1.7388483373884835e-06, "loss": 0.5945, "step": 23531 }, { "epoch": 0.6870456337041254, "grad_norm": 0.6983255443328379, "learning_rate": 1.7386861313868613e-06, "loss": 0.5958, "step": 23532 }, { "epoch": 0.6870748299319728, "grad_norm": 0.7324562412133258, "learning_rate": 1.7385239253852393e-06, "loss": 0.6694, "step": 23533 }, { "epoch": 0.6871040261598201, "grad_norm": 0.6976757905001804, "learning_rate": 1.7383617193836175e-06, "loss": 0.6118, "step": 23534 }, { "epoch": 0.6871332223876675, "grad_norm": 0.6933732465458299, "learning_rate": 1.7381995133819953e-06, "loss": 0.5766, "step": 23535 }, { "epoch": 0.6871624186155149, "grad_norm": 0.7291030414876442, "learning_rate": 1.7380373073803733e-06, "loss": 0.6572, "step": 23536 }, { "epoch": 0.6871916148433622, "grad_norm": 0.7642534876733358, "learning_rate": 1.7378751013787511e-06, "loss": 0.7296, "step": 23537 }, { "epoch": 0.6872208110712096, "grad_norm": 0.6853244651168804, "learning_rate": 1.7377128953771291e-06, "loss": 0.5684, "step": 23538 }, { "epoch": 0.6872500072990569, "grad_norm": 0.7845820803135645, "learning_rate": 1.7375506893755071e-06, "loss": 0.7439, "step": 23539 }, { "epoch": 0.6872792035269043, "grad_norm": 0.6833284943771052, "learning_rate": 1.737388483373885e-06, "loss": 0.61, "step": 23540 }, { "epoch": 0.6873083997547517, "grad_norm": 0.7438754273993167, "learning_rate": 1.737226277372263e-06, "loss": 0.6187, "step": 23541 }, { "epoch": 0.687337595982599, "grad_norm": 0.7386204332338644, "learning_rate": 1.7370640713706407e-06, "loss": 0.6474, "step": 23542 }, { "epoch": 0.6873667922104464, "grad_norm": 0.711916741238062, "learning_rate": 1.7369018653690187e-06, "loss": 0.6565, "step": 23543 }, { "epoch": 0.6873959884382937, "grad_norm": 0.7216705204049868, "learning_rate": 1.736739659367397e-06, "loss": 0.6582, "step": 23544 }, { "epoch": 0.6874251846661411, "grad_norm": 0.7323863262508928, "learning_rate": 1.7365774533657747e-06, "loss": 0.6995, "step": 23545 }, { "epoch": 0.6874543808939885, "grad_norm": 0.7101402433846107, "learning_rate": 1.7364152473641527e-06, "loss": 0.6101, "step": 23546 }, { "epoch": 0.6874835771218358, "grad_norm": 0.7091510365013302, "learning_rate": 1.7362530413625305e-06, "loss": 0.637, "step": 23547 }, { "epoch": 0.6875127733496832, "grad_norm": 0.7360322971124986, "learning_rate": 1.7360908353609086e-06, "loss": 0.6504, "step": 23548 }, { "epoch": 0.6875419695775306, "grad_norm": 0.690676038208386, "learning_rate": 1.7359286293592863e-06, "loss": 0.6052, "step": 23549 }, { "epoch": 0.6875711658053779, "grad_norm": 0.7138478794872105, "learning_rate": 1.7357664233576644e-06, "loss": 0.6223, "step": 23550 }, { "epoch": 0.6876003620332253, "grad_norm": 0.7184280925445011, "learning_rate": 1.7356042173560421e-06, "loss": 0.6256, "step": 23551 }, { "epoch": 0.6876295582610726, "grad_norm": 0.6963239076275186, "learning_rate": 1.7354420113544202e-06, "loss": 0.6265, "step": 23552 }, { "epoch": 0.68765875448892, "grad_norm": 0.777271169436055, "learning_rate": 1.7352798053527984e-06, "loss": 0.6715, "step": 23553 }, { "epoch": 0.6876879507167674, "grad_norm": 0.7053866203542489, "learning_rate": 1.7351175993511762e-06, "loss": 0.601, "step": 23554 }, { "epoch": 0.6877171469446147, "grad_norm": 0.6797909327910474, "learning_rate": 1.7349553933495542e-06, "loss": 0.5946, "step": 23555 }, { "epoch": 0.6877463431724621, "grad_norm": 0.6874053846114963, "learning_rate": 1.734793187347932e-06, "loss": 0.6151, "step": 23556 }, { "epoch": 0.6877755394003094, "grad_norm": 0.7492660050115871, "learning_rate": 1.73463098134631e-06, "loss": 0.6722, "step": 23557 }, { "epoch": 0.6878047356281568, "grad_norm": 0.7807876728736693, "learning_rate": 1.734468775344688e-06, "loss": 0.7237, "step": 23558 }, { "epoch": 0.6878339318560042, "grad_norm": 0.7223411697112353, "learning_rate": 1.7343065693430658e-06, "loss": 0.6526, "step": 23559 }, { "epoch": 0.6878631280838515, "grad_norm": 0.7508257960964816, "learning_rate": 1.7341443633414438e-06, "loss": 0.6257, "step": 23560 }, { "epoch": 0.6878923243116989, "grad_norm": 0.7474899714082476, "learning_rate": 1.7339821573398216e-06, "loss": 0.6977, "step": 23561 }, { "epoch": 0.6879215205395462, "grad_norm": 0.746508116006119, "learning_rate": 1.7338199513381996e-06, "loss": 0.7001, "step": 23562 }, { "epoch": 0.6879507167673936, "grad_norm": 0.7788830381074224, "learning_rate": 1.7336577453365778e-06, "loss": 0.5802, "step": 23563 }, { "epoch": 0.687979912995241, "grad_norm": 0.7266196301331759, "learning_rate": 1.7334955393349556e-06, "loss": 0.632, "step": 23564 }, { "epoch": 0.6880091092230883, "grad_norm": 0.6836797857925996, "learning_rate": 1.7333333333333336e-06, "loss": 0.6173, "step": 23565 }, { "epoch": 0.6880383054509357, "grad_norm": 0.7184017728161233, "learning_rate": 1.7331711273317114e-06, "loss": 0.6309, "step": 23566 }, { "epoch": 0.6880675016787831, "grad_norm": 0.7309211304085024, "learning_rate": 1.7330089213300894e-06, "loss": 0.6455, "step": 23567 }, { "epoch": 0.6880966979066304, "grad_norm": 0.7136502806569768, "learning_rate": 1.7328467153284672e-06, "loss": 0.6466, "step": 23568 }, { "epoch": 0.6881258941344778, "grad_norm": 0.7658193033933833, "learning_rate": 1.7326845093268452e-06, "loss": 0.7502, "step": 23569 }, { "epoch": 0.6881550903623251, "grad_norm": 0.756163895926909, "learning_rate": 1.732522303325223e-06, "loss": 0.6722, "step": 23570 }, { "epoch": 0.6881842865901725, "grad_norm": 0.7219928475268445, "learning_rate": 1.732360097323601e-06, "loss": 0.6023, "step": 23571 }, { "epoch": 0.6882134828180199, "grad_norm": 0.7221591220432172, "learning_rate": 1.7321978913219792e-06, "loss": 0.6635, "step": 23572 }, { "epoch": 0.6882426790458672, "grad_norm": 0.6997343195582006, "learning_rate": 1.732035685320357e-06, "loss": 0.6458, "step": 23573 }, { "epoch": 0.6882718752737146, "grad_norm": 0.7440946807564986, "learning_rate": 1.731873479318735e-06, "loss": 0.6413, "step": 23574 }, { "epoch": 0.688301071501562, "grad_norm": 0.7696915759583297, "learning_rate": 1.7317112733171128e-06, "loss": 0.676, "step": 23575 }, { "epoch": 0.6883302677294093, "grad_norm": 0.717385776297564, "learning_rate": 1.7315490673154908e-06, "loss": 0.6564, "step": 23576 }, { "epoch": 0.6883594639572567, "grad_norm": 0.7322614183981516, "learning_rate": 1.7313868613138688e-06, "loss": 0.6972, "step": 23577 }, { "epoch": 0.6883886601851041, "grad_norm": 0.7141629965954565, "learning_rate": 1.7312246553122466e-06, "loss": 0.6403, "step": 23578 }, { "epoch": 0.6884178564129515, "grad_norm": 0.7384613771283088, "learning_rate": 1.7310624493106246e-06, "loss": 0.6287, "step": 23579 }, { "epoch": 0.6884470526407989, "grad_norm": 0.7495730087348689, "learning_rate": 1.7309002433090024e-06, "loss": 0.6764, "step": 23580 }, { "epoch": 0.6884762488686462, "grad_norm": 0.7235450421482832, "learning_rate": 1.7307380373073804e-06, "loss": 0.6198, "step": 23581 }, { "epoch": 0.6885054450964936, "grad_norm": 0.7497286990353731, "learning_rate": 1.7305758313057587e-06, "loss": 0.6707, "step": 23582 }, { "epoch": 0.688534641324341, "grad_norm": 0.7293854108455072, "learning_rate": 1.7304136253041365e-06, "loss": 0.6333, "step": 23583 }, { "epoch": 0.6885638375521883, "grad_norm": 0.790895698317696, "learning_rate": 1.7302514193025145e-06, "loss": 0.6468, "step": 23584 }, { "epoch": 0.6885930337800357, "grad_norm": 0.7794130065716064, "learning_rate": 1.7300892133008923e-06, "loss": 0.7322, "step": 23585 }, { "epoch": 0.688622230007883, "grad_norm": 0.7151373935565577, "learning_rate": 1.7299270072992703e-06, "loss": 0.572, "step": 23586 }, { "epoch": 0.6886514262357304, "grad_norm": 0.7835376056285883, "learning_rate": 1.729764801297648e-06, "loss": 0.7437, "step": 23587 }, { "epoch": 0.6886806224635778, "grad_norm": 0.7325618787698972, "learning_rate": 1.729602595296026e-06, "loss": 0.6304, "step": 23588 }, { "epoch": 0.6887098186914251, "grad_norm": 0.7284283784192153, "learning_rate": 1.7294403892944039e-06, "loss": 0.6343, "step": 23589 }, { "epoch": 0.6887390149192725, "grad_norm": 0.7680670099214971, "learning_rate": 1.7292781832927819e-06, "loss": 0.6093, "step": 23590 }, { "epoch": 0.6887682111471198, "grad_norm": 0.7762737431346335, "learning_rate": 1.72911597729116e-06, "loss": 0.7123, "step": 23591 }, { "epoch": 0.6887974073749672, "grad_norm": 0.7684636910713334, "learning_rate": 1.7289537712895379e-06, "loss": 0.7038, "step": 23592 }, { "epoch": 0.6888266036028146, "grad_norm": 0.7207904854562065, "learning_rate": 1.7287915652879159e-06, "loss": 0.6002, "step": 23593 }, { "epoch": 0.6888557998306619, "grad_norm": 0.7051717538553014, "learning_rate": 1.7286293592862937e-06, "loss": 0.5922, "step": 23594 }, { "epoch": 0.6888849960585093, "grad_norm": 0.7430805079452338, "learning_rate": 1.7284671532846717e-06, "loss": 0.6373, "step": 23595 }, { "epoch": 0.6889141922863566, "grad_norm": 0.7139012083333831, "learning_rate": 1.7283049472830495e-06, "loss": 0.6096, "step": 23596 }, { "epoch": 0.688943388514204, "grad_norm": 0.6235196184448925, "learning_rate": 1.7281427412814275e-06, "loss": 0.4584, "step": 23597 }, { "epoch": 0.6889725847420514, "grad_norm": 0.6902431066021795, "learning_rate": 1.7279805352798055e-06, "loss": 0.5911, "step": 23598 }, { "epoch": 0.6890017809698987, "grad_norm": 0.714594475466731, "learning_rate": 1.7278183292781833e-06, "loss": 0.6309, "step": 23599 }, { "epoch": 0.6890309771977461, "grad_norm": 0.7646475595413766, "learning_rate": 1.7276561232765613e-06, "loss": 0.698, "step": 23600 }, { "epoch": 0.6890601734255934, "grad_norm": 0.7577146336105157, "learning_rate": 1.7274939172749395e-06, "loss": 0.7199, "step": 23601 }, { "epoch": 0.6890893696534408, "grad_norm": 0.6190603811327233, "learning_rate": 1.7273317112733173e-06, "loss": 0.5191, "step": 23602 }, { "epoch": 0.6891185658812882, "grad_norm": 0.7396262645346862, "learning_rate": 1.7271695052716953e-06, "loss": 0.6517, "step": 23603 }, { "epoch": 0.6891477621091355, "grad_norm": 0.7599672492141848, "learning_rate": 1.7270072992700731e-06, "loss": 0.7054, "step": 23604 }, { "epoch": 0.6891769583369829, "grad_norm": 0.6997987399155576, "learning_rate": 1.7268450932684511e-06, "loss": 0.6733, "step": 23605 }, { "epoch": 0.6892061545648303, "grad_norm": 0.7084161743738914, "learning_rate": 1.726682887266829e-06, "loss": 0.5966, "step": 23606 }, { "epoch": 0.6892353507926776, "grad_norm": 0.7161328561086472, "learning_rate": 1.726520681265207e-06, "loss": 0.6538, "step": 23607 }, { "epoch": 0.689264547020525, "grad_norm": 0.6760326347065257, "learning_rate": 1.7263584752635847e-06, "loss": 0.5381, "step": 23608 }, { "epoch": 0.6892937432483723, "grad_norm": 0.7172675719764562, "learning_rate": 1.7261962692619627e-06, "loss": 0.6393, "step": 23609 }, { "epoch": 0.6893229394762197, "grad_norm": 0.7502564323318887, "learning_rate": 1.726034063260341e-06, "loss": 0.6908, "step": 23610 }, { "epoch": 0.6893521357040671, "grad_norm": 0.6577230039213935, "learning_rate": 1.7258718572587187e-06, "loss": 0.5054, "step": 23611 }, { "epoch": 0.6893813319319144, "grad_norm": 0.791543432099758, "learning_rate": 1.7257096512570968e-06, "loss": 0.6291, "step": 23612 }, { "epoch": 0.6894105281597618, "grad_norm": 0.7268999246290566, "learning_rate": 1.7255474452554745e-06, "loss": 0.6352, "step": 23613 }, { "epoch": 0.6894397243876091, "grad_norm": 0.707244684435573, "learning_rate": 1.7253852392538526e-06, "loss": 0.6225, "step": 23614 }, { "epoch": 0.6894689206154565, "grad_norm": 0.7635324841865846, "learning_rate": 1.7252230332522303e-06, "loss": 0.6527, "step": 23615 }, { "epoch": 0.6894981168433039, "grad_norm": 1.1081539963337945, "learning_rate": 1.7250608272506084e-06, "loss": 0.63, "step": 23616 }, { "epoch": 0.6895273130711512, "grad_norm": 0.7359556879254537, "learning_rate": 1.7248986212489864e-06, "loss": 0.655, "step": 23617 }, { "epoch": 0.6895565092989986, "grad_norm": 0.8833426266981984, "learning_rate": 1.7247364152473642e-06, "loss": 0.6889, "step": 23618 }, { "epoch": 0.689585705526846, "grad_norm": 0.7266651816108949, "learning_rate": 1.7245742092457424e-06, "loss": 0.624, "step": 23619 }, { "epoch": 0.6896149017546933, "grad_norm": 0.6772031784951839, "learning_rate": 1.7244120032441204e-06, "loss": 0.5317, "step": 23620 }, { "epoch": 0.6896440979825407, "grad_norm": 0.6670531245337744, "learning_rate": 1.7242497972424982e-06, "loss": 0.6028, "step": 23621 }, { "epoch": 0.689673294210388, "grad_norm": 0.7650459863550282, "learning_rate": 1.7240875912408762e-06, "loss": 0.6676, "step": 23622 }, { "epoch": 0.6897024904382354, "grad_norm": 0.716252249624542, "learning_rate": 1.723925385239254e-06, "loss": 0.6263, "step": 23623 }, { "epoch": 0.6897316866660828, "grad_norm": 0.8627258230881144, "learning_rate": 1.723763179237632e-06, "loss": 0.6975, "step": 23624 }, { "epoch": 0.6897608828939301, "grad_norm": 0.6911643008207446, "learning_rate": 1.7236009732360098e-06, "loss": 0.6146, "step": 23625 }, { "epoch": 0.6897900791217775, "grad_norm": 0.7550421885993831, "learning_rate": 1.7234387672343878e-06, "loss": 0.702, "step": 23626 }, { "epoch": 0.6898192753496248, "grad_norm": 0.7321215710086775, "learning_rate": 1.7232765612327656e-06, "loss": 0.6967, "step": 23627 }, { "epoch": 0.6898484715774722, "grad_norm": 0.6710733536609508, "learning_rate": 1.7231143552311436e-06, "loss": 0.575, "step": 23628 }, { "epoch": 0.6898776678053196, "grad_norm": 0.7908350691212471, "learning_rate": 1.7229521492295218e-06, "loss": 0.7046, "step": 23629 }, { "epoch": 0.6899068640331669, "grad_norm": 0.7516488544644916, "learning_rate": 1.7227899432278996e-06, "loss": 0.6868, "step": 23630 }, { "epoch": 0.6899360602610143, "grad_norm": 0.7414563110156608, "learning_rate": 1.7226277372262776e-06, "loss": 0.6642, "step": 23631 }, { "epoch": 0.6899652564888616, "grad_norm": 0.7799722021360882, "learning_rate": 1.7224655312246554e-06, "loss": 0.7264, "step": 23632 }, { "epoch": 0.689994452716709, "grad_norm": 0.8012862039478125, "learning_rate": 1.7223033252230334e-06, "loss": 0.7582, "step": 23633 }, { "epoch": 0.6900236489445564, "grad_norm": 0.7175862349542085, "learning_rate": 1.7221411192214112e-06, "loss": 0.632, "step": 23634 }, { "epoch": 0.6900528451724037, "grad_norm": 0.7128958064107758, "learning_rate": 1.7219789132197892e-06, "loss": 0.623, "step": 23635 }, { "epoch": 0.6900820414002511, "grad_norm": 0.7656021772586992, "learning_rate": 1.7218167072181672e-06, "loss": 0.6408, "step": 23636 }, { "epoch": 0.6901112376280985, "grad_norm": 0.7498927006175635, "learning_rate": 1.721654501216545e-06, "loss": 0.6619, "step": 23637 }, { "epoch": 0.6901404338559458, "grad_norm": 0.6553196838953634, "learning_rate": 1.7214922952149232e-06, "loss": 0.5017, "step": 23638 }, { "epoch": 0.6901696300837932, "grad_norm": 0.7501277515356449, "learning_rate": 1.7213300892133012e-06, "loss": 0.6767, "step": 23639 }, { "epoch": 0.6901988263116405, "grad_norm": 0.7578683448867515, "learning_rate": 1.721167883211679e-06, "loss": 0.6797, "step": 23640 }, { "epoch": 0.6902280225394879, "grad_norm": 0.6894399134321312, "learning_rate": 1.721005677210057e-06, "loss": 0.5681, "step": 23641 }, { "epoch": 0.6902572187673353, "grad_norm": 0.7870121851555744, "learning_rate": 1.7208434712084348e-06, "loss": 0.6439, "step": 23642 }, { "epoch": 0.6902864149951826, "grad_norm": 0.7016201314089237, "learning_rate": 1.7206812652068128e-06, "loss": 0.6245, "step": 23643 }, { "epoch": 0.69031561122303, "grad_norm": 0.7302295458052144, "learning_rate": 1.7205190592051906e-06, "loss": 0.6549, "step": 23644 }, { "epoch": 0.6903448074508773, "grad_norm": 0.723002299534697, "learning_rate": 1.7203568532035686e-06, "loss": 0.6643, "step": 23645 }, { "epoch": 0.6903740036787247, "grad_norm": 0.7069751009817756, "learning_rate": 1.7201946472019464e-06, "loss": 0.5942, "step": 23646 }, { "epoch": 0.6904031999065721, "grad_norm": 0.7052474418705962, "learning_rate": 1.7200324412003244e-06, "loss": 0.6994, "step": 23647 }, { "epoch": 0.6904323961344194, "grad_norm": 0.7332046968697576, "learning_rate": 1.7198702351987027e-06, "loss": 0.6324, "step": 23648 }, { "epoch": 0.6904615923622668, "grad_norm": 0.6594635413553991, "learning_rate": 1.7197080291970805e-06, "loss": 0.5582, "step": 23649 }, { "epoch": 0.6904907885901141, "grad_norm": 0.7675887698912662, "learning_rate": 1.7195458231954585e-06, "loss": 0.7173, "step": 23650 }, { "epoch": 0.6905199848179615, "grad_norm": 0.7588593435670353, "learning_rate": 1.7193836171938363e-06, "loss": 0.6217, "step": 23651 }, { "epoch": 0.6905491810458089, "grad_norm": 0.7323194337388163, "learning_rate": 1.7192214111922143e-06, "loss": 0.6095, "step": 23652 }, { "epoch": 0.6905783772736562, "grad_norm": 0.7768000682588249, "learning_rate": 1.719059205190592e-06, "loss": 0.6436, "step": 23653 }, { "epoch": 0.6906075735015036, "grad_norm": 0.6683306612753613, "learning_rate": 1.71889699918897e-06, "loss": 0.5674, "step": 23654 }, { "epoch": 0.690636769729351, "grad_norm": 0.7504143104089401, "learning_rate": 1.718734793187348e-06, "loss": 0.6709, "step": 23655 }, { "epoch": 0.6906659659571983, "grad_norm": 0.733252234040747, "learning_rate": 1.7185725871857259e-06, "loss": 0.664, "step": 23656 }, { "epoch": 0.6906951621850457, "grad_norm": 0.7085227062738249, "learning_rate": 1.718410381184104e-06, "loss": 0.6222, "step": 23657 }, { "epoch": 0.690724358412893, "grad_norm": 0.750406423162109, "learning_rate": 1.7182481751824819e-06, "loss": 0.6502, "step": 23658 }, { "epoch": 0.6907535546407404, "grad_norm": 0.7101711390904667, "learning_rate": 1.71808596918086e-06, "loss": 0.651, "step": 23659 }, { "epoch": 0.6907827508685878, "grad_norm": 0.6938684525844668, "learning_rate": 1.717923763179238e-06, "loss": 0.6435, "step": 23660 }, { "epoch": 0.6908119470964351, "grad_norm": 0.757736782232961, "learning_rate": 1.7177615571776157e-06, "loss": 0.6221, "step": 23661 }, { "epoch": 0.6908411433242825, "grad_norm": 0.755785578221167, "learning_rate": 1.7175993511759937e-06, "loss": 0.6412, "step": 23662 }, { "epoch": 0.6908703395521298, "grad_norm": 0.7564610471023143, "learning_rate": 1.7174371451743715e-06, "loss": 0.7299, "step": 23663 }, { "epoch": 0.6908995357799772, "grad_norm": 0.7445455595725425, "learning_rate": 1.7172749391727495e-06, "loss": 0.6719, "step": 23664 }, { "epoch": 0.6909287320078246, "grad_norm": 0.675010882231025, "learning_rate": 1.7171127331711273e-06, "loss": 0.5671, "step": 23665 }, { "epoch": 0.6909579282356719, "grad_norm": 0.7021548776865664, "learning_rate": 1.7169505271695053e-06, "loss": 0.5946, "step": 23666 }, { "epoch": 0.6909871244635193, "grad_norm": 0.7335535070720254, "learning_rate": 1.7167883211678835e-06, "loss": 0.6395, "step": 23667 }, { "epoch": 0.6910163206913666, "grad_norm": 0.7309717952380886, "learning_rate": 1.7166261151662613e-06, "loss": 0.6345, "step": 23668 }, { "epoch": 0.691045516919214, "grad_norm": 0.7033638623084274, "learning_rate": 1.7164639091646393e-06, "loss": 0.5662, "step": 23669 }, { "epoch": 0.6910747131470614, "grad_norm": 0.6757713858766096, "learning_rate": 1.7163017031630171e-06, "loss": 0.5928, "step": 23670 }, { "epoch": 0.6911039093749087, "grad_norm": 0.7337178437178801, "learning_rate": 1.7161394971613951e-06, "loss": 0.5947, "step": 23671 }, { "epoch": 0.6911331056027561, "grad_norm": 0.7363549061451782, "learning_rate": 1.715977291159773e-06, "loss": 0.6404, "step": 23672 }, { "epoch": 0.6911623018306035, "grad_norm": 0.7651232823993123, "learning_rate": 1.715815085158151e-06, "loss": 0.679, "step": 23673 }, { "epoch": 0.6911914980584508, "grad_norm": 0.6605659121921331, "learning_rate": 1.715652879156529e-06, "loss": 0.5614, "step": 23674 }, { "epoch": 0.6912206942862982, "grad_norm": 0.7114206594598583, "learning_rate": 1.7154906731549067e-06, "loss": 0.6167, "step": 23675 }, { "epoch": 0.6912498905141455, "grad_norm": 0.7620124503601322, "learning_rate": 1.715328467153285e-06, "loss": 0.6709, "step": 23676 }, { "epoch": 0.6912790867419929, "grad_norm": 0.7492303203553716, "learning_rate": 1.7151662611516627e-06, "loss": 0.6933, "step": 23677 }, { "epoch": 0.6913082829698403, "grad_norm": 0.7282985119256061, "learning_rate": 1.7150040551500408e-06, "loss": 0.6681, "step": 23678 }, { "epoch": 0.6913374791976876, "grad_norm": 0.7080088681826469, "learning_rate": 1.7148418491484188e-06, "loss": 0.6259, "step": 23679 }, { "epoch": 0.691366675425535, "grad_norm": 0.7291947549604806, "learning_rate": 1.7146796431467966e-06, "loss": 0.6374, "step": 23680 }, { "epoch": 0.6913958716533823, "grad_norm": 0.7021430487474724, "learning_rate": 1.7145174371451746e-06, "loss": 0.6263, "step": 23681 }, { "epoch": 0.6914250678812297, "grad_norm": 0.7418549351974815, "learning_rate": 1.7143552311435524e-06, "loss": 0.6209, "step": 23682 }, { "epoch": 0.6914542641090771, "grad_norm": 0.7380198635281698, "learning_rate": 1.7141930251419304e-06, "loss": 0.6299, "step": 23683 }, { "epoch": 0.6914834603369244, "grad_norm": 0.6873699623006873, "learning_rate": 1.7140308191403082e-06, "loss": 0.617, "step": 23684 }, { "epoch": 0.6915126565647718, "grad_norm": 0.7711040376769148, "learning_rate": 1.7138686131386864e-06, "loss": 0.6225, "step": 23685 }, { "epoch": 0.6915418527926191, "grad_norm": 0.7083415742114443, "learning_rate": 1.7137064071370644e-06, "loss": 0.5683, "step": 23686 }, { "epoch": 0.6915710490204665, "grad_norm": 20.737661426539272, "learning_rate": 1.7135442011354422e-06, "loss": 1.2285, "step": 23687 }, { "epoch": 0.6916002452483139, "grad_norm": 0.7441102113974545, "learning_rate": 1.7133819951338202e-06, "loss": 0.6446, "step": 23688 }, { "epoch": 0.6916294414761612, "grad_norm": 0.6831437951365483, "learning_rate": 1.713219789132198e-06, "loss": 0.5621, "step": 23689 }, { "epoch": 0.6916586377040086, "grad_norm": 0.8217512237917917, "learning_rate": 1.713057583130576e-06, "loss": 0.5939, "step": 23690 }, { "epoch": 0.691687833931856, "grad_norm": 0.6911410330119588, "learning_rate": 1.7128953771289538e-06, "loss": 0.5493, "step": 23691 }, { "epoch": 0.6917170301597033, "grad_norm": 0.8250625841712862, "learning_rate": 1.7127331711273318e-06, "loss": 0.7772, "step": 23692 }, { "epoch": 0.6917462263875507, "grad_norm": 0.7191456834227343, "learning_rate": 1.7125709651257098e-06, "loss": 0.6441, "step": 23693 }, { "epoch": 0.691775422615398, "grad_norm": 0.6955354292453664, "learning_rate": 1.7124087591240876e-06, "loss": 0.5915, "step": 23694 }, { "epoch": 0.6918046188432454, "grad_norm": 0.7327161585857546, "learning_rate": 1.7122465531224658e-06, "loss": 0.6328, "step": 23695 }, { "epoch": 0.6918338150710928, "grad_norm": 0.7332240936402324, "learning_rate": 1.7120843471208436e-06, "loss": 0.6571, "step": 23696 }, { "epoch": 0.6918630112989401, "grad_norm": 0.7120976378860606, "learning_rate": 1.7119221411192216e-06, "loss": 0.5935, "step": 23697 }, { "epoch": 0.6918922075267875, "grad_norm": 0.6957365698318054, "learning_rate": 1.7117599351175996e-06, "loss": 0.5899, "step": 23698 }, { "epoch": 0.691921403754635, "grad_norm": 0.6995281350440085, "learning_rate": 1.7115977291159774e-06, "loss": 0.5987, "step": 23699 }, { "epoch": 0.6919505999824823, "grad_norm": 0.7184815661673676, "learning_rate": 1.7114355231143554e-06, "loss": 0.5981, "step": 23700 }, { "epoch": 0.6919797962103297, "grad_norm": 0.6783281048655981, "learning_rate": 1.7112733171127332e-06, "loss": 0.592, "step": 23701 }, { "epoch": 0.692008992438177, "grad_norm": 0.7914943697558461, "learning_rate": 1.7111111111111112e-06, "loss": 0.6426, "step": 23702 }, { "epoch": 0.6920381886660244, "grad_norm": 0.6804137208765623, "learning_rate": 1.710948905109489e-06, "loss": 0.5832, "step": 23703 }, { "epoch": 0.6920673848938718, "grad_norm": 0.8160078640530156, "learning_rate": 1.7107866991078672e-06, "loss": 0.6752, "step": 23704 }, { "epoch": 0.6920965811217191, "grad_norm": 0.7333536621239386, "learning_rate": 1.7106244931062452e-06, "loss": 0.6213, "step": 23705 }, { "epoch": 0.6921257773495665, "grad_norm": 0.6836084178912869, "learning_rate": 1.710462287104623e-06, "loss": 0.5751, "step": 23706 }, { "epoch": 0.6921549735774138, "grad_norm": 0.7425311096302794, "learning_rate": 1.710300081103001e-06, "loss": 0.5805, "step": 23707 }, { "epoch": 0.6921841698052612, "grad_norm": 0.6823372956190792, "learning_rate": 1.7101378751013788e-06, "loss": 0.5647, "step": 23708 }, { "epoch": 0.6922133660331086, "grad_norm": 0.6793175454693943, "learning_rate": 1.7099756690997568e-06, "loss": 0.525, "step": 23709 }, { "epoch": 0.6922425622609559, "grad_norm": 0.8945377191766702, "learning_rate": 1.7098134630981346e-06, "loss": 0.6312, "step": 23710 }, { "epoch": 0.6922717584888033, "grad_norm": 0.7429793441296807, "learning_rate": 1.7096512570965126e-06, "loss": 0.6722, "step": 23711 }, { "epoch": 0.6923009547166507, "grad_norm": 0.7723544836281793, "learning_rate": 1.7094890510948907e-06, "loss": 0.6764, "step": 23712 }, { "epoch": 0.692330150944498, "grad_norm": 0.7068005532903496, "learning_rate": 1.7093268450932684e-06, "loss": 0.6164, "step": 23713 }, { "epoch": 0.6923593471723454, "grad_norm": 0.7037609801070035, "learning_rate": 1.7091646390916467e-06, "loss": 0.644, "step": 23714 }, { "epoch": 0.6923885434001927, "grad_norm": 0.7678898060316774, "learning_rate": 1.7090024330900245e-06, "loss": 0.6615, "step": 23715 }, { "epoch": 0.6924177396280401, "grad_norm": 0.7979211087489311, "learning_rate": 1.7088402270884025e-06, "loss": 0.7384, "step": 23716 }, { "epoch": 0.6924469358558875, "grad_norm": 0.6992463047774999, "learning_rate": 1.7086780210867805e-06, "loss": 0.5806, "step": 23717 }, { "epoch": 0.6924761320837348, "grad_norm": 0.7508489214534926, "learning_rate": 1.7085158150851583e-06, "loss": 0.6872, "step": 23718 }, { "epoch": 0.6925053283115822, "grad_norm": 0.7059212202028966, "learning_rate": 1.7083536090835363e-06, "loss": 0.6239, "step": 23719 }, { "epoch": 0.6925345245394295, "grad_norm": 0.7386518085289507, "learning_rate": 1.708191403081914e-06, "loss": 0.6667, "step": 23720 }, { "epoch": 0.6925637207672769, "grad_norm": 0.6934692587086612, "learning_rate": 1.708029197080292e-06, "loss": 0.6141, "step": 23721 }, { "epoch": 0.6925929169951243, "grad_norm": 0.8112097082945343, "learning_rate": 1.7078669910786699e-06, "loss": 0.7089, "step": 23722 }, { "epoch": 0.6926221132229716, "grad_norm": 0.6816360596011225, "learning_rate": 1.707704785077048e-06, "loss": 0.5565, "step": 23723 }, { "epoch": 0.692651309450819, "grad_norm": 0.7045469818268607, "learning_rate": 1.707542579075426e-06, "loss": 0.6411, "step": 23724 }, { "epoch": 0.6926805056786663, "grad_norm": 0.7980220998965285, "learning_rate": 1.707380373073804e-06, "loss": 0.74, "step": 23725 }, { "epoch": 0.6927097019065137, "grad_norm": 0.728612031826845, "learning_rate": 1.707218167072182e-06, "loss": 0.6388, "step": 23726 }, { "epoch": 0.6927388981343611, "grad_norm": 0.7383236635512694, "learning_rate": 1.7070559610705597e-06, "loss": 0.6842, "step": 23727 }, { "epoch": 0.6927680943622084, "grad_norm": 0.7188504255052521, "learning_rate": 1.7068937550689377e-06, "loss": 0.6212, "step": 23728 }, { "epoch": 0.6927972905900558, "grad_norm": 0.7874747353558473, "learning_rate": 1.7067315490673155e-06, "loss": 0.7212, "step": 23729 }, { "epoch": 0.6928264868179032, "grad_norm": 0.7245654606101821, "learning_rate": 1.7065693430656935e-06, "loss": 0.6478, "step": 23730 }, { "epoch": 0.6928556830457505, "grad_norm": 0.6536775545832599, "learning_rate": 1.7064071370640713e-06, "loss": 0.5641, "step": 23731 }, { "epoch": 0.6928848792735979, "grad_norm": 0.7914483799457245, "learning_rate": 1.7062449310624493e-06, "loss": 0.6425, "step": 23732 }, { "epoch": 0.6929140755014452, "grad_norm": 0.7678952605668937, "learning_rate": 1.7060827250608275e-06, "loss": 0.7618, "step": 23733 }, { "epoch": 0.6929432717292926, "grad_norm": 0.7261215320814823, "learning_rate": 1.7059205190592053e-06, "loss": 0.6363, "step": 23734 }, { "epoch": 0.69297246795714, "grad_norm": 0.8042190388550973, "learning_rate": 1.7057583130575833e-06, "loss": 0.6141, "step": 23735 }, { "epoch": 0.6930016641849873, "grad_norm": 0.6959597103273083, "learning_rate": 1.7055961070559613e-06, "loss": 0.6045, "step": 23736 }, { "epoch": 0.6930308604128347, "grad_norm": 0.7675912799104833, "learning_rate": 1.7054339010543391e-06, "loss": 0.646, "step": 23737 }, { "epoch": 0.693060056640682, "grad_norm": 0.6713764685297754, "learning_rate": 1.7052716950527171e-06, "loss": 0.5821, "step": 23738 }, { "epoch": 0.6930892528685294, "grad_norm": 0.7501315067015786, "learning_rate": 1.705109489051095e-06, "loss": 0.6504, "step": 23739 }, { "epoch": 0.6931184490963768, "grad_norm": 0.7022047979291528, "learning_rate": 1.704947283049473e-06, "loss": 0.6079, "step": 23740 }, { "epoch": 0.6931476453242241, "grad_norm": 0.6879495022596669, "learning_rate": 1.7047850770478507e-06, "loss": 0.6138, "step": 23741 }, { "epoch": 0.6931768415520715, "grad_norm": 0.7055698135738434, "learning_rate": 1.704622871046229e-06, "loss": 0.5941, "step": 23742 }, { "epoch": 0.6932060377799188, "grad_norm": 0.7255062863735228, "learning_rate": 1.704460665044607e-06, "loss": 0.6251, "step": 23743 }, { "epoch": 0.6932352340077662, "grad_norm": 0.7408533535006172, "learning_rate": 1.7042984590429848e-06, "loss": 0.5541, "step": 23744 }, { "epoch": 0.6932644302356136, "grad_norm": 0.7439317831977638, "learning_rate": 1.7041362530413628e-06, "loss": 0.6577, "step": 23745 }, { "epoch": 0.6932936264634609, "grad_norm": 0.7395302693379063, "learning_rate": 1.7039740470397406e-06, "loss": 0.7078, "step": 23746 }, { "epoch": 0.6933228226913083, "grad_norm": 0.7482496571686589, "learning_rate": 1.7038118410381186e-06, "loss": 0.7133, "step": 23747 }, { "epoch": 0.6933520189191557, "grad_norm": 0.7259131197538466, "learning_rate": 1.7036496350364964e-06, "loss": 0.6251, "step": 23748 }, { "epoch": 0.693381215147003, "grad_norm": 0.7473164905915886, "learning_rate": 1.7034874290348744e-06, "loss": 0.6619, "step": 23749 }, { "epoch": 0.6934104113748504, "grad_norm": 0.7267549540669379, "learning_rate": 1.7033252230332522e-06, "loss": 0.6824, "step": 23750 }, { "epoch": 0.6934396076026977, "grad_norm": 0.7884229247697946, "learning_rate": 1.7031630170316302e-06, "loss": 0.7674, "step": 23751 }, { "epoch": 0.6934688038305451, "grad_norm": 0.6887389451105532, "learning_rate": 1.7030008110300084e-06, "loss": 0.6071, "step": 23752 }, { "epoch": 0.6934980000583925, "grad_norm": 0.8737592627175401, "learning_rate": 1.7028386050283862e-06, "loss": 0.6092, "step": 23753 }, { "epoch": 0.6935271962862398, "grad_norm": 0.7163331422935838, "learning_rate": 1.7026763990267642e-06, "loss": 0.6465, "step": 23754 }, { "epoch": 0.6935563925140872, "grad_norm": 0.6719103176400963, "learning_rate": 1.7025141930251422e-06, "loss": 0.5823, "step": 23755 }, { "epoch": 0.6935855887419345, "grad_norm": 0.7805117096522136, "learning_rate": 1.70235198702352e-06, "loss": 0.721, "step": 23756 }, { "epoch": 0.6936147849697819, "grad_norm": 0.7083705618424109, "learning_rate": 1.702189781021898e-06, "loss": 0.6382, "step": 23757 }, { "epoch": 0.6936439811976293, "grad_norm": 0.7651143996867023, "learning_rate": 1.7020275750202758e-06, "loss": 0.5331, "step": 23758 }, { "epoch": 0.6936731774254766, "grad_norm": 0.7556002830824491, "learning_rate": 1.7018653690186538e-06, "loss": 0.639, "step": 23759 }, { "epoch": 0.693702373653324, "grad_norm": 0.7883363539197846, "learning_rate": 1.7017031630170316e-06, "loss": 0.7022, "step": 23760 }, { "epoch": 0.6937315698811714, "grad_norm": 0.8083038018074086, "learning_rate": 1.7015409570154098e-06, "loss": 0.8052, "step": 23761 }, { "epoch": 0.6937607661090187, "grad_norm": 0.7492828005763281, "learning_rate": 1.7013787510137878e-06, "loss": 0.736, "step": 23762 }, { "epoch": 0.6937899623368661, "grad_norm": 0.7619550334536743, "learning_rate": 1.7012165450121656e-06, "loss": 0.6818, "step": 23763 }, { "epoch": 0.6938191585647134, "grad_norm": 0.71183033526836, "learning_rate": 1.7010543390105436e-06, "loss": 0.5889, "step": 23764 }, { "epoch": 0.6938483547925608, "grad_norm": 0.7335006586004902, "learning_rate": 1.7008921330089214e-06, "loss": 0.6688, "step": 23765 }, { "epoch": 0.6938775510204082, "grad_norm": 0.7931393882609709, "learning_rate": 1.7007299270072994e-06, "loss": 0.6923, "step": 23766 }, { "epoch": 0.6939067472482555, "grad_norm": 0.6993150673524346, "learning_rate": 1.7005677210056772e-06, "loss": 0.607, "step": 23767 }, { "epoch": 0.6939359434761029, "grad_norm": 0.6931303847554751, "learning_rate": 1.7004055150040552e-06, "loss": 0.6191, "step": 23768 }, { "epoch": 0.6939651397039502, "grad_norm": 0.7487289137693088, "learning_rate": 1.700243309002433e-06, "loss": 0.6781, "step": 23769 }, { "epoch": 0.6939943359317976, "grad_norm": 0.722179241726636, "learning_rate": 1.7000811030008112e-06, "loss": 0.5659, "step": 23770 }, { "epoch": 0.694023532159645, "grad_norm": 0.7486066675495291, "learning_rate": 1.6999188969991892e-06, "loss": 0.6808, "step": 23771 }, { "epoch": 0.6940527283874923, "grad_norm": 0.7045919915109862, "learning_rate": 1.699756690997567e-06, "loss": 0.6714, "step": 23772 }, { "epoch": 0.6940819246153397, "grad_norm": 0.7298821238408089, "learning_rate": 1.699594484995945e-06, "loss": 0.6616, "step": 23773 }, { "epoch": 0.694111120843187, "grad_norm": 0.6958235145822597, "learning_rate": 1.699432278994323e-06, "loss": 0.6031, "step": 23774 }, { "epoch": 0.6941403170710344, "grad_norm": 0.8049127486243668, "learning_rate": 1.6992700729927008e-06, "loss": 0.6679, "step": 23775 }, { "epoch": 0.6941695132988818, "grad_norm": 0.7166509141497458, "learning_rate": 1.6991078669910789e-06, "loss": 0.6505, "step": 23776 }, { "epoch": 0.6941987095267291, "grad_norm": 0.7125262414752983, "learning_rate": 1.6989456609894566e-06, "loss": 0.627, "step": 23777 }, { "epoch": 0.6942279057545765, "grad_norm": 0.670613423513358, "learning_rate": 1.6987834549878347e-06, "loss": 0.5641, "step": 23778 }, { "epoch": 0.6942571019824239, "grad_norm": 0.7416045322411948, "learning_rate": 1.6986212489862125e-06, "loss": 0.6575, "step": 23779 }, { "epoch": 0.6942862982102712, "grad_norm": 0.7190752904091857, "learning_rate": 1.6984590429845907e-06, "loss": 0.6379, "step": 23780 }, { "epoch": 0.6943154944381186, "grad_norm": 0.7847880996818687, "learning_rate": 1.6982968369829687e-06, "loss": 0.6805, "step": 23781 }, { "epoch": 0.6943446906659659, "grad_norm": 0.6942727777261994, "learning_rate": 1.6981346309813465e-06, "loss": 0.606, "step": 23782 }, { "epoch": 0.6943738868938133, "grad_norm": 0.7288887680486048, "learning_rate": 1.6979724249797245e-06, "loss": 0.6452, "step": 23783 }, { "epoch": 0.6944030831216607, "grad_norm": 0.7228504176524819, "learning_rate": 1.6978102189781023e-06, "loss": 0.6208, "step": 23784 }, { "epoch": 0.694432279349508, "grad_norm": 0.910047233121797, "learning_rate": 1.6976480129764803e-06, "loss": 0.7564, "step": 23785 }, { "epoch": 0.6944614755773554, "grad_norm": 0.7095074086821326, "learning_rate": 1.697485806974858e-06, "loss": 0.6266, "step": 23786 }, { "epoch": 0.6944906718052027, "grad_norm": 0.7776356406660433, "learning_rate": 1.697323600973236e-06, "loss": 0.6377, "step": 23787 }, { "epoch": 0.6945198680330501, "grad_norm": 0.7566383971199093, "learning_rate": 1.6971613949716139e-06, "loss": 0.6453, "step": 23788 }, { "epoch": 0.6945490642608975, "grad_norm": 0.7569355831449834, "learning_rate": 1.696999188969992e-06, "loss": 0.7312, "step": 23789 }, { "epoch": 0.6945782604887448, "grad_norm": 0.7657101510279272, "learning_rate": 1.69683698296837e-06, "loss": 0.6247, "step": 23790 }, { "epoch": 0.6946074567165922, "grad_norm": 0.6939007354705983, "learning_rate": 1.696674776966748e-06, "loss": 0.6095, "step": 23791 }, { "epoch": 0.6946366529444395, "grad_norm": 0.7235160101358042, "learning_rate": 1.696512570965126e-06, "loss": 0.6637, "step": 23792 }, { "epoch": 0.6946658491722869, "grad_norm": 0.7099446526867316, "learning_rate": 1.6963503649635037e-06, "loss": 0.657, "step": 23793 }, { "epoch": 0.6946950454001343, "grad_norm": 0.6843790309921503, "learning_rate": 1.6961881589618817e-06, "loss": 0.5697, "step": 23794 }, { "epoch": 0.6947242416279816, "grad_norm": 0.6458047014375569, "learning_rate": 1.6960259529602597e-06, "loss": 0.5365, "step": 23795 }, { "epoch": 0.694753437855829, "grad_norm": 0.6797964714565151, "learning_rate": 1.6958637469586375e-06, "loss": 0.5678, "step": 23796 }, { "epoch": 0.6947826340836764, "grad_norm": 0.7951224371067489, "learning_rate": 1.6957015409570155e-06, "loss": 0.696, "step": 23797 }, { "epoch": 0.6948118303115237, "grad_norm": 0.7018648313461661, "learning_rate": 1.6955393349553933e-06, "loss": 0.5842, "step": 23798 }, { "epoch": 0.6948410265393711, "grad_norm": 0.7156559607212146, "learning_rate": 1.6953771289537715e-06, "loss": 0.6605, "step": 23799 }, { "epoch": 0.6948702227672184, "grad_norm": 0.6519790913694475, "learning_rate": 1.6952149229521495e-06, "loss": 0.539, "step": 23800 }, { "epoch": 0.6948994189950658, "grad_norm": 0.6811605817483335, "learning_rate": 1.6950527169505273e-06, "loss": 0.5712, "step": 23801 }, { "epoch": 0.6949286152229132, "grad_norm": 0.7556358149460158, "learning_rate": 1.6948905109489053e-06, "loss": 0.7219, "step": 23802 }, { "epoch": 0.6949578114507605, "grad_norm": 0.7801548034194331, "learning_rate": 1.6947283049472831e-06, "loss": 0.6538, "step": 23803 }, { "epoch": 0.6949870076786079, "grad_norm": 0.6776339255659102, "learning_rate": 1.6945660989456611e-06, "loss": 0.573, "step": 23804 }, { "epoch": 0.6950162039064552, "grad_norm": 0.6688097451422907, "learning_rate": 1.694403892944039e-06, "loss": 0.5288, "step": 23805 }, { "epoch": 0.6950454001343026, "grad_norm": 0.7630850785822931, "learning_rate": 1.694241686942417e-06, "loss": 0.7044, "step": 23806 }, { "epoch": 0.69507459636215, "grad_norm": 0.7034439727661365, "learning_rate": 1.6940794809407947e-06, "loss": 0.6497, "step": 23807 }, { "epoch": 0.6951037925899973, "grad_norm": 0.7224910549878777, "learning_rate": 1.693917274939173e-06, "loss": 0.6498, "step": 23808 }, { "epoch": 0.6951329888178447, "grad_norm": 0.8061134015252268, "learning_rate": 1.693755068937551e-06, "loss": 0.7337, "step": 23809 }, { "epoch": 0.695162185045692, "grad_norm": 0.7173586859005243, "learning_rate": 1.6935928629359288e-06, "loss": 0.6281, "step": 23810 }, { "epoch": 0.6951913812735394, "grad_norm": 0.7658061327105904, "learning_rate": 1.6934306569343068e-06, "loss": 0.6948, "step": 23811 }, { "epoch": 0.6952205775013868, "grad_norm": 0.6348671091516359, "learning_rate": 1.6932684509326846e-06, "loss": 0.476, "step": 23812 }, { "epoch": 0.6952497737292341, "grad_norm": 0.729605849667471, "learning_rate": 1.6931062449310626e-06, "loss": 0.6304, "step": 23813 }, { "epoch": 0.6952789699570815, "grad_norm": 0.762518537286062, "learning_rate": 1.6929440389294406e-06, "loss": 0.697, "step": 23814 }, { "epoch": 0.6953081661849289, "grad_norm": 0.7352809919529621, "learning_rate": 1.6927818329278184e-06, "loss": 0.7016, "step": 23815 }, { "epoch": 0.6953373624127762, "grad_norm": 0.7120951004895435, "learning_rate": 1.6926196269261964e-06, "loss": 0.6306, "step": 23816 }, { "epoch": 0.6953665586406236, "grad_norm": 0.7795631701496566, "learning_rate": 1.6924574209245742e-06, "loss": 0.7673, "step": 23817 }, { "epoch": 0.6953957548684709, "grad_norm": 0.7767972584950525, "learning_rate": 1.6922952149229524e-06, "loss": 0.7071, "step": 23818 }, { "epoch": 0.6954249510963184, "grad_norm": 0.7227471282809684, "learning_rate": 1.6921330089213304e-06, "loss": 0.6178, "step": 23819 }, { "epoch": 0.6954541473241658, "grad_norm": 0.7560399087479522, "learning_rate": 1.6919708029197082e-06, "loss": 0.668, "step": 23820 }, { "epoch": 0.6954833435520131, "grad_norm": 0.7418334112948046, "learning_rate": 1.6918085969180862e-06, "loss": 0.5969, "step": 23821 }, { "epoch": 0.6955125397798605, "grad_norm": 0.6791379749552221, "learning_rate": 1.691646390916464e-06, "loss": 0.5759, "step": 23822 }, { "epoch": 0.6955417360077079, "grad_norm": 0.7292778655398233, "learning_rate": 1.691484184914842e-06, "loss": 0.6422, "step": 23823 }, { "epoch": 0.6955709322355552, "grad_norm": 0.8014822556550568, "learning_rate": 1.6913219789132198e-06, "loss": 0.7238, "step": 23824 }, { "epoch": 0.6956001284634026, "grad_norm": 0.8184262500577286, "learning_rate": 1.6911597729115978e-06, "loss": 0.6527, "step": 23825 }, { "epoch": 0.6956293246912499, "grad_norm": 0.736225366399509, "learning_rate": 1.6909975669099756e-06, "loss": 0.6231, "step": 23826 }, { "epoch": 0.6956585209190973, "grad_norm": 0.7425108991453176, "learning_rate": 1.6908353609083538e-06, "loss": 0.649, "step": 23827 }, { "epoch": 0.6956877171469447, "grad_norm": 0.7285729307901213, "learning_rate": 1.6906731549067318e-06, "loss": 0.6258, "step": 23828 }, { "epoch": 0.695716913374792, "grad_norm": 0.7088609424052486, "learning_rate": 1.6905109489051096e-06, "loss": 0.6199, "step": 23829 }, { "epoch": 0.6957461096026394, "grad_norm": 0.7390257193893682, "learning_rate": 1.6903487429034876e-06, "loss": 0.5885, "step": 23830 }, { "epoch": 0.6957753058304867, "grad_norm": 0.780296681117317, "learning_rate": 1.6901865369018654e-06, "loss": 0.7118, "step": 23831 }, { "epoch": 0.6958045020583341, "grad_norm": 0.7211053055682282, "learning_rate": 1.6900243309002434e-06, "loss": 0.5982, "step": 23832 }, { "epoch": 0.6958336982861815, "grad_norm": 0.7391247200386765, "learning_rate": 1.6898621248986214e-06, "loss": 0.656, "step": 23833 }, { "epoch": 0.6958628945140288, "grad_norm": 0.6256251406253637, "learning_rate": 1.6896999188969992e-06, "loss": 0.4861, "step": 23834 }, { "epoch": 0.6958920907418762, "grad_norm": 0.7413421073707646, "learning_rate": 1.6895377128953772e-06, "loss": 0.6991, "step": 23835 }, { "epoch": 0.6959212869697236, "grad_norm": 0.7286113399582916, "learning_rate": 1.689375506893755e-06, "loss": 0.6495, "step": 23836 }, { "epoch": 0.6959504831975709, "grad_norm": 0.6932785082709791, "learning_rate": 1.6892133008921332e-06, "loss": 0.5897, "step": 23837 }, { "epoch": 0.6959796794254183, "grad_norm": 0.6933027407366202, "learning_rate": 1.6890510948905113e-06, "loss": 0.6006, "step": 23838 }, { "epoch": 0.6960088756532656, "grad_norm": 0.7299231415064561, "learning_rate": 1.688888888888889e-06, "loss": 0.6456, "step": 23839 }, { "epoch": 0.696038071881113, "grad_norm": 0.760892484281195, "learning_rate": 1.688726682887267e-06, "loss": 0.6731, "step": 23840 }, { "epoch": 0.6960672681089604, "grad_norm": 0.6725546579181131, "learning_rate": 1.6885644768856448e-06, "loss": 0.6179, "step": 23841 }, { "epoch": 0.6960964643368077, "grad_norm": 0.7294839684142221, "learning_rate": 1.6884022708840229e-06, "loss": 0.6409, "step": 23842 }, { "epoch": 0.6961256605646551, "grad_norm": 0.7157560669941403, "learning_rate": 1.6882400648824007e-06, "loss": 0.6519, "step": 23843 }, { "epoch": 0.6961548567925024, "grad_norm": 0.7260180762929725, "learning_rate": 1.6880778588807787e-06, "loss": 0.6816, "step": 23844 }, { "epoch": 0.6961840530203498, "grad_norm": 0.6601326289784485, "learning_rate": 1.6879156528791565e-06, "loss": 0.5674, "step": 23845 }, { "epoch": 0.6962132492481972, "grad_norm": 0.6822456457183222, "learning_rate": 1.6877534468775347e-06, "loss": 0.5746, "step": 23846 }, { "epoch": 0.6962424454760445, "grad_norm": 0.7928925480556186, "learning_rate": 1.6875912408759127e-06, "loss": 0.7242, "step": 23847 }, { "epoch": 0.6962716417038919, "grad_norm": 0.6879030637226552, "learning_rate": 1.6874290348742905e-06, "loss": 0.6191, "step": 23848 }, { "epoch": 0.6963008379317392, "grad_norm": 0.7070613149681464, "learning_rate": 1.6872668288726685e-06, "loss": 0.5805, "step": 23849 }, { "epoch": 0.6963300341595866, "grad_norm": 0.7663387728891463, "learning_rate": 1.6871046228710463e-06, "loss": 0.6661, "step": 23850 }, { "epoch": 0.696359230387434, "grad_norm": 0.773323637022514, "learning_rate": 1.6869424168694243e-06, "loss": 0.5646, "step": 23851 }, { "epoch": 0.6963884266152813, "grad_norm": 0.7705929875133237, "learning_rate": 1.6867802108678023e-06, "loss": 0.6696, "step": 23852 }, { "epoch": 0.6964176228431287, "grad_norm": 0.6890192091491995, "learning_rate": 1.68661800486618e-06, "loss": 0.6032, "step": 23853 }, { "epoch": 0.696446819070976, "grad_norm": 0.6759984707232671, "learning_rate": 1.686455798864558e-06, "loss": 0.6001, "step": 23854 }, { "epoch": 0.6964760152988234, "grad_norm": 0.6872193564746991, "learning_rate": 1.686293592862936e-06, "loss": 0.6143, "step": 23855 }, { "epoch": 0.6965052115266708, "grad_norm": 0.8066244155656338, "learning_rate": 1.686131386861314e-06, "loss": 0.7587, "step": 23856 }, { "epoch": 0.6965344077545181, "grad_norm": 0.6728464932703917, "learning_rate": 1.6859691808596921e-06, "loss": 0.5943, "step": 23857 }, { "epoch": 0.6965636039823655, "grad_norm": 0.6731054664625474, "learning_rate": 1.68580697485807e-06, "loss": 0.5804, "step": 23858 }, { "epoch": 0.6965928002102129, "grad_norm": 0.7445089160510928, "learning_rate": 1.685644768856448e-06, "loss": 0.698, "step": 23859 }, { "epoch": 0.6966219964380602, "grad_norm": 0.7344575241035133, "learning_rate": 1.6854825628548257e-06, "loss": 0.661, "step": 23860 }, { "epoch": 0.6966511926659076, "grad_norm": 0.7409880626528443, "learning_rate": 1.6853203568532037e-06, "loss": 0.6762, "step": 23861 }, { "epoch": 0.6966803888937549, "grad_norm": 0.6557440003883253, "learning_rate": 1.6851581508515815e-06, "loss": 0.5466, "step": 23862 }, { "epoch": 0.6967095851216023, "grad_norm": 0.7283357272423152, "learning_rate": 1.6849959448499595e-06, "loss": 0.6395, "step": 23863 }, { "epoch": 0.6967387813494497, "grad_norm": 0.751437551828517, "learning_rate": 1.6848337388483373e-06, "loss": 0.6501, "step": 23864 }, { "epoch": 0.696767977577297, "grad_norm": 0.8132403289004899, "learning_rate": 1.6846715328467155e-06, "loss": 0.6882, "step": 23865 }, { "epoch": 0.6967971738051444, "grad_norm": 0.747202254275926, "learning_rate": 1.6845093268450935e-06, "loss": 0.6238, "step": 23866 }, { "epoch": 0.6968263700329917, "grad_norm": 0.6873651885170784, "learning_rate": 1.6843471208434713e-06, "loss": 0.5721, "step": 23867 }, { "epoch": 0.6968555662608391, "grad_norm": 0.7539290728973119, "learning_rate": 1.6841849148418493e-06, "loss": 0.686, "step": 23868 }, { "epoch": 0.6968847624886865, "grad_norm": 0.7163189311037417, "learning_rate": 1.6840227088402271e-06, "loss": 0.5861, "step": 23869 }, { "epoch": 0.6969139587165338, "grad_norm": 0.7778441602868719, "learning_rate": 1.6838605028386051e-06, "loss": 0.6222, "step": 23870 }, { "epoch": 0.6969431549443812, "grad_norm": 0.7531364494046981, "learning_rate": 1.6836982968369831e-06, "loss": 0.7264, "step": 23871 }, { "epoch": 0.6969723511722286, "grad_norm": 0.7720503071390552, "learning_rate": 1.683536090835361e-06, "loss": 0.6376, "step": 23872 }, { "epoch": 0.6970015474000759, "grad_norm": 0.7003281021771693, "learning_rate": 1.683373884833739e-06, "loss": 0.6265, "step": 23873 }, { "epoch": 0.6970307436279233, "grad_norm": 0.7062512230094035, "learning_rate": 1.683211678832117e-06, "loss": 0.6212, "step": 23874 }, { "epoch": 0.6970599398557706, "grad_norm": 0.7304733830476241, "learning_rate": 1.683049472830495e-06, "loss": 0.6505, "step": 23875 }, { "epoch": 0.697089136083618, "grad_norm": 0.708271179657132, "learning_rate": 1.682887266828873e-06, "loss": 0.6085, "step": 23876 }, { "epoch": 0.6971183323114654, "grad_norm": 0.6648210822717525, "learning_rate": 1.6827250608272508e-06, "loss": 0.5682, "step": 23877 }, { "epoch": 0.6971475285393127, "grad_norm": 0.7336904697573571, "learning_rate": 1.6825628548256288e-06, "loss": 0.6171, "step": 23878 }, { "epoch": 0.6971767247671601, "grad_norm": 0.649188468006312, "learning_rate": 1.6824006488240066e-06, "loss": 0.5685, "step": 23879 }, { "epoch": 0.6972059209950074, "grad_norm": 0.7363703010131595, "learning_rate": 1.6822384428223846e-06, "loss": 0.6931, "step": 23880 }, { "epoch": 0.6972351172228548, "grad_norm": 0.6891979579501561, "learning_rate": 1.6820762368207624e-06, "loss": 0.5974, "step": 23881 }, { "epoch": 0.6972643134507022, "grad_norm": 0.6787224656654622, "learning_rate": 1.6819140308191404e-06, "loss": 0.5321, "step": 23882 }, { "epoch": 0.6972935096785495, "grad_norm": 0.7020906091280017, "learning_rate": 1.6817518248175182e-06, "loss": 0.6297, "step": 23883 }, { "epoch": 0.6973227059063969, "grad_norm": 0.6829562424417696, "learning_rate": 1.6815896188158964e-06, "loss": 0.5374, "step": 23884 }, { "epoch": 0.6973519021342443, "grad_norm": 0.714936130812089, "learning_rate": 1.6814274128142744e-06, "loss": 0.5911, "step": 23885 }, { "epoch": 0.6973810983620916, "grad_norm": 0.6413735298167038, "learning_rate": 1.6812652068126522e-06, "loss": 0.5255, "step": 23886 }, { "epoch": 0.697410294589939, "grad_norm": 0.7037575379053502, "learning_rate": 1.6811030008110302e-06, "loss": 0.5769, "step": 23887 }, { "epoch": 0.6974394908177863, "grad_norm": 0.7093325321034251, "learning_rate": 1.680940794809408e-06, "loss": 0.6262, "step": 23888 }, { "epoch": 0.6974686870456337, "grad_norm": 0.7438569308307548, "learning_rate": 1.680778588807786e-06, "loss": 0.6405, "step": 23889 }, { "epoch": 0.6974978832734811, "grad_norm": 0.7936046493076016, "learning_rate": 1.680616382806164e-06, "loss": 0.6099, "step": 23890 }, { "epoch": 0.6975270795013284, "grad_norm": 0.703712124869027, "learning_rate": 1.6804541768045418e-06, "loss": 0.6197, "step": 23891 }, { "epoch": 0.6975562757291758, "grad_norm": 0.7358054387691683, "learning_rate": 1.6802919708029198e-06, "loss": 0.6492, "step": 23892 }, { "epoch": 0.6975854719570231, "grad_norm": 0.7452376536246746, "learning_rate": 1.6801297648012978e-06, "loss": 0.6898, "step": 23893 }, { "epoch": 0.6976146681848705, "grad_norm": 0.707288346492461, "learning_rate": 1.6799675587996758e-06, "loss": 0.6223, "step": 23894 }, { "epoch": 0.6976438644127179, "grad_norm": 0.8389526257518757, "learning_rate": 1.6798053527980538e-06, "loss": 0.6276, "step": 23895 }, { "epoch": 0.6976730606405652, "grad_norm": 0.710880572932162, "learning_rate": 1.6796431467964316e-06, "loss": 0.637, "step": 23896 }, { "epoch": 0.6977022568684126, "grad_norm": 0.7210005026247935, "learning_rate": 1.6794809407948096e-06, "loss": 0.6619, "step": 23897 }, { "epoch": 0.69773145309626, "grad_norm": 0.6881850172906528, "learning_rate": 1.6793187347931874e-06, "loss": 0.6175, "step": 23898 }, { "epoch": 0.6977606493241073, "grad_norm": 0.7522893317836227, "learning_rate": 1.6791565287915654e-06, "loss": 0.686, "step": 23899 }, { "epoch": 0.6977898455519547, "grad_norm": 0.734635308665004, "learning_rate": 1.6789943227899432e-06, "loss": 0.6356, "step": 23900 }, { "epoch": 0.697819041779802, "grad_norm": 0.7709440767788815, "learning_rate": 1.6788321167883212e-06, "loss": 0.6826, "step": 23901 }, { "epoch": 0.6978482380076494, "grad_norm": 0.7584591687098603, "learning_rate": 1.678669910786699e-06, "loss": 0.6468, "step": 23902 }, { "epoch": 0.6978774342354968, "grad_norm": 0.7721149822206961, "learning_rate": 1.6785077047850772e-06, "loss": 0.7108, "step": 23903 }, { "epoch": 0.6979066304633441, "grad_norm": 0.7513100277147094, "learning_rate": 1.6783454987834553e-06, "loss": 0.6785, "step": 23904 }, { "epoch": 0.6979358266911915, "grad_norm": 0.7355599561309838, "learning_rate": 1.678183292781833e-06, "loss": 0.6595, "step": 23905 }, { "epoch": 0.6979650229190388, "grad_norm": 0.7071613611463141, "learning_rate": 1.678021086780211e-06, "loss": 0.5243, "step": 23906 }, { "epoch": 0.6979942191468862, "grad_norm": 0.7152520155911678, "learning_rate": 1.6778588807785889e-06, "loss": 0.6248, "step": 23907 }, { "epoch": 0.6980234153747336, "grad_norm": 0.7280228512729349, "learning_rate": 1.6776966747769669e-06, "loss": 0.6748, "step": 23908 }, { "epoch": 0.6980526116025809, "grad_norm": 0.7375885190942215, "learning_rate": 1.6775344687753449e-06, "loss": 0.6613, "step": 23909 }, { "epoch": 0.6980818078304283, "grad_norm": 0.6525235182499917, "learning_rate": 1.6773722627737227e-06, "loss": 0.551, "step": 23910 }, { "epoch": 0.6981110040582756, "grad_norm": 0.6950433856686121, "learning_rate": 1.6772100567721007e-06, "loss": 0.6133, "step": 23911 }, { "epoch": 0.698140200286123, "grad_norm": 0.76464289692011, "learning_rate": 1.6770478507704787e-06, "loss": 0.7026, "step": 23912 }, { "epoch": 0.6981693965139704, "grad_norm": 0.706039886855263, "learning_rate": 1.6768856447688567e-06, "loss": 0.582, "step": 23913 }, { "epoch": 0.6981985927418177, "grad_norm": 0.7026323164259459, "learning_rate": 1.6767234387672347e-06, "loss": 0.5781, "step": 23914 }, { "epoch": 0.6982277889696651, "grad_norm": 0.7360011185365765, "learning_rate": 1.6765612327656125e-06, "loss": 0.6364, "step": 23915 }, { "epoch": 0.6982569851975124, "grad_norm": 0.7076121491224188, "learning_rate": 1.6763990267639905e-06, "loss": 0.6005, "step": 23916 }, { "epoch": 0.6982861814253598, "grad_norm": 0.7435606901049225, "learning_rate": 1.6762368207623683e-06, "loss": 0.6159, "step": 23917 }, { "epoch": 0.6983153776532072, "grad_norm": 0.7631014908433297, "learning_rate": 1.6760746147607463e-06, "loss": 0.6494, "step": 23918 }, { "epoch": 0.6983445738810545, "grad_norm": 0.7289076870086507, "learning_rate": 1.675912408759124e-06, "loss": 0.6693, "step": 23919 }, { "epoch": 0.6983737701089019, "grad_norm": 0.6734377987851022, "learning_rate": 1.675750202757502e-06, "loss": 0.5979, "step": 23920 }, { "epoch": 0.6984029663367493, "grad_norm": 0.7540159454474883, "learning_rate": 1.6755879967558803e-06, "loss": 0.725, "step": 23921 }, { "epoch": 0.6984321625645966, "grad_norm": 0.6894660405382841, "learning_rate": 1.6754257907542581e-06, "loss": 0.5412, "step": 23922 }, { "epoch": 0.698461358792444, "grad_norm": 0.7445434187222637, "learning_rate": 1.6752635847526361e-06, "loss": 0.646, "step": 23923 }, { "epoch": 0.6984905550202913, "grad_norm": 0.707033341845524, "learning_rate": 1.675101378751014e-06, "loss": 0.6302, "step": 23924 }, { "epoch": 0.6985197512481387, "grad_norm": 0.7165008281376802, "learning_rate": 1.674939172749392e-06, "loss": 0.6494, "step": 23925 }, { "epoch": 0.6985489474759861, "grad_norm": 0.7327359472786893, "learning_rate": 1.6747769667477697e-06, "loss": 0.6487, "step": 23926 }, { "epoch": 0.6985781437038334, "grad_norm": 0.6963126434718006, "learning_rate": 1.6746147607461477e-06, "loss": 0.5871, "step": 23927 }, { "epoch": 0.6986073399316808, "grad_norm": 0.8599638037267036, "learning_rate": 1.6744525547445255e-06, "loss": 0.6767, "step": 23928 }, { "epoch": 0.6986365361595281, "grad_norm": 0.7185591320940514, "learning_rate": 1.6742903487429035e-06, "loss": 0.6059, "step": 23929 }, { "epoch": 0.6986657323873755, "grad_norm": 0.7010366315288942, "learning_rate": 1.6741281427412815e-06, "loss": 0.6002, "step": 23930 }, { "epoch": 0.6986949286152229, "grad_norm": 0.8009275443055098, "learning_rate": 1.6739659367396595e-06, "loss": 0.6367, "step": 23931 }, { "epoch": 0.6987241248430702, "grad_norm": 0.7204960692188669, "learning_rate": 1.6738037307380375e-06, "loss": 0.602, "step": 23932 }, { "epoch": 0.6987533210709176, "grad_norm": 0.7087204603472671, "learning_rate": 1.6736415247364155e-06, "loss": 0.5742, "step": 23933 }, { "epoch": 0.698782517298765, "grad_norm": 0.7695205237835673, "learning_rate": 1.6734793187347933e-06, "loss": 0.6802, "step": 23934 }, { "epoch": 0.6988117135266123, "grad_norm": 0.6460781681887059, "learning_rate": 1.6733171127331713e-06, "loss": 0.5244, "step": 23935 }, { "epoch": 0.6988409097544597, "grad_norm": 0.7453632375351699, "learning_rate": 1.6731549067315491e-06, "loss": 0.6579, "step": 23936 }, { "epoch": 0.698870105982307, "grad_norm": 0.7476170796050535, "learning_rate": 1.6729927007299271e-06, "loss": 0.6812, "step": 23937 }, { "epoch": 0.6988993022101544, "grad_norm": 0.712530536019891, "learning_rate": 1.672830494728305e-06, "loss": 0.6371, "step": 23938 }, { "epoch": 0.6989284984380018, "grad_norm": 0.712673983661924, "learning_rate": 1.672668288726683e-06, "loss": 0.6376, "step": 23939 }, { "epoch": 0.6989576946658492, "grad_norm": 0.8085139712901458, "learning_rate": 1.6725060827250612e-06, "loss": 0.674, "step": 23940 }, { "epoch": 0.6989868908936966, "grad_norm": 0.7904782436527215, "learning_rate": 1.672343876723439e-06, "loss": 0.7222, "step": 23941 }, { "epoch": 0.699016087121544, "grad_norm": 0.6966784175543571, "learning_rate": 1.672181670721817e-06, "loss": 0.6086, "step": 23942 }, { "epoch": 0.6990452833493913, "grad_norm": 0.8242298442208542, "learning_rate": 1.6720194647201948e-06, "loss": 0.6043, "step": 23943 }, { "epoch": 0.6990744795772387, "grad_norm": 0.7722970535115906, "learning_rate": 1.6718572587185728e-06, "loss": 0.6833, "step": 23944 }, { "epoch": 0.699103675805086, "grad_norm": 0.7273185576873551, "learning_rate": 1.6716950527169506e-06, "loss": 0.6761, "step": 23945 }, { "epoch": 0.6991328720329334, "grad_norm": 0.6897101555542978, "learning_rate": 1.6715328467153286e-06, "loss": 0.5936, "step": 23946 }, { "epoch": 0.6991620682607808, "grad_norm": 0.780136056955711, "learning_rate": 1.6713706407137064e-06, "loss": 0.7364, "step": 23947 }, { "epoch": 0.6991912644886281, "grad_norm": 0.7069403082218845, "learning_rate": 1.6712084347120844e-06, "loss": 0.6145, "step": 23948 }, { "epoch": 0.6992204607164755, "grad_norm": 0.7252032482897416, "learning_rate": 1.6710462287104624e-06, "loss": 0.6757, "step": 23949 }, { "epoch": 0.6992496569443228, "grad_norm": 0.7132834199577492, "learning_rate": 1.6708840227088404e-06, "loss": 0.61, "step": 23950 }, { "epoch": 0.6992788531721702, "grad_norm": 0.6723676561464954, "learning_rate": 1.6707218167072184e-06, "loss": 0.5936, "step": 23951 }, { "epoch": 0.6993080494000176, "grad_norm": 0.6910625913683537, "learning_rate": 1.6705596107055964e-06, "loss": 0.6135, "step": 23952 }, { "epoch": 0.6993372456278649, "grad_norm": 0.7317658168277641, "learning_rate": 1.6703974047039742e-06, "loss": 0.5758, "step": 23953 }, { "epoch": 0.6993664418557123, "grad_norm": 0.7290554757819403, "learning_rate": 1.6702351987023522e-06, "loss": 0.5763, "step": 23954 }, { "epoch": 0.6993956380835596, "grad_norm": 0.6986767535555579, "learning_rate": 1.67007299270073e-06, "loss": 0.5899, "step": 23955 }, { "epoch": 0.699424834311407, "grad_norm": 0.6779887642797839, "learning_rate": 1.669910786699108e-06, "loss": 0.5472, "step": 23956 }, { "epoch": 0.6994540305392544, "grad_norm": 0.7025552877114812, "learning_rate": 1.6697485806974858e-06, "loss": 0.6311, "step": 23957 }, { "epoch": 0.6994832267671017, "grad_norm": 0.7204126614635795, "learning_rate": 1.6695863746958638e-06, "loss": 0.6254, "step": 23958 }, { "epoch": 0.6995124229949491, "grad_norm": 0.9122536795784608, "learning_rate": 1.669424168694242e-06, "loss": 0.773, "step": 23959 }, { "epoch": 0.6995416192227965, "grad_norm": 0.7452261508898382, "learning_rate": 1.6692619626926198e-06, "loss": 0.6138, "step": 23960 }, { "epoch": 0.6995708154506438, "grad_norm": 0.7879155989017675, "learning_rate": 1.6690997566909978e-06, "loss": 0.729, "step": 23961 }, { "epoch": 0.6996000116784912, "grad_norm": 0.7064780096056534, "learning_rate": 1.6689375506893756e-06, "loss": 0.645, "step": 23962 }, { "epoch": 0.6996292079063385, "grad_norm": 0.6824910830101093, "learning_rate": 1.6687753446877536e-06, "loss": 0.5395, "step": 23963 }, { "epoch": 0.6996584041341859, "grad_norm": 0.7063429325242899, "learning_rate": 1.6686131386861314e-06, "loss": 0.5509, "step": 23964 }, { "epoch": 0.6996876003620333, "grad_norm": 0.7606327687871285, "learning_rate": 1.6684509326845094e-06, "loss": 0.6806, "step": 23965 }, { "epoch": 0.6997167965898806, "grad_norm": 0.7634304227409602, "learning_rate": 1.6682887266828872e-06, "loss": 0.7424, "step": 23966 }, { "epoch": 0.699745992817728, "grad_norm": 0.7382752332950976, "learning_rate": 1.6681265206812652e-06, "loss": 0.6553, "step": 23967 }, { "epoch": 0.6997751890455753, "grad_norm": 0.6607426123294237, "learning_rate": 1.6679643146796432e-06, "loss": 0.5797, "step": 23968 }, { "epoch": 0.6998043852734227, "grad_norm": 0.751006880119801, "learning_rate": 1.6678021086780212e-06, "loss": 0.6664, "step": 23969 }, { "epoch": 0.6998335815012701, "grad_norm": 0.7449308161995473, "learning_rate": 1.6676399026763993e-06, "loss": 0.6326, "step": 23970 }, { "epoch": 0.6998627777291174, "grad_norm": 0.7948486037521966, "learning_rate": 1.6674776966747773e-06, "loss": 0.7126, "step": 23971 }, { "epoch": 0.6998919739569648, "grad_norm": 0.7363848294452645, "learning_rate": 1.667315490673155e-06, "loss": 0.6403, "step": 23972 }, { "epoch": 0.6999211701848121, "grad_norm": 0.7096305719151187, "learning_rate": 1.667153284671533e-06, "loss": 0.6139, "step": 23973 }, { "epoch": 0.6999503664126595, "grad_norm": 0.7549724567039646, "learning_rate": 1.6669910786699109e-06, "loss": 0.6892, "step": 23974 }, { "epoch": 0.6999795626405069, "grad_norm": 0.7331575493390802, "learning_rate": 1.6668288726682889e-06, "loss": 0.6944, "step": 23975 }, { "epoch": 0.7000087588683542, "grad_norm": 0.6851405755863984, "learning_rate": 1.6666666666666667e-06, "loss": 0.5625, "step": 23976 }, { "epoch": 0.7000379550962016, "grad_norm": 0.7206218197883388, "learning_rate": 1.6665044606650447e-06, "loss": 0.6195, "step": 23977 }, { "epoch": 0.700067151324049, "grad_norm": 0.7034151327521407, "learning_rate": 1.6663422546634229e-06, "loss": 0.5702, "step": 23978 }, { "epoch": 0.7000963475518963, "grad_norm": 0.7302786168160632, "learning_rate": 1.6661800486618007e-06, "loss": 0.6283, "step": 23979 }, { "epoch": 0.7001255437797437, "grad_norm": 0.7203277982876428, "learning_rate": 1.6660178426601787e-06, "loss": 0.6504, "step": 23980 }, { "epoch": 0.700154740007591, "grad_norm": 0.790888489639221, "learning_rate": 1.6658556366585565e-06, "loss": 0.6463, "step": 23981 }, { "epoch": 0.7001839362354384, "grad_norm": 0.7602029029905584, "learning_rate": 1.6656934306569345e-06, "loss": 0.6765, "step": 23982 }, { "epoch": 0.7002131324632858, "grad_norm": 0.6773719030691967, "learning_rate": 1.6655312246553123e-06, "loss": 0.6087, "step": 23983 }, { "epoch": 0.7002423286911331, "grad_norm": 0.7864102251154216, "learning_rate": 1.6653690186536903e-06, "loss": 0.6241, "step": 23984 }, { "epoch": 0.7002715249189805, "grad_norm": 0.7276987701803316, "learning_rate": 1.665206812652068e-06, "loss": 0.6916, "step": 23985 }, { "epoch": 0.7003007211468278, "grad_norm": 0.7051259452964604, "learning_rate": 1.665044606650446e-06, "loss": 0.6258, "step": 23986 }, { "epoch": 0.7003299173746752, "grad_norm": 0.7041500365783968, "learning_rate": 1.664882400648824e-06, "loss": 0.6016, "step": 23987 }, { "epoch": 0.7003591136025226, "grad_norm": 0.700372334739508, "learning_rate": 1.6647201946472021e-06, "loss": 0.5731, "step": 23988 }, { "epoch": 0.7003883098303699, "grad_norm": 0.8128488959321565, "learning_rate": 1.6645579886455801e-06, "loss": 0.6704, "step": 23989 }, { "epoch": 0.7004175060582173, "grad_norm": 0.6812258079869769, "learning_rate": 1.664395782643958e-06, "loss": 0.5705, "step": 23990 }, { "epoch": 0.7004467022860646, "grad_norm": 0.7408006634533233, "learning_rate": 1.664233576642336e-06, "loss": 0.6037, "step": 23991 }, { "epoch": 0.700475898513912, "grad_norm": 0.7358284499623412, "learning_rate": 1.664071370640714e-06, "loss": 0.6532, "step": 23992 }, { "epoch": 0.7005050947417594, "grad_norm": 0.7172372725050813, "learning_rate": 1.6639091646390917e-06, "loss": 0.6404, "step": 23993 }, { "epoch": 0.7005342909696067, "grad_norm": 0.6798526668650238, "learning_rate": 1.6637469586374697e-06, "loss": 0.5638, "step": 23994 }, { "epoch": 0.7005634871974541, "grad_norm": 0.7453877140385795, "learning_rate": 1.6635847526358475e-06, "loss": 0.6754, "step": 23995 }, { "epoch": 0.7005926834253015, "grad_norm": 0.7397677633825779, "learning_rate": 1.6634225466342255e-06, "loss": 0.6819, "step": 23996 }, { "epoch": 0.7006218796531488, "grad_norm": 0.7440161651588096, "learning_rate": 1.6632603406326037e-06, "loss": 0.6729, "step": 23997 }, { "epoch": 0.7006510758809962, "grad_norm": 0.7335043710738252, "learning_rate": 1.6630981346309815e-06, "loss": 0.6735, "step": 23998 }, { "epoch": 0.7006802721088435, "grad_norm": 0.7127005070974239, "learning_rate": 1.6629359286293595e-06, "loss": 0.639, "step": 23999 }, { "epoch": 0.7007094683366909, "grad_norm": 0.7245964129839693, "learning_rate": 1.6627737226277373e-06, "loss": 0.6501, "step": 24000 }, { "epoch": 0.7007386645645383, "grad_norm": 0.7911896796080619, "learning_rate": 1.6626115166261153e-06, "loss": 0.6502, "step": 24001 }, { "epoch": 0.7007678607923856, "grad_norm": 0.7631342351304851, "learning_rate": 1.6624493106244931e-06, "loss": 0.697, "step": 24002 }, { "epoch": 0.700797057020233, "grad_norm": 0.6907962983590384, "learning_rate": 1.6622871046228712e-06, "loss": 0.5362, "step": 24003 }, { "epoch": 0.7008262532480803, "grad_norm": 0.7280035467841025, "learning_rate": 1.662124898621249e-06, "loss": 0.679, "step": 24004 }, { "epoch": 0.7008554494759277, "grad_norm": 0.6906545131976717, "learning_rate": 1.661962692619627e-06, "loss": 0.5659, "step": 24005 }, { "epoch": 0.7008846457037751, "grad_norm": 0.764882735607368, "learning_rate": 1.6618004866180052e-06, "loss": 0.7242, "step": 24006 }, { "epoch": 0.7009138419316224, "grad_norm": 0.7182297055906575, "learning_rate": 1.661638280616383e-06, "loss": 0.6413, "step": 24007 }, { "epoch": 0.7009430381594698, "grad_norm": 0.6881876254270423, "learning_rate": 1.661476074614761e-06, "loss": 0.5948, "step": 24008 }, { "epoch": 0.7009722343873172, "grad_norm": 0.6979896334484799, "learning_rate": 1.6613138686131388e-06, "loss": 0.6341, "step": 24009 }, { "epoch": 0.7010014306151645, "grad_norm": 0.7423393598058522, "learning_rate": 1.6611516626115168e-06, "loss": 0.6431, "step": 24010 }, { "epoch": 0.7010306268430119, "grad_norm": 0.6913201597839008, "learning_rate": 1.6609894566098948e-06, "loss": 0.593, "step": 24011 }, { "epoch": 0.7010598230708592, "grad_norm": 0.7131837325264206, "learning_rate": 1.6608272506082726e-06, "loss": 0.6085, "step": 24012 }, { "epoch": 0.7010890192987066, "grad_norm": 0.8253280548278054, "learning_rate": 1.6606650446066506e-06, "loss": 0.7022, "step": 24013 }, { "epoch": 0.701118215526554, "grad_norm": 0.6937244794529253, "learning_rate": 1.6605028386050284e-06, "loss": 0.6124, "step": 24014 }, { "epoch": 0.7011474117544013, "grad_norm": 0.7384687896454679, "learning_rate": 1.6603406326034064e-06, "loss": 0.5607, "step": 24015 }, { "epoch": 0.7011766079822487, "grad_norm": 0.7229436442437875, "learning_rate": 1.6601784266017846e-06, "loss": 0.6686, "step": 24016 }, { "epoch": 0.701205804210096, "grad_norm": 0.735635082832004, "learning_rate": 1.6600162206001624e-06, "loss": 0.68, "step": 24017 }, { "epoch": 0.7012350004379434, "grad_norm": 0.710590349125342, "learning_rate": 1.6598540145985404e-06, "loss": 0.635, "step": 24018 }, { "epoch": 0.7012641966657908, "grad_norm": 0.7040741694127027, "learning_rate": 1.6596918085969182e-06, "loss": 0.6288, "step": 24019 }, { "epoch": 0.7012933928936381, "grad_norm": 0.7421312509346484, "learning_rate": 1.6595296025952962e-06, "loss": 0.6714, "step": 24020 }, { "epoch": 0.7013225891214855, "grad_norm": 0.6946623232603337, "learning_rate": 1.659367396593674e-06, "loss": 0.6121, "step": 24021 }, { "epoch": 0.7013517853493328, "grad_norm": 0.6732057222614851, "learning_rate": 1.659205190592052e-06, "loss": 0.5568, "step": 24022 }, { "epoch": 0.7013809815771802, "grad_norm": 0.7043128619448511, "learning_rate": 1.6590429845904298e-06, "loss": 0.5706, "step": 24023 }, { "epoch": 0.7014101778050276, "grad_norm": 0.7361924063829578, "learning_rate": 1.6588807785888078e-06, "loss": 0.7052, "step": 24024 }, { "epoch": 0.7014393740328749, "grad_norm": 0.6897717628007769, "learning_rate": 1.658718572587186e-06, "loss": 0.5425, "step": 24025 }, { "epoch": 0.7014685702607223, "grad_norm": 0.6508627023921031, "learning_rate": 1.6585563665855638e-06, "loss": 0.5344, "step": 24026 }, { "epoch": 0.7014977664885697, "grad_norm": 0.7373435145893831, "learning_rate": 1.6583941605839418e-06, "loss": 0.6377, "step": 24027 }, { "epoch": 0.701526962716417, "grad_norm": 0.6508582513048234, "learning_rate": 1.6582319545823196e-06, "loss": 0.5404, "step": 24028 }, { "epoch": 0.7015561589442644, "grad_norm": 0.7071381249007516, "learning_rate": 1.6580697485806976e-06, "loss": 0.644, "step": 24029 }, { "epoch": 0.7015853551721117, "grad_norm": 0.7282338391922001, "learning_rate": 1.6579075425790756e-06, "loss": 0.6901, "step": 24030 }, { "epoch": 0.7016145513999591, "grad_norm": 0.8187053323938466, "learning_rate": 1.6577453365774534e-06, "loss": 0.6704, "step": 24031 }, { "epoch": 0.7016437476278065, "grad_norm": 0.7134246445900452, "learning_rate": 1.6575831305758314e-06, "loss": 0.6063, "step": 24032 }, { "epoch": 0.7016729438556538, "grad_norm": 0.6893187130756933, "learning_rate": 1.6574209245742092e-06, "loss": 0.553, "step": 24033 }, { "epoch": 0.7017021400835012, "grad_norm": 0.7627586319129331, "learning_rate": 1.6572587185725872e-06, "loss": 0.6403, "step": 24034 }, { "epoch": 0.7017313363113485, "grad_norm": 0.6836911597945854, "learning_rate": 1.6570965125709655e-06, "loss": 0.6101, "step": 24035 }, { "epoch": 0.7017605325391959, "grad_norm": 0.7322438332075744, "learning_rate": 1.6569343065693433e-06, "loss": 0.6676, "step": 24036 }, { "epoch": 0.7017897287670433, "grad_norm": 0.7273015982699528, "learning_rate": 1.6567721005677213e-06, "loss": 0.5944, "step": 24037 }, { "epoch": 0.7018189249948906, "grad_norm": 0.7471795420024387, "learning_rate": 1.656609894566099e-06, "loss": 0.7019, "step": 24038 }, { "epoch": 0.701848121222738, "grad_norm": 0.7196322324655496, "learning_rate": 1.656447688564477e-06, "loss": 0.6019, "step": 24039 }, { "epoch": 0.7018773174505853, "grad_norm": 0.7111310010702545, "learning_rate": 1.6562854825628549e-06, "loss": 0.65, "step": 24040 }, { "epoch": 0.7019065136784327, "grad_norm": 0.8072465707039951, "learning_rate": 1.6561232765612329e-06, "loss": 0.6759, "step": 24041 }, { "epoch": 0.7019357099062801, "grad_norm": 0.799219555943447, "learning_rate": 1.6559610705596107e-06, "loss": 0.734, "step": 24042 }, { "epoch": 0.7019649061341274, "grad_norm": 0.8069855196102457, "learning_rate": 1.6557988645579887e-06, "loss": 0.7309, "step": 24043 }, { "epoch": 0.7019941023619748, "grad_norm": 0.6762686278026956, "learning_rate": 1.6556366585563669e-06, "loss": 0.6021, "step": 24044 }, { "epoch": 0.7020232985898222, "grad_norm": 0.6809831841103637, "learning_rate": 1.6554744525547447e-06, "loss": 0.5623, "step": 24045 }, { "epoch": 0.7020524948176695, "grad_norm": 0.7644707447051279, "learning_rate": 1.6553122465531227e-06, "loss": 0.7321, "step": 24046 }, { "epoch": 0.7020816910455169, "grad_norm": 0.7713114403106732, "learning_rate": 1.6551500405515005e-06, "loss": 0.7462, "step": 24047 }, { "epoch": 0.7021108872733642, "grad_norm": 0.6983005950776697, "learning_rate": 1.6549878345498785e-06, "loss": 0.6116, "step": 24048 }, { "epoch": 0.7021400835012116, "grad_norm": 0.6809288635936899, "learning_rate": 1.6548256285482565e-06, "loss": 0.5835, "step": 24049 }, { "epoch": 0.702169279729059, "grad_norm": 0.7000916746328614, "learning_rate": 1.6546634225466343e-06, "loss": 0.6091, "step": 24050 }, { "epoch": 0.7021984759569063, "grad_norm": 0.7513856809372136, "learning_rate": 1.6545012165450123e-06, "loss": 0.6848, "step": 24051 }, { "epoch": 0.7022276721847537, "grad_norm": 0.6975291751909757, "learning_rate": 1.65433901054339e-06, "loss": 0.5799, "step": 24052 }, { "epoch": 0.702256868412601, "grad_norm": 0.7042379710947702, "learning_rate": 1.654176804541768e-06, "loss": 0.5963, "step": 24053 }, { "epoch": 0.7022860646404484, "grad_norm": 0.7706394729789264, "learning_rate": 1.6540145985401463e-06, "loss": 0.6663, "step": 24054 }, { "epoch": 0.7023152608682958, "grad_norm": 0.7411608704471369, "learning_rate": 1.6538523925385241e-06, "loss": 0.6596, "step": 24055 }, { "epoch": 0.7023444570961431, "grad_norm": 0.7340179142284241, "learning_rate": 1.6536901865369021e-06, "loss": 0.6627, "step": 24056 }, { "epoch": 0.7023736533239905, "grad_norm": 0.6805739107666446, "learning_rate": 1.65352798053528e-06, "loss": 0.5779, "step": 24057 }, { "epoch": 0.7024028495518378, "grad_norm": 0.6754470902576925, "learning_rate": 1.653365774533658e-06, "loss": 0.589, "step": 24058 }, { "epoch": 0.7024320457796852, "grad_norm": 0.6764436033242307, "learning_rate": 1.6532035685320357e-06, "loss": 0.5745, "step": 24059 }, { "epoch": 0.7024612420075326, "grad_norm": 0.6809715256986814, "learning_rate": 1.6530413625304137e-06, "loss": 0.5501, "step": 24060 }, { "epoch": 0.70249043823538, "grad_norm": 0.7724698900981755, "learning_rate": 1.6528791565287915e-06, "loss": 0.7121, "step": 24061 }, { "epoch": 0.7025196344632274, "grad_norm": 0.6703813890145417, "learning_rate": 1.6527169505271695e-06, "loss": 0.5864, "step": 24062 }, { "epoch": 0.7025488306910748, "grad_norm": 0.7877498290273445, "learning_rate": 1.6525547445255477e-06, "loss": 0.7272, "step": 24063 }, { "epoch": 0.7025780269189221, "grad_norm": 0.7828118092584397, "learning_rate": 1.6523925385239255e-06, "loss": 0.7125, "step": 24064 }, { "epoch": 0.7026072231467695, "grad_norm": 0.658035227936693, "learning_rate": 1.6522303325223035e-06, "loss": 0.5711, "step": 24065 }, { "epoch": 0.7026364193746168, "grad_norm": 0.725667309218753, "learning_rate": 1.6520681265206813e-06, "loss": 0.6111, "step": 24066 }, { "epoch": 0.7026656156024642, "grad_norm": 0.7318181263908599, "learning_rate": 1.6519059205190594e-06, "loss": 0.6471, "step": 24067 }, { "epoch": 0.7026948118303116, "grad_norm": 0.6837480045071688, "learning_rate": 1.6517437145174374e-06, "loss": 0.6047, "step": 24068 }, { "epoch": 0.7027240080581589, "grad_norm": 0.7720901786543055, "learning_rate": 1.6515815085158152e-06, "loss": 0.6616, "step": 24069 }, { "epoch": 0.7027532042860063, "grad_norm": 0.7539856806613413, "learning_rate": 1.6514193025141932e-06, "loss": 0.6148, "step": 24070 }, { "epoch": 0.7027824005138537, "grad_norm": 0.7794997305576808, "learning_rate": 1.651257096512571e-06, "loss": 0.7162, "step": 24071 }, { "epoch": 0.702811596741701, "grad_norm": 0.7303893327143847, "learning_rate": 1.6510948905109492e-06, "loss": 0.6272, "step": 24072 }, { "epoch": 0.7028407929695484, "grad_norm": 0.7137669832832769, "learning_rate": 1.6509326845093272e-06, "loss": 0.6254, "step": 24073 }, { "epoch": 0.7028699891973957, "grad_norm": 0.6955523497434138, "learning_rate": 1.650770478507705e-06, "loss": 0.6101, "step": 24074 }, { "epoch": 0.7028991854252431, "grad_norm": 0.7605830910590398, "learning_rate": 1.650608272506083e-06, "loss": 0.7444, "step": 24075 }, { "epoch": 0.7029283816530905, "grad_norm": 0.692583512253765, "learning_rate": 1.6504460665044608e-06, "loss": 0.5781, "step": 24076 }, { "epoch": 0.7029575778809378, "grad_norm": 0.7733709128849258, "learning_rate": 1.6502838605028388e-06, "loss": 0.7165, "step": 24077 }, { "epoch": 0.7029867741087852, "grad_norm": 0.6829184869629379, "learning_rate": 1.6501216545012166e-06, "loss": 0.6061, "step": 24078 }, { "epoch": 0.7030159703366325, "grad_norm": 0.6896411215927901, "learning_rate": 1.6499594484995946e-06, "loss": 0.5611, "step": 24079 }, { "epoch": 0.7030451665644799, "grad_norm": 0.7182966526419996, "learning_rate": 1.6497972424979724e-06, "loss": 0.6168, "step": 24080 }, { "epoch": 0.7030743627923273, "grad_norm": 1.2973341825593558, "learning_rate": 1.6496350364963504e-06, "loss": 0.6733, "step": 24081 }, { "epoch": 0.7031035590201746, "grad_norm": 0.7018330288683508, "learning_rate": 1.6494728304947286e-06, "loss": 0.6191, "step": 24082 }, { "epoch": 0.703132755248022, "grad_norm": 0.7169899686276368, "learning_rate": 1.6493106244931064e-06, "loss": 0.6354, "step": 24083 }, { "epoch": 0.7031619514758694, "grad_norm": 0.74890392458153, "learning_rate": 1.6491484184914844e-06, "loss": 0.625, "step": 24084 }, { "epoch": 0.7031911477037167, "grad_norm": 0.6901328350325358, "learning_rate": 1.6489862124898622e-06, "loss": 0.5766, "step": 24085 }, { "epoch": 0.7032203439315641, "grad_norm": 0.7286597580263956, "learning_rate": 1.6488240064882402e-06, "loss": 0.6427, "step": 24086 }, { "epoch": 0.7032495401594114, "grad_norm": 0.7516115109704922, "learning_rate": 1.6486618004866182e-06, "loss": 0.58, "step": 24087 }, { "epoch": 0.7032787363872588, "grad_norm": 0.6939010784882069, "learning_rate": 1.648499594484996e-06, "loss": 0.5384, "step": 24088 }, { "epoch": 0.7033079326151062, "grad_norm": 0.7808464623038891, "learning_rate": 1.648337388483374e-06, "loss": 0.6841, "step": 24089 }, { "epoch": 0.7033371288429535, "grad_norm": 0.8426689769195878, "learning_rate": 1.6481751824817518e-06, "loss": 0.7183, "step": 24090 }, { "epoch": 0.7033663250708009, "grad_norm": 0.8192018470128428, "learning_rate": 1.64801297648013e-06, "loss": 0.7258, "step": 24091 }, { "epoch": 0.7033955212986482, "grad_norm": 0.7948288754508568, "learning_rate": 1.647850770478508e-06, "loss": 0.6651, "step": 24092 }, { "epoch": 0.7034247175264956, "grad_norm": 0.7067771990530032, "learning_rate": 1.6476885644768858e-06, "loss": 0.6123, "step": 24093 }, { "epoch": 0.703453913754343, "grad_norm": 0.717981241120701, "learning_rate": 1.6475263584752638e-06, "loss": 0.6366, "step": 24094 }, { "epoch": 0.7034831099821903, "grad_norm": 0.697777938972522, "learning_rate": 1.6473641524736416e-06, "loss": 0.6345, "step": 24095 }, { "epoch": 0.7035123062100377, "grad_norm": 0.687731093486017, "learning_rate": 1.6472019464720196e-06, "loss": 0.6141, "step": 24096 }, { "epoch": 0.703541502437885, "grad_norm": 0.740190210509954, "learning_rate": 1.6470397404703974e-06, "loss": 0.6417, "step": 24097 }, { "epoch": 0.7035706986657324, "grad_norm": 0.7097295571843065, "learning_rate": 1.6468775344687754e-06, "loss": 0.5957, "step": 24098 }, { "epoch": 0.7035998948935798, "grad_norm": 0.7381617967137436, "learning_rate": 1.6467153284671532e-06, "loss": 0.6933, "step": 24099 }, { "epoch": 0.7036290911214271, "grad_norm": 0.733184255872884, "learning_rate": 1.6465531224655312e-06, "loss": 0.6484, "step": 24100 }, { "epoch": 0.7036582873492745, "grad_norm": 0.7666837569207672, "learning_rate": 1.6463909164639095e-06, "loss": 0.71, "step": 24101 }, { "epoch": 0.7036874835771219, "grad_norm": 0.6782630899352091, "learning_rate": 1.6462287104622873e-06, "loss": 0.5667, "step": 24102 }, { "epoch": 0.7037166798049692, "grad_norm": 0.7919585740320432, "learning_rate": 1.6460665044606653e-06, "loss": 0.7915, "step": 24103 }, { "epoch": 0.7037458760328166, "grad_norm": 0.7093625861812968, "learning_rate": 1.645904298459043e-06, "loss": 0.6564, "step": 24104 }, { "epoch": 0.7037750722606639, "grad_norm": 0.764394532388191, "learning_rate": 1.645742092457421e-06, "loss": 0.6698, "step": 24105 }, { "epoch": 0.7038042684885113, "grad_norm": 0.6927795901001051, "learning_rate": 1.6455798864557989e-06, "loss": 0.6068, "step": 24106 }, { "epoch": 0.7038334647163587, "grad_norm": 0.7126934800814511, "learning_rate": 1.6454176804541769e-06, "loss": 0.5998, "step": 24107 }, { "epoch": 0.703862660944206, "grad_norm": 0.7384964547417789, "learning_rate": 1.6452554744525549e-06, "loss": 0.6828, "step": 24108 }, { "epoch": 0.7038918571720534, "grad_norm": 0.7724503372237563, "learning_rate": 1.6450932684509327e-06, "loss": 0.7115, "step": 24109 }, { "epoch": 0.7039210533999007, "grad_norm": 0.7112505749431566, "learning_rate": 1.6449310624493109e-06, "loss": 0.6563, "step": 24110 }, { "epoch": 0.7039502496277481, "grad_norm": 0.7275143085514695, "learning_rate": 1.644768856447689e-06, "loss": 0.6728, "step": 24111 }, { "epoch": 0.7039794458555955, "grad_norm": 0.7642974982890701, "learning_rate": 1.6446066504460667e-06, "loss": 0.7016, "step": 24112 }, { "epoch": 0.7040086420834428, "grad_norm": 0.7532915261331334, "learning_rate": 1.6444444444444447e-06, "loss": 0.7107, "step": 24113 }, { "epoch": 0.7040378383112902, "grad_norm": 0.7602191432720794, "learning_rate": 1.6442822384428225e-06, "loss": 0.6263, "step": 24114 }, { "epoch": 0.7040670345391375, "grad_norm": 0.7741084054586853, "learning_rate": 1.6441200324412005e-06, "loss": 0.7116, "step": 24115 }, { "epoch": 0.7040962307669849, "grad_norm": 0.6774089958308581, "learning_rate": 1.6439578264395783e-06, "loss": 0.5203, "step": 24116 }, { "epoch": 0.7041254269948323, "grad_norm": 0.6501224301888066, "learning_rate": 1.6437956204379563e-06, "loss": 0.5282, "step": 24117 }, { "epoch": 0.7041546232226796, "grad_norm": 0.7036991834145778, "learning_rate": 1.643633414436334e-06, "loss": 0.6704, "step": 24118 }, { "epoch": 0.704183819450527, "grad_norm": 0.7156116113113201, "learning_rate": 1.643471208434712e-06, "loss": 0.6567, "step": 24119 }, { "epoch": 0.7042130156783744, "grad_norm": 0.703490802404805, "learning_rate": 1.6433090024330903e-06, "loss": 0.5624, "step": 24120 }, { "epoch": 0.7042422119062217, "grad_norm": 0.7153334894654411, "learning_rate": 1.6431467964314681e-06, "loss": 0.6145, "step": 24121 }, { "epoch": 0.7042714081340691, "grad_norm": 0.6533067855794138, "learning_rate": 1.6429845904298461e-06, "loss": 0.536, "step": 24122 }, { "epoch": 0.7043006043619164, "grad_norm": 0.7407006918605367, "learning_rate": 1.642822384428224e-06, "loss": 0.6473, "step": 24123 }, { "epoch": 0.7043298005897638, "grad_norm": 0.7401049836542963, "learning_rate": 1.642660178426602e-06, "loss": 0.6408, "step": 24124 }, { "epoch": 0.7043589968176112, "grad_norm": 0.7546709949754739, "learning_rate": 1.6424979724249797e-06, "loss": 0.6177, "step": 24125 }, { "epoch": 0.7043881930454585, "grad_norm": 0.7242769896411275, "learning_rate": 1.6423357664233577e-06, "loss": 0.6484, "step": 24126 }, { "epoch": 0.7044173892733059, "grad_norm": 0.8928125977711753, "learning_rate": 1.6421735604217357e-06, "loss": 0.5906, "step": 24127 }, { "epoch": 0.7044465855011532, "grad_norm": 0.7874897719850623, "learning_rate": 1.6420113544201135e-06, "loss": 0.6913, "step": 24128 }, { "epoch": 0.7044757817290006, "grad_norm": 0.6951874495248808, "learning_rate": 1.6418491484184917e-06, "loss": 0.6043, "step": 24129 }, { "epoch": 0.704504977956848, "grad_norm": 0.7101520162902099, "learning_rate": 1.6416869424168698e-06, "loss": 0.6754, "step": 24130 }, { "epoch": 0.7045341741846953, "grad_norm": 0.6638934338441178, "learning_rate": 1.6415247364152476e-06, "loss": 0.5667, "step": 24131 }, { "epoch": 0.7045633704125427, "grad_norm": 0.7715191601641678, "learning_rate": 1.6413625304136256e-06, "loss": 0.7156, "step": 24132 }, { "epoch": 0.70459256664039, "grad_norm": 0.7152420506664191, "learning_rate": 1.6412003244120034e-06, "loss": 0.6239, "step": 24133 }, { "epoch": 0.7046217628682374, "grad_norm": 0.7395932532102859, "learning_rate": 1.6410381184103814e-06, "loss": 0.6773, "step": 24134 }, { "epoch": 0.7046509590960848, "grad_norm": 0.8016602707362748, "learning_rate": 1.6408759124087592e-06, "loss": 0.6097, "step": 24135 }, { "epoch": 0.7046801553239321, "grad_norm": 0.7557583055771983, "learning_rate": 1.6407137064071372e-06, "loss": 0.7415, "step": 24136 }, { "epoch": 0.7047093515517795, "grad_norm": 0.7297160325258694, "learning_rate": 1.640551500405515e-06, "loss": 0.6426, "step": 24137 }, { "epoch": 0.7047385477796269, "grad_norm": 0.7106638283766555, "learning_rate": 1.640389294403893e-06, "loss": 0.6232, "step": 24138 }, { "epoch": 0.7047677440074742, "grad_norm": 0.6589912321211447, "learning_rate": 1.6402270884022712e-06, "loss": 0.5436, "step": 24139 }, { "epoch": 0.7047969402353216, "grad_norm": 0.7401408290617515, "learning_rate": 1.640064882400649e-06, "loss": 0.7057, "step": 24140 }, { "epoch": 0.7048261364631689, "grad_norm": 0.726135574449171, "learning_rate": 1.639902676399027e-06, "loss": 0.6888, "step": 24141 }, { "epoch": 0.7048553326910163, "grad_norm": 0.7201690538237301, "learning_rate": 1.6397404703974048e-06, "loss": 0.6789, "step": 24142 }, { "epoch": 0.7048845289188637, "grad_norm": 0.7293487671535487, "learning_rate": 1.6395782643957828e-06, "loss": 0.636, "step": 24143 }, { "epoch": 0.704913725146711, "grad_norm": 0.732026704304723, "learning_rate": 1.6394160583941606e-06, "loss": 0.6364, "step": 24144 }, { "epoch": 0.7049429213745584, "grad_norm": 0.7100634983369085, "learning_rate": 1.6392538523925386e-06, "loss": 0.5852, "step": 24145 }, { "epoch": 0.7049721176024057, "grad_norm": 0.7539342757971288, "learning_rate": 1.6390916463909166e-06, "loss": 0.7134, "step": 24146 }, { "epoch": 0.7050013138302531, "grad_norm": 0.7067250738709132, "learning_rate": 1.6389294403892944e-06, "loss": 0.5982, "step": 24147 }, { "epoch": 0.7050305100581005, "grad_norm": 0.744801729984403, "learning_rate": 1.6387672343876726e-06, "loss": 0.6856, "step": 24148 }, { "epoch": 0.7050597062859478, "grad_norm": 0.7056970471573686, "learning_rate": 1.6386050283860506e-06, "loss": 0.6665, "step": 24149 }, { "epoch": 0.7050889025137952, "grad_norm": 0.7110228095347069, "learning_rate": 1.6384428223844284e-06, "loss": 0.6335, "step": 24150 }, { "epoch": 0.7051180987416426, "grad_norm": 0.7430228375186397, "learning_rate": 1.6382806163828064e-06, "loss": 0.6765, "step": 24151 }, { "epoch": 0.7051472949694899, "grad_norm": 0.7466454586370695, "learning_rate": 1.6381184103811842e-06, "loss": 0.6458, "step": 24152 }, { "epoch": 0.7051764911973373, "grad_norm": 0.8059950903049022, "learning_rate": 1.6379562043795622e-06, "loss": 0.6791, "step": 24153 }, { "epoch": 0.7052056874251846, "grad_norm": 0.7087909476417701, "learning_rate": 1.63779399837794e-06, "loss": 0.6447, "step": 24154 }, { "epoch": 0.705234883653032, "grad_norm": 0.7355444264552103, "learning_rate": 1.637631792376318e-06, "loss": 0.6496, "step": 24155 }, { "epoch": 0.7052640798808794, "grad_norm": 0.7024574480000981, "learning_rate": 1.6374695863746958e-06, "loss": 0.5845, "step": 24156 }, { "epoch": 0.7052932761087267, "grad_norm": 0.7021049409178688, "learning_rate": 1.637307380373074e-06, "loss": 0.5706, "step": 24157 }, { "epoch": 0.7053224723365741, "grad_norm": 0.7129781207120048, "learning_rate": 1.637145174371452e-06, "loss": 0.6571, "step": 24158 }, { "epoch": 0.7053516685644214, "grad_norm": 0.7176263775055445, "learning_rate": 1.6369829683698298e-06, "loss": 0.593, "step": 24159 }, { "epoch": 0.7053808647922688, "grad_norm": 0.7187626995586741, "learning_rate": 1.6368207623682078e-06, "loss": 0.6058, "step": 24160 }, { "epoch": 0.7054100610201162, "grad_norm": 0.7992688528789526, "learning_rate": 1.6366585563665856e-06, "loss": 0.727, "step": 24161 }, { "epoch": 0.7054392572479635, "grad_norm": 0.7169485956477323, "learning_rate": 1.6364963503649636e-06, "loss": 0.6691, "step": 24162 }, { "epoch": 0.7054684534758109, "grad_norm": 0.693528374101775, "learning_rate": 1.6363341443633414e-06, "loss": 0.5994, "step": 24163 }, { "epoch": 0.7054976497036582, "grad_norm": 0.7217802368198191, "learning_rate": 1.6361719383617194e-06, "loss": 0.6095, "step": 24164 }, { "epoch": 0.7055268459315056, "grad_norm": 0.7413768218460035, "learning_rate": 1.6360097323600975e-06, "loss": 0.6874, "step": 24165 }, { "epoch": 0.705556042159353, "grad_norm": 0.7618378570642943, "learning_rate": 1.6358475263584752e-06, "loss": 0.6477, "step": 24166 }, { "epoch": 0.7055852383872003, "grad_norm": 0.8154918509663996, "learning_rate": 1.6356853203568535e-06, "loss": 0.7024, "step": 24167 }, { "epoch": 0.7056144346150477, "grad_norm": 0.6979390499569019, "learning_rate": 1.6355231143552313e-06, "loss": 0.6125, "step": 24168 }, { "epoch": 0.705643630842895, "grad_norm": 0.7502600679607233, "learning_rate": 1.6353609083536093e-06, "loss": 0.6253, "step": 24169 }, { "epoch": 0.7056728270707424, "grad_norm": 0.7043974500448151, "learning_rate": 1.6351987023519873e-06, "loss": 0.6375, "step": 24170 }, { "epoch": 0.7057020232985898, "grad_norm": 0.7491778222820168, "learning_rate": 1.635036496350365e-06, "loss": 0.6355, "step": 24171 }, { "epoch": 0.7057312195264371, "grad_norm": 0.7422552131067642, "learning_rate": 1.634874290348743e-06, "loss": 0.637, "step": 24172 }, { "epoch": 0.7057604157542845, "grad_norm": 0.7027928224985805, "learning_rate": 1.6347120843471209e-06, "loss": 0.6203, "step": 24173 }, { "epoch": 0.7057896119821319, "grad_norm": 0.6712259719373697, "learning_rate": 1.6345498783454989e-06, "loss": 0.5672, "step": 24174 }, { "epoch": 0.7058188082099792, "grad_norm": 0.7760253073220094, "learning_rate": 1.6343876723438767e-06, "loss": 0.7232, "step": 24175 }, { "epoch": 0.7058480044378266, "grad_norm": 0.7587482929100028, "learning_rate": 1.6342254663422549e-06, "loss": 0.6832, "step": 24176 }, { "epoch": 0.7058772006656739, "grad_norm": 0.7975172039980111, "learning_rate": 1.634063260340633e-06, "loss": 0.7211, "step": 24177 }, { "epoch": 0.7059063968935213, "grad_norm": 0.7078107085063082, "learning_rate": 1.6339010543390107e-06, "loss": 0.6163, "step": 24178 }, { "epoch": 0.7059355931213687, "grad_norm": 0.7419155309662891, "learning_rate": 1.6337388483373887e-06, "loss": 0.6704, "step": 24179 }, { "epoch": 0.705964789349216, "grad_norm": 0.7507831277395014, "learning_rate": 1.6335766423357665e-06, "loss": 0.6862, "step": 24180 }, { "epoch": 0.7059939855770635, "grad_norm": 0.7532864877709582, "learning_rate": 1.6334144363341445e-06, "loss": 0.6659, "step": 24181 }, { "epoch": 0.7060231818049109, "grad_norm": 0.7480080315210302, "learning_rate": 1.6332522303325223e-06, "loss": 0.6309, "step": 24182 }, { "epoch": 0.7060523780327582, "grad_norm": 0.7821759425979137, "learning_rate": 1.6330900243309003e-06, "loss": 0.7299, "step": 24183 }, { "epoch": 0.7060815742606056, "grad_norm": 0.7444700051757558, "learning_rate": 1.6329278183292783e-06, "loss": 0.6684, "step": 24184 }, { "epoch": 0.7061107704884529, "grad_norm": 0.7200948152588146, "learning_rate": 1.6327656123276561e-06, "loss": 0.6621, "step": 24185 }, { "epoch": 0.7061399667163003, "grad_norm": 0.7622132566827277, "learning_rate": 1.6326034063260343e-06, "loss": 0.655, "step": 24186 }, { "epoch": 0.7061691629441477, "grad_norm": 0.6957603693331452, "learning_rate": 1.6324412003244121e-06, "loss": 0.6376, "step": 24187 }, { "epoch": 0.706198359171995, "grad_norm": 0.7528876385874848, "learning_rate": 1.6322789943227901e-06, "loss": 0.642, "step": 24188 }, { "epoch": 0.7062275553998424, "grad_norm": 0.672958435109331, "learning_rate": 1.6321167883211681e-06, "loss": 0.5882, "step": 24189 }, { "epoch": 0.7062567516276897, "grad_norm": 0.776167053122958, "learning_rate": 1.631954582319546e-06, "loss": 0.7305, "step": 24190 }, { "epoch": 0.7062859478555371, "grad_norm": 0.7264708078405961, "learning_rate": 1.631792376317924e-06, "loss": 0.651, "step": 24191 }, { "epoch": 0.7063151440833845, "grad_norm": 0.7351533505032599, "learning_rate": 1.6316301703163017e-06, "loss": 0.6811, "step": 24192 }, { "epoch": 0.7063443403112318, "grad_norm": 0.7024886006544986, "learning_rate": 1.6314679643146797e-06, "loss": 0.6244, "step": 24193 }, { "epoch": 0.7063735365390792, "grad_norm": 0.6936444984446998, "learning_rate": 1.6313057583130575e-06, "loss": 0.6325, "step": 24194 }, { "epoch": 0.7064027327669266, "grad_norm": 0.7199418081829613, "learning_rate": 1.6311435523114358e-06, "loss": 0.6112, "step": 24195 }, { "epoch": 0.7064319289947739, "grad_norm": 0.7313796078028084, "learning_rate": 1.6309813463098138e-06, "loss": 0.6645, "step": 24196 }, { "epoch": 0.7064611252226213, "grad_norm": 0.6864324466206206, "learning_rate": 1.6308191403081916e-06, "loss": 0.6048, "step": 24197 }, { "epoch": 0.7064903214504686, "grad_norm": 0.7224403342600381, "learning_rate": 1.6306569343065696e-06, "loss": 0.6633, "step": 24198 }, { "epoch": 0.706519517678316, "grad_norm": 0.7968367827511701, "learning_rate": 1.6304947283049474e-06, "loss": 0.6926, "step": 24199 }, { "epoch": 0.7065487139061634, "grad_norm": 0.7419013373075578, "learning_rate": 1.6303325223033254e-06, "loss": 0.7005, "step": 24200 }, { "epoch": 0.7065779101340107, "grad_norm": 0.7048000455222632, "learning_rate": 1.6301703163017032e-06, "loss": 0.5997, "step": 24201 }, { "epoch": 0.7066071063618581, "grad_norm": 0.7318770865180116, "learning_rate": 1.6300081103000812e-06, "loss": 0.6585, "step": 24202 }, { "epoch": 0.7066363025897054, "grad_norm": 0.7494172394021765, "learning_rate": 1.6298459042984592e-06, "loss": 0.7151, "step": 24203 }, { "epoch": 0.7066654988175528, "grad_norm": 0.7192894183734476, "learning_rate": 1.629683698296837e-06, "loss": 0.5818, "step": 24204 }, { "epoch": 0.7066946950454002, "grad_norm": 0.6874679608674601, "learning_rate": 1.6295214922952152e-06, "loss": 0.5998, "step": 24205 }, { "epoch": 0.7067238912732475, "grad_norm": 0.7341733210588458, "learning_rate": 1.629359286293593e-06, "loss": 0.6703, "step": 24206 }, { "epoch": 0.7067530875010949, "grad_norm": 0.7094575152473444, "learning_rate": 1.629197080291971e-06, "loss": 0.6587, "step": 24207 }, { "epoch": 0.7067822837289423, "grad_norm": 0.6937980853645533, "learning_rate": 1.629034874290349e-06, "loss": 0.6336, "step": 24208 }, { "epoch": 0.7068114799567896, "grad_norm": 0.7848719339310922, "learning_rate": 1.6288726682887268e-06, "loss": 0.6241, "step": 24209 }, { "epoch": 0.706840676184637, "grad_norm": 0.7477565055414634, "learning_rate": 1.6287104622871048e-06, "loss": 0.7245, "step": 24210 }, { "epoch": 0.7068698724124843, "grad_norm": 0.7431267260945996, "learning_rate": 1.6285482562854826e-06, "loss": 0.6778, "step": 24211 }, { "epoch": 0.7068990686403317, "grad_norm": 0.7087422174891227, "learning_rate": 1.6283860502838606e-06, "loss": 0.5822, "step": 24212 }, { "epoch": 0.7069282648681791, "grad_norm": 0.7795250002429721, "learning_rate": 1.6282238442822384e-06, "loss": 0.7373, "step": 24213 }, { "epoch": 0.7069574610960264, "grad_norm": 0.7296658719519571, "learning_rate": 1.6280616382806166e-06, "loss": 0.6867, "step": 24214 }, { "epoch": 0.7069866573238738, "grad_norm": 0.7204867638774142, "learning_rate": 1.6278994322789946e-06, "loss": 0.637, "step": 24215 }, { "epoch": 0.7070158535517211, "grad_norm": 0.8430856273608901, "learning_rate": 1.6277372262773724e-06, "loss": 0.6551, "step": 24216 }, { "epoch": 0.7070450497795685, "grad_norm": 0.7589121073716508, "learning_rate": 1.6275750202757504e-06, "loss": 0.6524, "step": 24217 }, { "epoch": 0.7070742460074159, "grad_norm": 0.8103614275130862, "learning_rate": 1.6274128142741282e-06, "loss": 0.7468, "step": 24218 }, { "epoch": 0.7071034422352632, "grad_norm": 0.6993467311727609, "learning_rate": 1.6272506082725062e-06, "loss": 0.6293, "step": 24219 }, { "epoch": 0.7071326384631106, "grad_norm": 0.7331201937213001, "learning_rate": 1.627088402270884e-06, "loss": 0.6571, "step": 24220 }, { "epoch": 0.707161834690958, "grad_norm": 0.8037239696853865, "learning_rate": 1.626926196269262e-06, "loss": 0.7675, "step": 24221 }, { "epoch": 0.7071910309188053, "grad_norm": 0.7631310789258093, "learning_rate": 1.62676399026764e-06, "loss": 0.679, "step": 24222 }, { "epoch": 0.7072202271466527, "grad_norm": 0.6470431637191826, "learning_rate": 1.626601784266018e-06, "loss": 0.5089, "step": 24223 }, { "epoch": 0.7072494233745, "grad_norm": 0.6804819847067153, "learning_rate": 1.626439578264396e-06, "loss": 0.5746, "step": 24224 }, { "epoch": 0.7072786196023474, "grad_norm": 0.780861189480664, "learning_rate": 1.6262773722627738e-06, "loss": 0.7125, "step": 24225 }, { "epoch": 0.7073078158301948, "grad_norm": 0.6764752929698195, "learning_rate": 1.6261151662611518e-06, "loss": 0.6009, "step": 24226 }, { "epoch": 0.7073370120580421, "grad_norm": 0.7531034117234067, "learning_rate": 1.6259529602595299e-06, "loss": 0.5974, "step": 24227 }, { "epoch": 0.7073662082858895, "grad_norm": 0.7346217326613148, "learning_rate": 1.6257907542579076e-06, "loss": 0.7116, "step": 24228 }, { "epoch": 0.7073954045137368, "grad_norm": 0.7304593183970576, "learning_rate": 1.6256285482562857e-06, "loss": 0.5994, "step": 24229 }, { "epoch": 0.7074246007415842, "grad_norm": 0.7111222355110886, "learning_rate": 1.6254663422546634e-06, "loss": 0.6223, "step": 24230 }, { "epoch": 0.7074537969694316, "grad_norm": 0.7307427790091169, "learning_rate": 1.6253041362530415e-06, "loss": 0.656, "step": 24231 }, { "epoch": 0.7074829931972789, "grad_norm": 0.7704122777825823, "learning_rate": 1.6251419302514192e-06, "loss": 0.7644, "step": 24232 }, { "epoch": 0.7075121894251263, "grad_norm": 0.7540145405408604, "learning_rate": 1.6249797242497975e-06, "loss": 0.6575, "step": 24233 }, { "epoch": 0.7075413856529736, "grad_norm": 0.6571018286288874, "learning_rate": 1.6248175182481755e-06, "loss": 0.5444, "step": 24234 }, { "epoch": 0.707570581880821, "grad_norm": 0.8219640325968209, "learning_rate": 1.6246553122465533e-06, "loss": 0.746, "step": 24235 }, { "epoch": 0.7075997781086684, "grad_norm": 0.724637602286491, "learning_rate": 1.6244931062449313e-06, "loss": 0.6106, "step": 24236 }, { "epoch": 0.7076289743365157, "grad_norm": 0.7521726385803311, "learning_rate": 1.624330900243309e-06, "loss": 0.6822, "step": 24237 }, { "epoch": 0.7076581705643631, "grad_norm": 0.6982246204543947, "learning_rate": 1.624168694241687e-06, "loss": 0.6207, "step": 24238 }, { "epoch": 0.7076873667922104, "grad_norm": 0.7902826484926443, "learning_rate": 1.6240064882400649e-06, "loss": 0.5839, "step": 24239 }, { "epoch": 0.7077165630200578, "grad_norm": 0.6945391925174272, "learning_rate": 1.6238442822384429e-06, "loss": 0.5892, "step": 24240 }, { "epoch": 0.7077457592479052, "grad_norm": 0.7237282083249273, "learning_rate": 1.6236820762368207e-06, "loss": 0.681, "step": 24241 }, { "epoch": 0.7077749554757525, "grad_norm": 0.7070469078028198, "learning_rate": 1.623519870235199e-06, "loss": 0.6233, "step": 24242 }, { "epoch": 0.7078041517035999, "grad_norm": 0.7029887641571296, "learning_rate": 1.623357664233577e-06, "loss": 0.5966, "step": 24243 }, { "epoch": 0.7078333479314473, "grad_norm": 0.6576183901143042, "learning_rate": 1.6231954582319547e-06, "loss": 0.5617, "step": 24244 }, { "epoch": 0.7078625441592946, "grad_norm": 0.7300845888660199, "learning_rate": 1.6230332522303327e-06, "loss": 0.6448, "step": 24245 }, { "epoch": 0.707891740387142, "grad_norm": 0.7045842693175639, "learning_rate": 1.6228710462287107e-06, "loss": 0.6015, "step": 24246 }, { "epoch": 0.7079209366149893, "grad_norm": 0.7314480219949888, "learning_rate": 1.6227088402270885e-06, "loss": 0.6242, "step": 24247 }, { "epoch": 0.7079501328428367, "grad_norm": 0.7404075491202756, "learning_rate": 1.6225466342254665e-06, "loss": 0.6448, "step": 24248 }, { "epoch": 0.7079793290706841, "grad_norm": 0.6688451885781957, "learning_rate": 1.6223844282238443e-06, "loss": 0.5575, "step": 24249 }, { "epoch": 0.7080085252985314, "grad_norm": 0.744323583516783, "learning_rate": 1.6222222222222223e-06, "loss": 0.6174, "step": 24250 }, { "epoch": 0.7080377215263788, "grad_norm": 0.8379071588031886, "learning_rate": 1.6220600162206001e-06, "loss": 0.8029, "step": 24251 }, { "epoch": 0.7080669177542261, "grad_norm": 0.7008531760909743, "learning_rate": 1.6218978102189783e-06, "loss": 0.5847, "step": 24252 }, { "epoch": 0.7080961139820735, "grad_norm": 0.6983024690132034, "learning_rate": 1.6217356042173563e-06, "loss": 0.6112, "step": 24253 }, { "epoch": 0.7081253102099209, "grad_norm": 0.7585741132517245, "learning_rate": 1.6215733982157341e-06, "loss": 0.6566, "step": 24254 }, { "epoch": 0.7081545064377682, "grad_norm": 0.792494648215534, "learning_rate": 1.6214111922141121e-06, "loss": 0.7614, "step": 24255 }, { "epoch": 0.7081837026656156, "grad_norm": 0.9101318509462245, "learning_rate": 1.62124898621249e-06, "loss": 0.6218, "step": 24256 }, { "epoch": 0.708212898893463, "grad_norm": 0.7526496432848808, "learning_rate": 1.621086780210868e-06, "loss": 0.7042, "step": 24257 }, { "epoch": 0.7082420951213103, "grad_norm": 0.736310166776273, "learning_rate": 1.6209245742092457e-06, "loss": 0.6529, "step": 24258 }, { "epoch": 0.7082712913491577, "grad_norm": 0.6912449296878463, "learning_rate": 1.6207623682076237e-06, "loss": 0.5482, "step": 24259 }, { "epoch": 0.708300487577005, "grad_norm": 0.7207691912914911, "learning_rate": 1.6206001622060015e-06, "loss": 0.5982, "step": 24260 }, { "epoch": 0.7083296838048524, "grad_norm": 0.7386580559745626, "learning_rate": 1.6204379562043798e-06, "loss": 0.6313, "step": 24261 }, { "epoch": 0.7083588800326998, "grad_norm": 0.6959035475145986, "learning_rate": 1.6202757502027578e-06, "loss": 0.6065, "step": 24262 }, { "epoch": 0.7083880762605471, "grad_norm": 0.7199748057110421, "learning_rate": 1.6201135442011356e-06, "loss": 0.604, "step": 24263 }, { "epoch": 0.7084172724883945, "grad_norm": 0.7662421870618543, "learning_rate": 1.6199513381995136e-06, "loss": 0.6817, "step": 24264 }, { "epoch": 0.7084464687162418, "grad_norm": 0.7208634057889053, "learning_rate": 1.6197891321978916e-06, "loss": 0.6621, "step": 24265 }, { "epoch": 0.7084756649440892, "grad_norm": 0.6877186116992965, "learning_rate": 1.6196269261962694e-06, "loss": 0.6373, "step": 24266 }, { "epoch": 0.7085048611719366, "grad_norm": 0.8591985242119827, "learning_rate": 1.6194647201946474e-06, "loss": 0.694, "step": 24267 }, { "epoch": 0.7085340573997839, "grad_norm": 0.6970671832984354, "learning_rate": 1.6193025141930252e-06, "loss": 0.5839, "step": 24268 }, { "epoch": 0.7085632536276313, "grad_norm": 0.7287206971759259, "learning_rate": 1.6191403081914032e-06, "loss": 0.6379, "step": 24269 }, { "epoch": 0.7085924498554786, "grad_norm": 0.76726755850968, "learning_rate": 1.618978102189781e-06, "loss": 0.6781, "step": 24270 }, { "epoch": 0.708621646083326, "grad_norm": 0.6990923091316462, "learning_rate": 1.6188158961881592e-06, "loss": 0.5781, "step": 24271 }, { "epoch": 0.7086508423111734, "grad_norm": 0.73096698081958, "learning_rate": 1.6186536901865372e-06, "loss": 0.6652, "step": 24272 }, { "epoch": 0.7086800385390207, "grad_norm": 0.7083447672239875, "learning_rate": 1.618491484184915e-06, "loss": 0.6283, "step": 24273 }, { "epoch": 0.7087092347668681, "grad_norm": 0.7847170251398723, "learning_rate": 1.618329278183293e-06, "loss": 0.6309, "step": 24274 }, { "epoch": 0.7087384309947155, "grad_norm": 0.7026682726128026, "learning_rate": 1.6181670721816708e-06, "loss": 0.5776, "step": 24275 }, { "epoch": 0.7087676272225628, "grad_norm": 0.8119441287755186, "learning_rate": 1.6180048661800488e-06, "loss": 0.6278, "step": 24276 }, { "epoch": 0.7087968234504102, "grad_norm": 0.7955760522767479, "learning_rate": 1.6178426601784266e-06, "loss": 0.709, "step": 24277 }, { "epoch": 0.7088260196782575, "grad_norm": 0.7141905648826709, "learning_rate": 1.6176804541768046e-06, "loss": 0.6137, "step": 24278 }, { "epoch": 0.7088552159061049, "grad_norm": 0.7817630089730144, "learning_rate": 1.6175182481751824e-06, "loss": 0.7232, "step": 24279 }, { "epoch": 0.7088844121339523, "grad_norm": 0.7748859891690588, "learning_rate": 1.6173560421735606e-06, "loss": 0.6721, "step": 24280 }, { "epoch": 0.7089136083617996, "grad_norm": 0.7575866776746919, "learning_rate": 1.6171938361719386e-06, "loss": 0.6607, "step": 24281 }, { "epoch": 0.708942804589647, "grad_norm": 0.7121141730177871, "learning_rate": 1.6170316301703164e-06, "loss": 0.6136, "step": 24282 }, { "epoch": 0.7089720008174943, "grad_norm": 0.6990032914538105, "learning_rate": 1.6168694241686944e-06, "loss": 0.6169, "step": 24283 }, { "epoch": 0.7090011970453417, "grad_norm": 0.7029092413010507, "learning_rate": 1.6167072181670724e-06, "loss": 0.5902, "step": 24284 }, { "epoch": 0.7090303932731891, "grad_norm": 0.6987627687151351, "learning_rate": 1.6165450121654502e-06, "loss": 0.5901, "step": 24285 }, { "epoch": 0.7090595895010364, "grad_norm": 0.7425681198321554, "learning_rate": 1.6163828061638282e-06, "loss": 0.6655, "step": 24286 }, { "epoch": 0.7090887857288838, "grad_norm": 0.709243929137114, "learning_rate": 1.616220600162206e-06, "loss": 0.6291, "step": 24287 }, { "epoch": 0.7091179819567311, "grad_norm": 0.7829048773722274, "learning_rate": 1.616058394160584e-06, "loss": 0.7322, "step": 24288 }, { "epoch": 0.7091471781845785, "grad_norm": 0.7065189045842096, "learning_rate": 1.6158961881589618e-06, "loss": 0.6106, "step": 24289 }, { "epoch": 0.7091763744124259, "grad_norm": 0.7676402546259263, "learning_rate": 1.61573398215734e-06, "loss": 0.654, "step": 24290 }, { "epoch": 0.7092055706402732, "grad_norm": 0.7373329719250515, "learning_rate": 1.615571776155718e-06, "loss": 0.7032, "step": 24291 }, { "epoch": 0.7092347668681206, "grad_norm": 0.7384559890483876, "learning_rate": 1.6154095701540958e-06, "loss": 0.6066, "step": 24292 }, { "epoch": 0.709263963095968, "grad_norm": 0.8064008203520953, "learning_rate": 1.6152473641524739e-06, "loss": 0.7211, "step": 24293 }, { "epoch": 0.7092931593238153, "grad_norm": 0.7666950303151499, "learning_rate": 1.6150851581508516e-06, "loss": 0.6309, "step": 24294 }, { "epoch": 0.7093223555516627, "grad_norm": 0.7501148398115481, "learning_rate": 1.6149229521492297e-06, "loss": 0.6134, "step": 24295 }, { "epoch": 0.70935155177951, "grad_norm": 0.8483596562999084, "learning_rate": 1.6147607461476074e-06, "loss": 0.8062, "step": 24296 }, { "epoch": 0.7093807480073574, "grad_norm": 0.6822033833450737, "learning_rate": 1.6145985401459855e-06, "loss": 0.5816, "step": 24297 }, { "epoch": 0.7094099442352048, "grad_norm": 0.7183090709886356, "learning_rate": 1.6144363341443633e-06, "loss": 0.6277, "step": 24298 }, { "epoch": 0.7094391404630521, "grad_norm": 0.7665526092551166, "learning_rate": 1.6142741281427415e-06, "loss": 0.6509, "step": 24299 }, { "epoch": 0.7094683366908995, "grad_norm": 0.6847897074207449, "learning_rate": 1.6141119221411195e-06, "loss": 0.5959, "step": 24300 }, { "epoch": 0.7094975329187468, "grad_norm": 0.6879990816889594, "learning_rate": 1.6139497161394973e-06, "loss": 0.5824, "step": 24301 }, { "epoch": 0.7095267291465943, "grad_norm": 0.670121623191363, "learning_rate": 1.6137875101378753e-06, "loss": 0.5328, "step": 24302 }, { "epoch": 0.7095559253744417, "grad_norm": 0.7532760712726184, "learning_rate": 1.613625304136253e-06, "loss": 0.6639, "step": 24303 }, { "epoch": 0.709585121602289, "grad_norm": 0.7451592261293403, "learning_rate": 1.613463098134631e-06, "loss": 0.6415, "step": 24304 }, { "epoch": 0.7096143178301364, "grad_norm": 0.7292481872135066, "learning_rate": 1.613300892133009e-06, "loss": 0.6059, "step": 24305 }, { "epoch": 0.7096435140579838, "grad_norm": 0.7443045660127804, "learning_rate": 1.6131386861313869e-06, "loss": 0.6452, "step": 24306 }, { "epoch": 0.7096727102858311, "grad_norm": 0.7715301723200796, "learning_rate": 1.6129764801297649e-06, "loss": 0.6588, "step": 24307 }, { "epoch": 0.7097019065136785, "grad_norm": 0.7273377319162951, "learning_rate": 1.6128142741281431e-06, "loss": 0.661, "step": 24308 }, { "epoch": 0.7097311027415258, "grad_norm": 0.7047883344718836, "learning_rate": 1.612652068126521e-06, "loss": 0.6164, "step": 24309 }, { "epoch": 0.7097602989693732, "grad_norm": 0.7207326569843387, "learning_rate": 1.612489862124899e-06, "loss": 0.625, "step": 24310 }, { "epoch": 0.7097894951972206, "grad_norm": 0.7201749459736416, "learning_rate": 1.6123276561232767e-06, "loss": 0.6256, "step": 24311 }, { "epoch": 0.7098186914250679, "grad_norm": 0.786528865781044, "learning_rate": 1.6121654501216547e-06, "loss": 0.6037, "step": 24312 }, { "epoch": 0.7098478876529153, "grad_norm": 0.7146611552044932, "learning_rate": 1.6120032441200325e-06, "loss": 0.6219, "step": 24313 }, { "epoch": 0.7098770838807626, "grad_norm": 0.7405462354973736, "learning_rate": 1.6118410381184105e-06, "loss": 0.6733, "step": 24314 }, { "epoch": 0.70990628010861, "grad_norm": 0.696733609422773, "learning_rate": 1.6116788321167883e-06, "loss": 0.5927, "step": 24315 }, { "epoch": 0.7099354763364574, "grad_norm": 0.7371035995297115, "learning_rate": 1.6115166261151663e-06, "loss": 0.6904, "step": 24316 }, { "epoch": 0.7099646725643047, "grad_norm": 0.740214777641596, "learning_rate": 1.6113544201135441e-06, "loss": 0.6585, "step": 24317 }, { "epoch": 0.7099938687921521, "grad_norm": 0.6858805200156216, "learning_rate": 1.6111922141119223e-06, "loss": 0.5711, "step": 24318 }, { "epoch": 0.7100230650199995, "grad_norm": 0.7246706100172315, "learning_rate": 1.6110300081103003e-06, "loss": 0.6544, "step": 24319 }, { "epoch": 0.7100522612478468, "grad_norm": 0.7624994960117248, "learning_rate": 1.6108678021086781e-06, "loss": 0.5942, "step": 24320 }, { "epoch": 0.7100814574756942, "grad_norm": 0.6865676945164896, "learning_rate": 1.6107055961070561e-06, "loss": 0.5276, "step": 24321 }, { "epoch": 0.7101106537035415, "grad_norm": 0.7570630753716551, "learning_rate": 1.610543390105434e-06, "loss": 0.7232, "step": 24322 }, { "epoch": 0.7101398499313889, "grad_norm": 0.7168155909980698, "learning_rate": 1.610381184103812e-06, "loss": 0.5794, "step": 24323 }, { "epoch": 0.7101690461592363, "grad_norm": 0.7458359405282999, "learning_rate": 1.61021897810219e-06, "loss": 0.6358, "step": 24324 }, { "epoch": 0.7101982423870836, "grad_norm": 0.7122534820107905, "learning_rate": 1.6100567721005677e-06, "loss": 0.6458, "step": 24325 }, { "epoch": 0.710227438614931, "grad_norm": 0.697662693228691, "learning_rate": 1.6098945660989457e-06, "loss": 0.652, "step": 24326 }, { "epoch": 0.7102566348427783, "grad_norm": 0.695585718487848, "learning_rate": 1.609732360097324e-06, "loss": 0.5845, "step": 24327 }, { "epoch": 0.7102858310706257, "grad_norm": 0.7774796689377294, "learning_rate": 1.6095701540957018e-06, "loss": 0.7027, "step": 24328 }, { "epoch": 0.7103150272984731, "grad_norm": 0.7115587511694239, "learning_rate": 1.6094079480940798e-06, "loss": 0.6301, "step": 24329 }, { "epoch": 0.7103442235263204, "grad_norm": 0.72258573784186, "learning_rate": 1.6092457420924576e-06, "loss": 0.6221, "step": 24330 }, { "epoch": 0.7103734197541678, "grad_norm": 0.7160110234461547, "learning_rate": 1.6090835360908356e-06, "loss": 0.6137, "step": 24331 }, { "epoch": 0.7104026159820152, "grad_norm": 0.689939433293677, "learning_rate": 1.6089213300892134e-06, "loss": 0.6152, "step": 24332 }, { "epoch": 0.7104318122098625, "grad_norm": 0.9178281035691621, "learning_rate": 1.6087591240875914e-06, "loss": 0.6628, "step": 24333 }, { "epoch": 0.7104610084377099, "grad_norm": 0.7310052708069539, "learning_rate": 1.6085969180859692e-06, "loss": 0.696, "step": 24334 }, { "epoch": 0.7104902046655572, "grad_norm": 0.6608259210854486, "learning_rate": 1.6084347120843472e-06, "loss": 0.5683, "step": 24335 }, { "epoch": 0.7105194008934046, "grad_norm": 0.7019731146178996, "learning_rate": 1.608272506082725e-06, "loss": 0.5648, "step": 24336 }, { "epoch": 0.710548597121252, "grad_norm": 0.747056850149802, "learning_rate": 1.6081103000811032e-06, "loss": 0.6889, "step": 24337 }, { "epoch": 0.7105777933490993, "grad_norm": 0.7080964105056579, "learning_rate": 1.6079480940794812e-06, "loss": 0.633, "step": 24338 }, { "epoch": 0.7106069895769467, "grad_norm": 0.7852766049744374, "learning_rate": 1.607785888077859e-06, "loss": 0.672, "step": 24339 }, { "epoch": 0.710636185804794, "grad_norm": 0.7523950199218304, "learning_rate": 1.607623682076237e-06, "loss": 0.6126, "step": 24340 }, { "epoch": 0.7106653820326414, "grad_norm": 0.6895902818951974, "learning_rate": 1.6074614760746148e-06, "loss": 0.5969, "step": 24341 }, { "epoch": 0.7106945782604888, "grad_norm": 0.7775878976436804, "learning_rate": 1.6072992700729928e-06, "loss": 0.6632, "step": 24342 }, { "epoch": 0.7107237744883361, "grad_norm": 0.7956577437919584, "learning_rate": 1.6071370640713708e-06, "loss": 0.7368, "step": 24343 }, { "epoch": 0.7107529707161835, "grad_norm": 0.7613725539731352, "learning_rate": 1.6069748580697486e-06, "loss": 0.7152, "step": 24344 }, { "epoch": 0.7107821669440308, "grad_norm": 0.7059681291394388, "learning_rate": 1.6068126520681266e-06, "loss": 0.6594, "step": 24345 }, { "epoch": 0.7108113631718782, "grad_norm": 0.759388655370438, "learning_rate": 1.6066504460665048e-06, "loss": 0.632, "step": 24346 }, { "epoch": 0.7108405593997256, "grad_norm": 0.7456364817779864, "learning_rate": 1.6064882400648826e-06, "loss": 0.6602, "step": 24347 }, { "epoch": 0.7108697556275729, "grad_norm": 0.8198301378274168, "learning_rate": 1.6063260340632606e-06, "loss": 0.7156, "step": 24348 }, { "epoch": 0.7108989518554203, "grad_norm": 0.7335739318925593, "learning_rate": 1.6061638280616384e-06, "loss": 0.6549, "step": 24349 }, { "epoch": 0.7109281480832677, "grad_norm": 0.6880539574929562, "learning_rate": 1.6060016220600164e-06, "loss": 0.5713, "step": 24350 }, { "epoch": 0.710957344311115, "grad_norm": 0.698531140834896, "learning_rate": 1.6058394160583942e-06, "loss": 0.631, "step": 24351 }, { "epoch": 0.7109865405389624, "grad_norm": 0.6628946173638869, "learning_rate": 1.6056772100567722e-06, "loss": 0.547, "step": 24352 }, { "epoch": 0.7110157367668097, "grad_norm": 0.683508874433601, "learning_rate": 1.60551500405515e-06, "loss": 0.6394, "step": 24353 }, { "epoch": 0.7110449329946571, "grad_norm": 0.6801560595004537, "learning_rate": 1.605352798053528e-06, "loss": 0.5725, "step": 24354 }, { "epoch": 0.7110741292225045, "grad_norm": 0.7771519679686106, "learning_rate": 1.6051905920519058e-06, "loss": 0.6862, "step": 24355 }, { "epoch": 0.7111033254503518, "grad_norm": 0.7213760722334385, "learning_rate": 1.605028386050284e-06, "loss": 0.6591, "step": 24356 }, { "epoch": 0.7111325216781992, "grad_norm": 0.7233047760007044, "learning_rate": 1.604866180048662e-06, "loss": 0.6213, "step": 24357 }, { "epoch": 0.7111617179060465, "grad_norm": 0.6735620201655552, "learning_rate": 1.6047039740470398e-06, "loss": 0.5677, "step": 24358 }, { "epoch": 0.7111909141338939, "grad_norm": 0.7445153745983564, "learning_rate": 1.6045417680454179e-06, "loss": 0.6279, "step": 24359 }, { "epoch": 0.7112201103617413, "grad_norm": 0.7071823584201112, "learning_rate": 1.6043795620437956e-06, "loss": 0.6298, "step": 24360 }, { "epoch": 0.7112493065895886, "grad_norm": 0.7410545245618289, "learning_rate": 1.6042173560421737e-06, "loss": 0.6798, "step": 24361 }, { "epoch": 0.711278502817436, "grad_norm": 0.7235237273791685, "learning_rate": 1.6040551500405517e-06, "loss": 0.6103, "step": 24362 }, { "epoch": 0.7113076990452833, "grad_norm": 0.7084978606926012, "learning_rate": 1.6038929440389295e-06, "loss": 0.6181, "step": 24363 }, { "epoch": 0.7113368952731307, "grad_norm": 0.7557383760909562, "learning_rate": 1.6037307380373075e-06, "loss": 0.6821, "step": 24364 }, { "epoch": 0.7113660915009781, "grad_norm": 0.7298634561551468, "learning_rate": 1.6035685320356855e-06, "loss": 0.675, "step": 24365 }, { "epoch": 0.7113952877288254, "grad_norm": 0.7111648032694435, "learning_rate": 1.6034063260340635e-06, "loss": 0.6191, "step": 24366 }, { "epoch": 0.7114244839566728, "grad_norm": 0.7551663362609101, "learning_rate": 1.6032441200324415e-06, "loss": 0.6927, "step": 24367 }, { "epoch": 0.7114536801845202, "grad_norm": 0.71125223612505, "learning_rate": 1.6030819140308193e-06, "loss": 0.6222, "step": 24368 }, { "epoch": 0.7114828764123675, "grad_norm": 0.6951286838125671, "learning_rate": 1.6029197080291973e-06, "loss": 0.5282, "step": 24369 }, { "epoch": 0.7115120726402149, "grad_norm": 0.7777512507608215, "learning_rate": 1.602757502027575e-06, "loss": 0.7083, "step": 24370 }, { "epoch": 0.7115412688680622, "grad_norm": 0.6808242561955693, "learning_rate": 1.602595296025953e-06, "loss": 0.5861, "step": 24371 }, { "epoch": 0.7115704650959096, "grad_norm": 0.7434013410571311, "learning_rate": 1.6024330900243309e-06, "loss": 0.6624, "step": 24372 }, { "epoch": 0.711599661323757, "grad_norm": 0.7079682233566876, "learning_rate": 1.6022708840227089e-06, "loss": 0.6069, "step": 24373 }, { "epoch": 0.7116288575516043, "grad_norm": 0.7373606978392834, "learning_rate": 1.6021086780210871e-06, "loss": 0.6364, "step": 24374 }, { "epoch": 0.7116580537794517, "grad_norm": 0.7835605641446732, "learning_rate": 1.601946472019465e-06, "loss": 0.6961, "step": 24375 }, { "epoch": 0.711687250007299, "grad_norm": 0.7552741112755416, "learning_rate": 1.601784266017843e-06, "loss": 0.6788, "step": 24376 }, { "epoch": 0.7117164462351464, "grad_norm": 0.7726463317930872, "learning_rate": 1.6016220600162207e-06, "loss": 0.7583, "step": 24377 }, { "epoch": 0.7117456424629938, "grad_norm": 0.7254277749331061, "learning_rate": 1.6014598540145987e-06, "loss": 0.6333, "step": 24378 }, { "epoch": 0.7117748386908411, "grad_norm": 0.7544108773661324, "learning_rate": 1.6012976480129765e-06, "loss": 0.6604, "step": 24379 }, { "epoch": 0.7118040349186885, "grad_norm": 0.7214037920405482, "learning_rate": 1.6011354420113545e-06, "loss": 0.6255, "step": 24380 }, { "epoch": 0.7118332311465358, "grad_norm": 0.7239057468861636, "learning_rate": 1.6009732360097325e-06, "loss": 0.662, "step": 24381 }, { "epoch": 0.7118624273743832, "grad_norm": 0.7302317060023925, "learning_rate": 1.6008110300081103e-06, "loss": 0.6617, "step": 24382 }, { "epoch": 0.7118916236022306, "grad_norm": 0.7116740330366275, "learning_rate": 1.6006488240064883e-06, "loss": 0.5886, "step": 24383 }, { "epoch": 0.7119208198300779, "grad_norm": 0.7376424313408441, "learning_rate": 1.6004866180048663e-06, "loss": 0.6781, "step": 24384 }, { "epoch": 0.7119500160579253, "grad_norm": 0.7518486390183464, "learning_rate": 1.6003244120032443e-06, "loss": 0.6637, "step": 24385 }, { "epoch": 0.7119792122857727, "grad_norm": 0.8360876641189972, "learning_rate": 1.6001622060016223e-06, "loss": 0.7154, "step": 24386 }, { "epoch": 0.71200840851362, "grad_norm": 0.7442557264627216, "learning_rate": 1.6000000000000001e-06, "loss": 0.6576, "step": 24387 }, { "epoch": 0.7120376047414674, "grad_norm": 0.7471013801724308, "learning_rate": 1.5998377939983781e-06, "loss": 0.6421, "step": 24388 }, { "epoch": 0.7120668009693147, "grad_norm": 0.7408038948841627, "learning_rate": 1.599675587996756e-06, "loss": 0.6466, "step": 24389 }, { "epoch": 0.7120959971971621, "grad_norm": 0.7475618373118176, "learning_rate": 1.599513381995134e-06, "loss": 0.6345, "step": 24390 }, { "epoch": 0.7121251934250095, "grad_norm": 0.7961865224536933, "learning_rate": 1.5993511759935117e-06, "loss": 0.6169, "step": 24391 }, { "epoch": 0.7121543896528568, "grad_norm": 0.7113255456160905, "learning_rate": 1.5991889699918897e-06, "loss": 0.6452, "step": 24392 }, { "epoch": 0.7121835858807042, "grad_norm": 0.6021092722069105, "learning_rate": 1.599026763990268e-06, "loss": 0.4355, "step": 24393 }, { "epoch": 0.7122127821085515, "grad_norm": 0.6695280319632279, "learning_rate": 1.5988645579886458e-06, "loss": 0.5358, "step": 24394 }, { "epoch": 0.7122419783363989, "grad_norm": 0.7293200442931345, "learning_rate": 1.5987023519870238e-06, "loss": 0.5776, "step": 24395 }, { "epoch": 0.7122711745642463, "grad_norm": 0.7261980257198606, "learning_rate": 1.5985401459854016e-06, "loss": 0.6349, "step": 24396 }, { "epoch": 0.7123003707920936, "grad_norm": 0.7153112426597146, "learning_rate": 1.5983779399837796e-06, "loss": 0.6715, "step": 24397 }, { "epoch": 0.712329567019941, "grad_norm": 0.7506980687639395, "learning_rate": 1.5982157339821574e-06, "loss": 0.7027, "step": 24398 }, { "epoch": 0.7123587632477884, "grad_norm": 0.6368021193790527, "learning_rate": 1.5980535279805354e-06, "loss": 0.5241, "step": 24399 }, { "epoch": 0.7123879594756357, "grad_norm": 0.7640220367507254, "learning_rate": 1.5978913219789134e-06, "loss": 0.6916, "step": 24400 }, { "epoch": 0.7124171557034831, "grad_norm": 0.7207180979000576, "learning_rate": 1.5977291159772912e-06, "loss": 0.6361, "step": 24401 }, { "epoch": 0.7124463519313304, "grad_norm": 0.659607796994096, "learning_rate": 1.5975669099756692e-06, "loss": 0.5509, "step": 24402 }, { "epoch": 0.7124755481591778, "grad_norm": 0.6660940184382309, "learning_rate": 1.5974047039740472e-06, "loss": 0.5679, "step": 24403 }, { "epoch": 0.7125047443870252, "grad_norm": 0.7471822591687021, "learning_rate": 1.5972424979724252e-06, "loss": 0.6766, "step": 24404 }, { "epoch": 0.7125339406148725, "grad_norm": 0.731938231097059, "learning_rate": 1.5970802919708032e-06, "loss": 0.692, "step": 24405 }, { "epoch": 0.7125631368427199, "grad_norm": 0.7765284574125717, "learning_rate": 1.596918085969181e-06, "loss": 0.7668, "step": 24406 }, { "epoch": 0.7125923330705672, "grad_norm": 0.7228167599761957, "learning_rate": 1.596755879967559e-06, "loss": 0.6196, "step": 24407 }, { "epoch": 0.7126215292984146, "grad_norm": 0.7156211713531899, "learning_rate": 1.5965936739659368e-06, "loss": 0.5667, "step": 24408 }, { "epoch": 0.712650725526262, "grad_norm": 0.7708919347418338, "learning_rate": 1.5964314679643148e-06, "loss": 0.6753, "step": 24409 }, { "epoch": 0.7126799217541093, "grad_norm": 0.690368048620079, "learning_rate": 1.5962692619626926e-06, "loss": 0.589, "step": 24410 }, { "epoch": 0.7127091179819567, "grad_norm": 0.7136825011774754, "learning_rate": 1.5961070559610706e-06, "loss": 0.6083, "step": 24411 }, { "epoch": 0.712738314209804, "grad_norm": 0.7042794899504299, "learning_rate": 1.5959448499594488e-06, "loss": 0.6574, "step": 24412 }, { "epoch": 0.7127675104376514, "grad_norm": 0.7212529201713809, "learning_rate": 1.5957826439578266e-06, "loss": 0.6679, "step": 24413 }, { "epoch": 0.7127967066654988, "grad_norm": 0.7350373216941987, "learning_rate": 1.5956204379562046e-06, "loss": 0.6413, "step": 24414 }, { "epoch": 0.7128259028933461, "grad_norm": 0.6898919642001989, "learning_rate": 1.5954582319545824e-06, "loss": 0.5425, "step": 24415 }, { "epoch": 0.7128550991211935, "grad_norm": 0.7191815318830104, "learning_rate": 1.5952960259529604e-06, "loss": 0.6017, "step": 24416 }, { "epoch": 0.7128842953490409, "grad_norm": 0.7279850175794768, "learning_rate": 1.5951338199513382e-06, "loss": 0.636, "step": 24417 }, { "epoch": 0.7129134915768882, "grad_norm": 0.7092401235498814, "learning_rate": 1.5949716139497162e-06, "loss": 0.5855, "step": 24418 }, { "epoch": 0.7129426878047356, "grad_norm": 0.7238097105399212, "learning_rate": 1.5948094079480942e-06, "loss": 0.6781, "step": 24419 }, { "epoch": 0.7129718840325829, "grad_norm": 0.6688481875742153, "learning_rate": 1.594647201946472e-06, "loss": 0.5644, "step": 24420 }, { "epoch": 0.7130010802604303, "grad_norm": 0.7026762823192805, "learning_rate": 1.59448499594485e-06, "loss": 0.6182, "step": 24421 }, { "epoch": 0.7130302764882778, "grad_norm": 0.8193097155460911, "learning_rate": 1.594322789943228e-06, "loss": 0.7396, "step": 24422 }, { "epoch": 0.7130594727161251, "grad_norm": 0.7824232239084833, "learning_rate": 1.594160583941606e-06, "loss": 0.6949, "step": 24423 }, { "epoch": 0.7130886689439725, "grad_norm": 0.7134929057591879, "learning_rate": 1.593998377939984e-06, "loss": 0.6254, "step": 24424 }, { "epoch": 0.7131178651718199, "grad_norm": 0.6940741509475612, "learning_rate": 1.5938361719383619e-06, "loss": 0.591, "step": 24425 }, { "epoch": 0.7131470613996672, "grad_norm": 0.7361875404590491, "learning_rate": 1.5936739659367399e-06, "loss": 0.6863, "step": 24426 }, { "epoch": 0.7131762576275146, "grad_norm": 0.7548706633955348, "learning_rate": 1.5935117599351177e-06, "loss": 0.6864, "step": 24427 }, { "epoch": 0.7132054538553619, "grad_norm": 0.6936157896986025, "learning_rate": 1.5933495539334957e-06, "loss": 0.5922, "step": 24428 }, { "epoch": 0.7132346500832093, "grad_norm": 0.6801386205246244, "learning_rate": 1.5931873479318735e-06, "loss": 0.5872, "step": 24429 }, { "epoch": 0.7132638463110567, "grad_norm": 0.7097297845269369, "learning_rate": 1.5930251419302515e-06, "loss": 0.6128, "step": 24430 }, { "epoch": 0.713293042538904, "grad_norm": 0.7275953961384142, "learning_rate": 1.5928629359286297e-06, "loss": 0.6271, "step": 24431 }, { "epoch": 0.7133222387667514, "grad_norm": 0.712485202898808, "learning_rate": 1.5927007299270075e-06, "loss": 0.6166, "step": 24432 }, { "epoch": 0.7133514349945987, "grad_norm": 0.7572005735581673, "learning_rate": 1.5925385239253855e-06, "loss": 0.6614, "step": 24433 }, { "epoch": 0.7133806312224461, "grad_norm": 0.7155501228452449, "learning_rate": 1.5923763179237633e-06, "loss": 0.7133, "step": 24434 }, { "epoch": 0.7134098274502935, "grad_norm": 0.7466666846788651, "learning_rate": 1.5922141119221413e-06, "loss": 0.6675, "step": 24435 }, { "epoch": 0.7134390236781408, "grad_norm": 0.6798619113385128, "learning_rate": 1.592051905920519e-06, "loss": 0.6326, "step": 24436 }, { "epoch": 0.7134682199059882, "grad_norm": 0.6798152143123927, "learning_rate": 1.591889699918897e-06, "loss": 0.5677, "step": 24437 }, { "epoch": 0.7134974161338355, "grad_norm": 0.688870905714131, "learning_rate": 1.5917274939172749e-06, "loss": 0.5474, "step": 24438 }, { "epoch": 0.7135266123616829, "grad_norm": 0.6951978794873956, "learning_rate": 1.5915652879156529e-06, "loss": 0.641, "step": 24439 }, { "epoch": 0.7135558085895303, "grad_norm": 0.7581064573195677, "learning_rate": 1.591403081914031e-06, "loss": 0.6787, "step": 24440 }, { "epoch": 0.7135850048173776, "grad_norm": 0.7132229839160632, "learning_rate": 1.591240875912409e-06, "loss": 0.5951, "step": 24441 }, { "epoch": 0.713614201045225, "grad_norm": 0.646214400662708, "learning_rate": 1.591078669910787e-06, "loss": 0.5457, "step": 24442 }, { "epoch": 0.7136433972730724, "grad_norm": 0.713246298536435, "learning_rate": 1.590916463909165e-06, "loss": 0.6132, "step": 24443 }, { "epoch": 0.7136725935009197, "grad_norm": 0.8030171411149866, "learning_rate": 1.5907542579075427e-06, "loss": 0.6778, "step": 24444 }, { "epoch": 0.7137017897287671, "grad_norm": 0.6701265564645482, "learning_rate": 1.5905920519059207e-06, "loss": 0.5806, "step": 24445 }, { "epoch": 0.7137309859566144, "grad_norm": 0.7143815773771461, "learning_rate": 1.5904298459042985e-06, "loss": 0.6146, "step": 24446 }, { "epoch": 0.7137601821844618, "grad_norm": 0.6763986283179592, "learning_rate": 1.5902676399026765e-06, "loss": 0.5707, "step": 24447 }, { "epoch": 0.7137893784123092, "grad_norm": 1.2985645352233195, "learning_rate": 1.5901054339010543e-06, "loss": 0.6293, "step": 24448 }, { "epoch": 0.7138185746401565, "grad_norm": 0.7385732749691893, "learning_rate": 1.5899432278994323e-06, "loss": 0.6439, "step": 24449 }, { "epoch": 0.7138477708680039, "grad_norm": 0.8279268340661626, "learning_rate": 1.5897810218978105e-06, "loss": 0.6404, "step": 24450 }, { "epoch": 0.7138769670958512, "grad_norm": 0.7363670095819906, "learning_rate": 1.5896188158961883e-06, "loss": 0.6634, "step": 24451 }, { "epoch": 0.7139061633236986, "grad_norm": 0.7664119818222802, "learning_rate": 1.5894566098945663e-06, "loss": 0.6751, "step": 24452 }, { "epoch": 0.713935359551546, "grad_norm": 0.7238636719390027, "learning_rate": 1.5892944038929441e-06, "loss": 0.6581, "step": 24453 }, { "epoch": 0.7139645557793933, "grad_norm": 0.7690066392341169, "learning_rate": 1.5891321978913221e-06, "loss": 0.6954, "step": 24454 }, { "epoch": 0.7139937520072407, "grad_norm": 0.7050734810272368, "learning_rate": 1.5889699918897e-06, "loss": 0.6323, "step": 24455 }, { "epoch": 0.714022948235088, "grad_norm": 0.7576152921716801, "learning_rate": 1.588807785888078e-06, "loss": 0.6482, "step": 24456 }, { "epoch": 0.7140521444629354, "grad_norm": 0.7936922510244829, "learning_rate": 1.5886455798864557e-06, "loss": 0.7177, "step": 24457 }, { "epoch": 0.7140813406907828, "grad_norm": 0.7616916529742249, "learning_rate": 1.5884833738848338e-06, "loss": 0.6576, "step": 24458 }, { "epoch": 0.7141105369186301, "grad_norm": 0.6981564300612919, "learning_rate": 1.588321167883212e-06, "loss": 0.5709, "step": 24459 }, { "epoch": 0.7141397331464775, "grad_norm": 0.7216107237064902, "learning_rate": 1.5881589618815898e-06, "loss": 0.6248, "step": 24460 }, { "epoch": 0.7141689293743249, "grad_norm": 0.7021197700141196, "learning_rate": 1.5879967558799678e-06, "loss": 0.6348, "step": 24461 }, { "epoch": 0.7141981256021722, "grad_norm": 0.8441105218192558, "learning_rate": 1.5878345498783458e-06, "loss": 0.6455, "step": 24462 }, { "epoch": 0.7142273218300196, "grad_norm": 0.7146819732626838, "learning_rate": 1.5876723438767236e-06, "loss": 0.6478, "step": 24463 }, { "epoch": 0.7142565180578669, "grad_norm": 0.7675306623075594, "learning_rate": 1.5875101378751016e-06, "loss": 0.7243, "step": 24464 }, { "epoch": 0.7142857142857143, "grad_norm": 0.7100664701727909, "learning_rate": 1.5873479318734794e-06, "loss": 0.5494, "step": 24465 }, { "epoch": 0.7143149105135617, "grad_norm": 0.6733162043949087, "learning_rate": 1.5871857258718574e-06, "loss": 0.5864, "step": 24466 }, { "epoch": 0.714344106741409, "grad_norm": 0.7193266042591, "learning_rate": 1.5870235198702352e-06, "loss": 0.6354, "step": 24467 }, { "epoch": 0.7143733029692564, "grad_norm": 0.6932502305263406, "learning_rate": 1.5868613138686132e-06, "loss": 0.6007, "step": 24468 }, { "epoch": 0.7144024991971037, "grad_norm": 0.7542082031127897, "learning_rate": 1.5866991078669914e-06, "loss": 0.7094, "step": 24469 }, { "epoch": 0.7144316954249511, "grad_norm": 0.6905627100913554, "learning_rate": 1.5865369018653692e-06, "loss": 0.6048, "step": 24470 }, { "epoch": 0.7144608916527985, "grad_norm": 0.7157087097858176, "learning_rate": 1.5863746958637472e-06, "loss": 0.6072, "step": 24471 }, { "epoch": 0.7144900878806458, "grad_norm": 0.8324971552794956, "learning_rate": 1.586212489862125e-06, "loss": 0.6185, "step": 24472 }, { "epoch": 0.7145192841084932, "grad_norm": 0.7260124895584391, "learning_rate": 1.586050283860503e-06, "loss": 0.6292, "step": 24473 }, { "epoch": 0.7145484803363406, "grad_norm": 0.71706506257191, "learning_rate": 1.5858880778588808e-06, "loss": 0.658, "step": 24474 }, { "epoch": 0.7145776765641879, "grad_norm": 0.7149273450492345, "learning_rate": 1.5857258718572588e-06, "loss": 0.674, "step": 24475 }, { "epoch": 0.7146068727920353, "grad_norm": 0.7042426469394147, "learning_rate": 1.5855636658556366e-06, "loss": 0.6078, "step": 24476 }, { "epoch": 0.7146360690198826, "grad_norm": 0.7477223887754324, "learning_rate": 1.5854014598540146e-06, "loss": 0.6671, "step": 24477 }, { "epoch": 0.71466526524773, "grad_norm": 0.773316737755151, "learning_rate": 1.5852392538523928e-06, "loss": 0.7147, "step": 24478 }, { "epoch": 0.7146944614755774, "grad_norm": 0.8067102864809539, "learning_rate": 1.5850770478507706e-06, "loss": 0.6492, "step": 24479 }, { "epoch": 0.7147236577034247, "grad_norm": 0.7572464130773584, "learning_rate": 1.5849148418491486e-06, "loss": 0.705, "step": 24480 }, { "epoch": 0.7147528539312721, "grad_norm": 0.7039766595155357, "learning_rate": 1.5847526358475266e-06, "loss": 0.5815, "step": 24481 }, { "epoch": 0.7147820501591194, "grad_norm": 0.6852159688636245, "learning_rate": 1.5845904298459044e-06, "loss": 0.5838, "step": 24482 }, { "epoch": 0.7148112463869668, "grad_norm": 0.7205166155217968, "learning_rate": 1.5844282238442824e-06, "loss": 0.6713, "step": 24483 }, { "epoch": 0.7148404426148142, "grad_norm": 0.7110267825508294, "learning_rate": 1.5842660178426602e-06, "loss": 0.6245, "step": 24484 }, { "epoch": 0.7148696388426615, "grad_norm": 0.7209360577999754, "learning_rate": 1.5841038118410382e-06, "loss": 0.6218, "step": 24485 }, { "epoch": 0.7148988350705089, "grad_norm": 0.6819742974533252, "learning_rate": 1.583941605839416e-06, "loss": 0.5681, "step": 24486 }, { "epoch": 0.7149280312983562, "grad_norm": 0.7624702505206054, "learning_rate": 1.583779399837794e-06, "loss": 0.696, "step": 24487 }, { "epoch": 0.7149572275262036, "grad_norm": 0.739637691983022, "learning_rate": 1.5836171938361723e-06, "loss": 0.6843, "step": 24488 }, { "epoch": 0.714986423754051, "grad_norm": 0.7552814953216135, "learning_rate": 1.58345498783455e-06, "loss": 0.7314, "step": 24489 }, { "epoch": 0.7150156199818983, "grad_norm": 0.6866093135789821, "learning_rate": 1.583292781832928e-06, "loss": 0.5433, "step": 24490 }, { "epoch": 0.7150448162097457, "grad_norm": 0.7940633231362632, "learning_rate": 1.5831305758313059e-06, "loss": 0.6703, "step": 24491 }, { "epoch": 0.715074012437593, "grad_norm": 0.6954635476619538, "learning_rate": 1.5829683698296839e-06, "loss": 0.5901, "step": 24492 }, { "epoch": 0.7151032086654404, "grad_norm": 0.806782409990212, "learning_rate": 1.5828061638280617e-06, "loss": 0.7398, "step": 24493 }, { "epoch": 0.7151324048932878, "grad_norm": 0.6979968129144565, "learning_rate": 1.5826439578264397e-06, "loss": 0.5579, "step": 24494 }, { "epoch": 0.7151616011211351, "grad_norm": 0.7514519376135854, "learning_rate": 1.5824817518248175e-06, "loss": 0.6455, "step": 24495 }, { "epoch": 0.7151907973489825, "grad_norm": 0.7146503773221177, "learning_rate": 1.5823195458231955e-06, "loss": 0.6174, "step": 24496 }, { "epoch": 0.7152199935768299, "grad_norm": 0.7420505968478145, "learning_rate": 1.5821573398215737e-06, "loss": 0.6532, "step": 24497 }, { "epoch": 0.7152491898046772, "grad_norm": 0.7455633759867869, "learning_rate": 1.5819951338199515e-06, "loss": 0.6507, "step": 24498 }, { "epoch": 0.7152783860325246, "grad_norm": 0.7289496835380306, "learning_rate": 1.5818329278183295e-06, "loss": 0.6567, "step": 24499 }, { "epoch": 0.7153075822603719, "grad_norm": 0.6671767835309389, "learning_rate": 1.5816707218167073e-06, "loss": 0.57, "step": 24500 }, { "epoch": 0.7153367784882193, "grad_norm": 0.7687212108184812, "learning_rate": 1.5815085158150853e-06, "loss": 0.7239, "step": 24501 }, { "epoch": 0.7153659747160667, "grad_norm": 0.6755662134754671, "learning_rate": 1.5813463098134633e-06, "loss": 0.5307, "step": 24502 }, { "epoch": 0.715395170943914, "grad_norm": 0.7418042344167606, "learning_rate": 1.581184103811841e-06, "loss": 0.6379, "step": 24503 }, { "epoch": 0.7154243671717614, "grad_norm": 0.70917624731462, "learning_rate": 1.581021897810219e-06, "loss": 0.6751, "step": 24504 }, { "epoch": 0.7154535633996087, "grad_norm": 0.6854526464300881, "learning_rate": 1.580859691808597e-06, "loss": 0.5852, "step": 24505 }, { "epoch": 0.7154827596274561, "grad_norm": 0.7079689395139634, "learning_rate": 1.580697485806975e-06, "loss": 0.5904, "step": 24506 }, { "epoch": 0.7155119558553035, "grad_norm": 0.9061590700666953, "learning_rate": 1.5805352798053531e-06, "loss": 0.7112, "step": 24507 }, { "epoch": 0.7155411520831508, "grad_norm": 0.7671887697480846, "learning_rate": 1.580373073803731e-06, "loss": 0.7185, "step": 24508 }, { "epoch": 0.7155703483109982, "grad_norm": 0.6493740243706123, "learning_rate": 1.580210867802109e-06, "loss": 0.5653, "step": 24509 }, { "epoch": 0.7155995445388456, "grad_norm": 0.7138647203017117, "learning_rate": 1.5800486618004867e-06, "loss": 0.5929, "step": 24510 }, { "epoch": 0.7156287407666929, "grad_norm": 0.701027832290575, "learning_rate": 1.5798864557988647e-06, "loss": 0.607, "step": 24511 }, { "epoch": 0.7156579369945403, "grad_norm": 0.722273723835436, "learning_rate": 1.5797242497972425e-06, "loss": 0.6447, "step": 24512 }, { "epoch": 0.7156871332223876, "grad_norm": 0.745748965935093, "learning_rate": 1.5795620437956205e-06, "loss": 0.7125, "step": 24513 }, { "epoch": 0.715716329450235, "grad_norm": 0.7275308995615444, "learning_rate": 1.5793998377939983e-06, "loss": 0.6826, "step": 24514 }, { "epoch": 0.7157455256780824, "grad_norm": 0.8082277353464649, "learning_rate": 1.5792376317923763e-06, "loss": 0.7144, "step": 24515 }, { "epoch": 0.7157747219059297, "grad_norm": 0.7416929336793013, "learning_rate": 1.5790754257907545e-06, "loss": 0.7036, "step": 24516 }, { "epoch": 0.7158039181337771, "grad_norm": 0.678779029723417, "learning_rate": 1.5789132197891323e-06, "loss": 0.5902, "step": 24517 }, { "epoch": 0.7158331143616244, "grad_norm": 0.7736651657529027, "learning_rate": 1.5787510137875103e-06, "loss": 0.6234, "step": 24518 }, { "epoch": 0.7158623105894718, "grad_norm": 0.8314114265375137, "learning_rate": 1.5785888077858881e-06, "loss": 0.6502, "step": 24519 }, { "epoch": 0.7158915068173192, "grad_norm": 0.7137255976844461, "learning_rate": 1.5784266017842661e-06, "loss": 0.6124, "step": 24520 }, { "epoch": 0.7159207030451665, "grad_norm": 0.7511174303320741, "learning_rate": 1.5782643957826442e-06, "loss": 0.6616, "step": 24521 }, { "epoch": 0.7159498992730139, "grad_norm": 0.7216048145818933, "learning_rate": 1.578102189781022e-06, "loss": 0.6252, "step": 24522 }, { "epoch": 0.7159790955008613, "grad_norm": 0.6547185736167477, "learning_rate": 1.5779399837794e-06, "loss": 0.5468, "step": 24523 }, { "epoch": 0.7160082917287086, "grad_norm": 0.6898011874785506, "learning_rate": 1.5777777777777778e-06, "loss": 0.6418, "step": 24524 }, { "epoch": 0.716037487956556, "grad_norm": 0.6702016642179452, "learning_rate": 1.577615571776156e-06, "loss": 0.5708, "step": 24525 }, { "epoch": 0.7160666841844033, "grad_norm": 0.7214071387309888, "learning_rate": 1.577453365774534e-06, "loss": 0.6512, "step": 24526 }, { "epoch": 0.7160958804122507, "grad_norm": 0.7029594815233154, "learning_rate": 1.5772911597729118e-06, "loss": 0.6459, "step": 24527 }, { "epoch": 0.7161250766400981, "grad_norm": 0.6534481538007806, "learning_rate": 1.5771289537712898e-06, "loss": 0.5453, "step": 24528 }, { "epoch": 0.7161542728679454, "grad_norm": 0.6718689293225524, "learning_rate": 1.5769667477696676e-06, "loss": 0.5699, "step": 24529 }, { "epoch": 0.7161834690957928, "grad_norm": 0.7355233790614488, "learning_rate": 1.5768045417680456e-06, "loss": 0.6343, "step": 24530 }, { "epoch": 0.7162126653236401, "grad_norm": 0.7417324246389478, "learning_rate": 1.5766423357664234e-06, "loss": 0.6147, "step": 24531 }, { "epoch": 0.7162418615514875, "grad_norm": 0.679335411544673, "learning_rate": 1.5764801297648014e-06, "loss": 0.5976, "step": 24532 }, { "epoch": 0.7162710577793349, "grad_norm": 0.7435717519896826, "learning_rate": 1.5763179237631792e-06, "loss": 0.6967, "step": 24533 }, { "epoch": 0.7163002540071822, "grad_norm": 0.7000076430012984, "learning_rate": 1.5761557177615572e-06, "loss": 0.6657, "step": 24534 }, { "epoch": 0.7163294502350296, "grad_norm": 0.7217852858208001, "learning_rate": 1.5759935117599354e-06, "loss": 0.5975, "step": 24535 }, { "epoch": 0.716358646462877, "grad_norm": 0.7469017879122836, "learning_rate": 1.5758313057583132e-06, "loss": 0.6548, "step": 24536 }, { "epoch": 0.7163878426907243, "grad_norm": 0.7468072656406607, "learning_rate": 1.5756690997566912e-06, "loss": 0.6066, "step": 24537 }, { "epoch": 0.7164170389185717, "grad_norm": 0.7255719713307894, "learning_rate": 1.575506893755069e-06, "loss": 0.6488, "step": 24538 }, { "epoch": 0.716446235146419, "grad_norm": 0.7659066820653059, "learning_rate": 1.575344687753447e-06, "loss": 0.7543, "step": 24539 }, { "epoch": 0.7164754313742664, "grad_norm": 0.727423294960659, "learning_rate": 1.575182481751825e-06, "loss": 0.66, "step": 24540 }, { "epoch": 0.7165046276021138, "grad_norm": 0.7415393405114974, "learning_rate": 1.5750202757502028e-06, "loss": 0.6869, "step": 24541 }, { "epoch": 0.7165338238299611, "grad_norm": 0.7341534639878935, "learning_rate": 1.5748580697485808e-06, "loss": 0.6199, "step": 24542 }, { "epoch": 0.7165630200578086, "grad_norm": 0.7337039787077414, "learning_rate": 1.5746958637469586e-06, "loss": 0.6251, "step": 24543 }, { "epoch": 0.716592216285656, "grad_norm": 0.6939903605232594, "learning_rate": 1.5745336577453368e-06, "loss": 0.6494, "step": 24544 }, { "epoch": 0.7166214125135033, "grad_norm": 0.7245407089481144, "learning_rate": 1.5743714517437148e-06, "loss": 0.6438, "step": 24545 }, { "epoch": 0.7166506087413507, "grad_norm": 0.6930932201427494, "learning_rate": 1.5742092457420926e-06, "loss": 0.5889, "step": 24546 }, { "epoch": 0.716679804969198, "grad_norm": 0.6986228139240386, "learning_rate": 1.5740470397404706e-06, "loss": 0.6463, "step": 24547 }, { "epoch": 0.7167090011970454, "grad_norm": 0.7887840573079451, "learning_rate": 1.5738848337388484e-06, "loss": 0.7092, "step": 24548 }, { "epoch": 0.7167381974248928, "grad_norm": 0.7128365095939122, "learning_rate": 1.5737226277372264e-06, "loss": 0.6262, "step": 24549 }, { "epoch": 0.7167673936527401, "grad_norm": 0.7229997257442744, "learning_rate": 1.5735604217356042e-06, "loss": 0.6082, "step": 24550 }, { "epoch": 0.7167965898805875, "grad_norm": 0.6909921718755322, "learning_rate": 1.5733982157339822e-06, "loss": 0.6561, "step": 24551 }, { "epoch": 0.7168257861084348, "grad_norm": 0.7688023866400905, "learning_rate": 1.57323600973236e-06, "loss": 0.7037, "step": 24552 }, { "epoch": 0.7168549823362822, "grad_norm": 0.6490276782260486, "learning_rate": 1.573073803730738e-06, "loss": 0.5442, "step": 24553 }, { "epoch": 0.7168841785641296, "grad_norm": 0.6993120427761843, "learning_rate": 1.5729115977291163e-06, "loss": 0.5991, "step": 24554 }, { "epoch": 0.7169133747919769, "grad_norm": 0.6560819966529534, "learning_rate": 1.572749391727494e-06, "loss": 0.5065, "step": 24555 }, { "epoch": 0.7169425710198243, "grad_norm": 0.8990215699522726, "learning_rate": 1.572587185725872e-06, "loss": 0.8685, "step": 24556 }, { "epoch": 0.7169717672476716, "grad_norm": 0.8195130853056576, "learning_rate": 1.5724249797242499e-06, "loss": 0.6269, "step": 24557 }, { "epoch": 0.717000963475519, "grad_norm": 0.6533418539187643, "learning_rate": 1.5722627737226279e-06, "loss": 0.5702, "step": 24558 }, { "epoch": 0.7170301597033664, "grad_norm": 0.7820327328303373, "learning_rate": 1.5721005677210059e-06, "loss": 0.6998, "step": 24559 }, { "epoch": 0.7170593559312137, "grad_norm": 0.7653661678837257, "learning_rate": 1.5719383617193837e-06, "loss": 0.6357, "step": 24560 }, { "epoch": 0.7170885521590611, "grad_norm": 0.7444164344471265, "learning_rate": 1.5717761557177617e-06, "loss": 0.6631, "step": 24561 }, { "epoch": 0.7171177483869084, "grad_norm": 0.6936130130000746, "learning_rate": 1.5716139497161395e-06, "loss": 0.5931, "step": 24562 }, { "epoch": 0.7171469446147558, "grad_norm": 0.7103844551960425, "learning_rate": 1.5714517437145177e-06, "loss": 0.6008, "step": 24563 }, { "epoch": 0.7171761408426032, "grad_norm": 0.7104132401397432, "learning_rate": 1.5712895377128957e-06, "loss": 0.5514, "step": 24564 }, { "epoch": 0.7172053370704505, "grad_norm": 0.7929359541351091, "learning_rate": 1.5711273317112735e-06, "loss": 0.7678, "step": 24565 }, { "epoch": 0.7172345332982979, "grad_norm": 0.6879703643562293, "learning_rate": 1.5709651257096515e-06, "loss": 0.5444, "step": 24566 }, { "epoch": 0.7172637295261453, "grad_norm": 0.7786116954317963, "learning_rate": 1.5708029197080293e-06, "loss": 0.7176, "step": 24567 }, { "epoch": 0.7172929257539926, "grad_norm": 0.6968360205159165, "learning_rate": 1.5706407137064073e-06, "loss": 0.601, "step": 24568 }, { "epoch": 0.71732212198184, "grad_norm": 0.7612477136998491, "learning_rate": 1.570478507704785e-06, "loss": 0.7085, "step": 24569 }, { "epoch": 0.7173513182096873, "grad_norm": 0.6975548635251537, "learning_rate": 1.570316301703163e-06, "loss": 0.602, "step": 24570 }, { "epoch": 0.7173805144375347, "grad_norm": 0.7207130081747835, "learning_rate": 1.570154095701541e-06, "loss": 0.6434, "step": 24571 }, { "epoch": 0.7174097106653821, "grad_norm": 0.7506921638671787, "learning_rate": 1.569991889699919e-06, "loss": 0.659, "step": 24572 }, { "epoch": 0.7174389068932294, "grad_norm": 0.7078583928192567, "learning_rate": 1.5698296836982971e-06, "loss": 0.6298, "step": 24573 }, { "epoch": 0.7174681031210768, "grad_norm": 0.7215528183363938, "learning_rate": 1.569667477696675e-06, "loss": 0.6429, "step": 24574 }, { "epoch": 0.7174972993489241, "grad_norm": 0.7264251088103133, "learning_rate": 1.569505271695053e-06, "loss": 0.6371, "step": 24575 }, { "epoch": 0.7175264955767715, "grad_norm": 0.7263392452258822, "learning_rate": 1.5693430656934307e-06, "loss": 0.6363, "step": 24576 }, { "epoch": 0.7175556918046189, "grad_norm": 0.6847965566979138, "learning_rate": 1.5691808596918087e-06, "loss": 0.5358, "step": 24577 }, { "epoch": 0.7175848880324662, "grad_norm": 0.7812104075287216, "learning_rate": 1.5690186536901867e-06, "loss": 0.7327, "step": 24578 }, { "epoch": 0.7176140842603136, "grad_norm": 0.7408728455535573, "learning_rate": 1.5688564476885645e-06, "loss": 0.7182, "step": 24579 }, { "epoch": 0.717643280488161, "grad_norm": 0.7961122335187469, "learning_rate": 1.5686942416869425e-06, "loss": 0.6395, "step": 24580 }, { "epoch": 0.7176724767160083, "grad_norm": 0.7699301581890465, "learning_rate": 1.5685320356853203e-06, "loss": 0.7477, "step": 24581 }, { "epoch": 0.7177016729438557, "grad_norm": 0.7224105676465609, "learning_rate": 1.5683698296836985e-06, "loss": 0.5831, "step": 24582 }, { "epoch": 0.717730869171703, "grad_norm": 0.7888016527589565, "learning_rate": 1.5682076236820766e-06, "loss": 0.6555, "step": 24583 }, { "epoch": 0.7177600653995504, "grad_norm": 0.7051644966595747, "learning_rate": 1.5680454176804543e-06, "loss": 0.6517, "step": 24584 }, { "epoch": 0.7177892616273978, "grad_norm": 0.757985341542771, "learning_rate": 1.5678832116788324e-06, "loss": 0.6794, "step": 24585 }, { "epoch": 0.7178184578552451, "grad_norm": 0.7417970708331139, "learning_rate": 1.5677210056772102e-06, "loss": 0.6555, "step": 24586 }, { "epoch": 0.7178476540830925, "grad_norm": 0.6943380888832398, "learning_rate": 1.5675587996755882e-06, "loss": 0.5952, "step": 24587 }, { "epoch": 0.7178768503109398, "grad_norm": 0.6904775054510588, "learning_rate": 1.567396593673966e-06, "loss": 0.6172, "step": 24588 }, { "epoch": 0.7179060465387872, "grad_norm": 0.6898394783222017, "learning_rate": 1.567234387672344e-06, "loss": 0.6105, "step": 24589 }, { "epoch": 0.7179352427666346, "grad_norm": 0.7398574301347433, "learning_rate": 1.5670721816707218e-06, "loss": 0.689, "step": 24590 }, { "epoch": 0.7179644389944819, "grad_norm": 0.7138831700199446, "learning_rate": 1.5669099756690998e-06, "loss": 0.6191, "step": 24591 }, { "epoch": 0.7179936352223293, "grad_norm": 0.7800294730631977, "learning_rate": 1.566747769667478e-06, "loss": 0.7317, "step": 24592 }, { "epoch": 0.7180228314501766, "grad_norm": 0.7361463443680536, "learning_rate": 1.5665855636658558e-06, "loss": 0.7184, "step": 24593 }, { "epoch": 0.718052027678024, "grad_norm": 0.7478463581032938, "learning_rate": 1.5664233576642338e-06, "loss": 0.717, "step": 24594 }, { "epoch": 0.7180812239058714, "grad_norm": 0.6653034046942945, "learning_rate": 1.5662611516626116e-06, "loss": 0.5725, "step": 24595 }, { "epoch": 0.7181104201337187, "grad_norm": 0.764570088987712, "learning_rate": 1.5660989456609896e-06, "loss": 0.7094, "step": 24596 }, { "epoch": 0.7181396163615661, "grad_norm": 0.8063549220155013, "learning_rate": 1.5659367396593676e-06, "loss": 0.7024, "step": 24597 }, { "epoch": 0.7181688125894135, "grad_norm": 0.668523599397247, "learning_rate": 1.5657745336577454e-06, "loss": 0.5795, "step": 24598 }, { "epoch": 0.7181980088172608, "grad_norm": 0.6925779040518537, "learning_rate": 1.5656123276561234e-06, "loss": 0.6172, "step": 24599 }, { "epoch": 0.7182272050451082, "grad_norm": 0.7698108443395031, "learning_rate": 1.5654501216545012e-06, "loss": 0.6448, "step": 24600 }, { "epoch": 0.7182564012729555, "grad_norm": 0.7227087302184222, "learning_rate": 1.5652879156528794e-06, "loss": 0.6939, "step": 24601 }, { "epoch": 0.7182855975008029, "grad_norm": 0.7055099439202767, "learning_rate": 1.5651257096512574e-06, "loss": 0.6079, "step": 24602 }, { "epoch": 0.7183147937286503, "grad_norm": 0.8099152916432438, "learning_rate": 1.5649635036496352e-06, "loss": 0.7519, "step": 24603 }, { "epoch": 0.7183439899564976, "grad_norm": 0.7065104640927555, "learning_rate": 1.5648012976480132e-06, "loss": 0.5891, "step": 24604 }, { "epoch": 0.718373186184345, "grad_norm": 0.7110188977182886, "learning_rate": 1.564639091646391e-06, "loss": 0.6173, "step": 24605 }, { "epoch": 0.7184023824121923, "grad_norm": 0.6823402993495714, "learning_rate": 1.564476885644769e-06, "loss": 0.5563, "step": 24606 }, { "epoch": 0.7184315786400397, "grad_norm": 0.7075079694964791, "learning_rate": 1.5643146796431468e-06, "loss": 0.6044, "step": 24607 }, { "epoch": 0.7184607748678871, "grad_norm": 0.7846627912621565, "learning_rate": 1.5641524736415248e-06, "loss": 0.7192, "step": 24608 }, { "epoch": 0.7184899710957344, "grad_norm": 0.7139672491364647, "learning_rate": 1.5639902676399026e-06, "loss": 0.6197, "step": 24609 }, { "epoch": 0.7185191673235818, "grad_norm": 0.7301917951348867, "learning_rate": 1.5638280616382808e-06, "loss": 0.7047, "step": 24610 }, { "epoch": 0.7185483635514291, "grad_norm": 0.8605946009669777, "learning_rate": 1.5636658556366588e-06, "loss": 0.6549, "step": 24611 }, { "epoch": 0.7185775597792765, "grad_norm": 0.7230041843998776, "learning_rate": 1.5635036496350366e-06, "loss": 0.6446, "step": 24612 }, { "epoch": 0.7186067560071239, "grad_norm": 0.776250691523561, "learning_rate": 1.5633414436334146e-06, "loss": 0.6598, "step": 24613 }, { "epoch": 0.7186359522349712, "grad_norm": 0.7652760180775264, "learning_rate": 1.5631792376317924e-06, "loss": 0.6839, "step": 24614 }, { "epoch": 0.7186651484628186, "grad_norm": 0.6534878720245264, "learning_rate": 1.5630170316301704e-06, "loss": 0.5036, "step": 24615 }, { "epoch": 0.718694344690666, "grad_norm": 0.7972133492477947, "learning_rate": 1.5628548256285484e-06, "loss": 0.7339, "step": 24616 }, { "epoch": 0.7187235409185133, "grad_norm": 0.7607386769920246, "learning_rate": 1.5626926196269262e-06, "loss": 0.7574, "step": 24617 }, { "epoch": 0.7187527371463607, "grad_norm": 0.7521699310322195, "learning_rate": 1.5625304136253043e-06, "loss": 0.7235, "step": 24618 }, { "epoch": 0.718781933374208, "grad_norm": 0.71468859373364, "learning_rate": 1.562368207623682e-06, "loss": 0.6324, "step": 24619 }, { "epoch": 0.7188111296020554, "grad_norm": 0.6775056873331502, "learning_rate": 1.5622060016220603e-06, "loss": 0.5591, "step": 24620 }, { "epoch": 0.7188403258299028, "grad_norm": 0.6948703254128116, "learning_rate": 1.5620437956204383e-06, "loss": 0.6359, "step": 24621 }, { "epoch": 0.7188695220577501, "grad_norm": 0.7423000330815408, "learning_rate": 1.561881589618816e-06, "loss": 0.7027, "step": 24622 }, { "epoch": 0.7188987182855975, "grad_norm": 0.7273503246798796, "learning_rate": 1.561719383617194e-06, "loss": 0.6077, "step": 24623 }, { "epoch": 0.7189279145134448, "grad_norm": 0.6906213963630156, "learning_rate": 1.5615571776155719e-06, "loss": 0.5726, "step": 24624 }, { "epoch": 0.7189571107412922, "grad_norm": 0.688104821322448, "learning_rate": 1.5613949716139499e-06, "loss": 0.568, "step": 24625 }, { "epoch": 0.7189863069691396, "grad_norm": 0.7147783925006519, "learning_rate": 1.5612327656123277e-06, "loss": 0.6435, "step": 24626 }, { "epoch": 0.7190155031969869, "grad_norm": 0.7162619911147038, "learning_rate": 1.5610705596107057e-06, "loss": 0.6263, "step": 24627 }, { "epoch": 0.7190446994248343, "grad_norm": 0.7348238219773334, "learning_rate": 1.5609083536090835e-06, "loss": 0.6488, "step": 24628 }, { "epoch": 0.7190738956526816, "grad_norm": 0.8027549739399077, "learning_rate": 1.5607461476074617e-06, "loss": 0.7736, "step": 24629 }, { "epoch": 0.719103091880529, "grad_norm": 0.7150636867307292, "learning_rate": 1.5605839416058397e-06, "loss": 0.6137, "step": 24630 }, { "epoch": 0.7191322881083764, "grad_norm": 0.7366345135416085, "learning_rate": 1.5604217356042175e-06, "loss": 0.6229, "step": 24631 }, { "epoch": 0.7191614843362237, "grad_norm": 0.695510113930313, "learning_rate": 1.5602595296025955e-06, "loss": 0.5943, "step": 24632 }, { "epoch": 0.7191906805640711, "grad_norm": 0.6948806751490635, "learning_rate": 1.5600973236009733e-06, "loss": 0.6102, "step": 24633 }, { "epoch": 0.7192198767919185, "grad_norm": 0.7366153870823621, "learning_rate": 1.5599351175993513e-06, "loss": 0.6234, "step": 24634 }, { "epoch": 0.7192490730197658, "grad_norm": 0.6830966084997293, "learning_rate": 1.559772911597729e-06, "loss": 0.5485, "step": 24635 }, { "epoch": 0.7192782692476132, "grad_norm": 0.7727399050140754, "learning_rate": 1.559610705596107e-06, "loss": 0.7124, "step": 24636 }, { "epoch": 0.7193074654754605, "grad_norm": 0.7722775993569208, "learning_rate": 1.5594484995944851e-06, "loss": 0.7194, "step": 24637 }, { "epoch": 0.7193366617033079, "grad_norm": 0.7112458079043636, "learning_rate": 1.559286293592863e-06, "loss": 0.5977, "step": 24638 }, { "epoch": 0.7193658579311553, "grad_norm": 0.7135433390208465, "learning_rate": 1.5591240875912411e-06, "loss": 0.6268, "step": 24639 }, { "epoch": 0.7193950541590026, "grad_norm": 0.7088132668241313, "learning_rate": 1.5589618815896191e-06, "loss": 0.6159, "step": 24640 }, { "epoch": 0.71942425038685, "grad_norm": 0.7395262650207657, "learning_rate": 1.558799675587997e-06, "loss": 0.6559, "step": 24641 }, { "epoch": 0.7194534466146973, "grad_norm": 0.7212126661373047, "learning_rate": 1.558637469586375e-06, "loss": 0.6374, "step": 24642 }, { "epoch": 0.7194826428425447, "grad_norm": 0.7213710100341398, "learning_rate": 1.5584752635847527e-06, "loss": 0.6456, "step": 24643 }, { "epoch": 0.7195118390703921, "grad_norm": 0.7925469072452911, "learning_rate": 1.5583130575831307e-06, "loss": 0.7413, "step": 24644 }, { "epoch": 0.7195410352982394, "grad_norm": 0.7701886441181142, "learning_rate": 1.5581508515815085e-06, "loss": 0.7078, "step": 24645 }, { "epoch": 0.7195702315260868, "grad_norm": 0.7396096137390058, "learning_rate": 1.5579886455798865e-06, "loss": 0.6853, "step": 24646 }, { "epoch": 0.7195994277539342, "grad_norm": 0.6743476364353511, "learning_rate": 1.5578264395782643e-06, "loss": 0.6016, "step": 24647 }, { "epoch": 0.7196286239817815, "grad_norm": 0.6950199587720112, "learning_rate": 1.5576642335766425e-06, "loss": 0.6172, "step": 24648 }, { "epoch": 0.7196578202096289, "grad_norm": 0.7378088664811129, "learning_rate": 1.5575020275750206e-06, "loss": 0.6956, "step": 24649 }, { "epoch": 0.7196870164374762, "grad_norm": 0.7336189537877296, "learning_rate": 1.5573398215733984e-06, "loss": 0.6806, "step": 24650 }, { "epoch": 0.7197162126653236, "grad_norm": 0.9235113282930707, "learning_rate": 1.5571776155717764e-06, "loss": 0.5825, "step": 24651 }, { "epoch": 0.719745408893171, "grad_norm": 0.715962546447495, "learning_rate": 1.5570154095701542e-06, "loss": 0.629, "step": 24652 }, { "epoch": 0.7197746051210183, "grad_norm": 0.6776242829025074, "learning_rate": 1.5568532035685322e-06, "loss": 0.5716, "step": 24653 }, { "epoch": 0.7198038013488657, "grad_norm": 0.6693716674949614, "learning_rate": 1.55669099756691e-06, "loss": 0.5389, "step": 24654 }, { "epoch": 0.719832997576713, "grad_norm": 0.6958423836616368, "learning_rate": 1.556528791565288e-06, "loss": 0.6625, "step": 24655 }, { "epoch": 0.7198621938045604, "grad_norm": 0.791997890034635, "learning_rate": 1.556366585563666e-06, "loss": 0.6979, "step": 24656 }, { "epoch": 0.7198913900324078, "grad_norm": 0.7146150197034135, "learning_rate": 1.5562043795620438e-06, "loss": 0.6322, "step": 24657 }, { "epoch": 0.7199205862602551, "grad_norm": 0.7548046842134652, "learning_rate": 1.556042173560422e-06, "loss": 0.6616, "step": 24658 }, { "epoch": 0.7199497824881025, "grad_norm": 0.7373707336725746, "learning_rate": 1.5558799675588e-06, "loss": 0.6843, "step": 24659 }, { "epoch": 0.7199789787159498, "grad_norm": 0.8305595473532763, "learning_rate": 1.5557177615571778e-06, "loss": 0.6598, "step": 24660 }, { "epoch": 0.7200081749437972, "grad_norm": 0.7089933550876336, "learning_rate": 1.5555555555555558e-06, "loss": 0.6177, "step": 24661 }, { "epoch": 0.7200373711716446, "grad_norm": 0.7532387115510573, "learning_rate": 1.5553933495539336e-06, "loss": 0.7065, "step": 24662 }, { "epoch": 0.7200665673994919, "grad_norm": 0.7485172903356094, "learning_rate": 1.5552311435523116e-06, "loss": 0.5456, "step": 24663 }, { "epoch": 0.7200957636273394, "grad_norm": 0.7504098130582877, "learning_rate": 1.5550689375506894e-06, "loss": 0.5789, "step": 24664 }, { "epoch": 0.7201249598551868, "grad_norm": 0.6968856380405042, "learning_rate": 1.5549067315490674e-06, "loss": 0.6187, "step": 24665 }, { "epoch": 0.7201541560830341, "grad_norm": 0.7727176527745988, "learning_rate": 1.5547445255474452e-06, "loss": 0.6947, "step": 24666 }, { "epoch": 0.7201833523108815, "grad_norm": 0.7006933320523713, "learning_rate": 1.5545823195458234e-06, "loss": 0.6128, "step": 24667 }, { "epoch": 0.7202125485387288, "grad_norm": 0.7664152171717097, "learning_rate": 1.5544201135442014e-06, "loss": 0.7469, "step": 24668 }, { "epoch": 0.7202417447665762, "grad_norm": 0.7152866962687885, "learning_rate": 1.5542579075425792e-06, "loss": 0.6742, "step": 24669 }, { "epoch": 0.7202709409944236, "grad_norm": 0.6861892905357799, "learning_rate": 1.5540957015409572e-06, "loss": 0.6238, "step": 24670 }, { "epoch": 0.7203001372222709, "grad_norm": 0.7356926833884694, "learning_rate": 1.553933495539335e-06, "loss": 0.6833, "step": 24671 }, { "epoch": 0.7203293334501183, "grad_norm": 0.9536277024986843, "learning_rate": 1.553771289537713e-06, "loss": 0.7194, "step": 24672 }, { "epoch": 0.7203585296779657, "grad_norm": 0.7443229908466996, "learning_rate": 1.5536090835360908e-06, "loss": 0.676, "step": 24673 }, { "epoch": 0.720387725905813, "grad_norm": 0.7789817142319073, "learning_rate": 1.5534468775344688e-06, "loss": 0.7105, "step": 24674 }, { "epoch": 0.7204169221336604, "grad_norm": 0.7284025433181924, "learning_rate": 1.5532846715328468e-06, "loss": 0.5955, "step": 24675 }, { "epoch": 0.7204461183615077, "grad_norm": 0.6963573093882277, "learning_rate": 1.5531224655312248e-06, "loss": 0.5662, "step": 24676 }, { "epoch": 0.7204753145893551, "grad_norm": 0.7424170782250883, "learning_rate": 1.5529602595296028e-06, "loss": 0.6351, "step": 24677 }, { "epoch": 0.7205045108172025, "grad_norm": 0.683636739468163, "learning_rate": 1.5527980535279808e-06, "loss": 0.5844, "step": 24678 }, { "epoch": 0.7205337070450498, "grad_norm": 0.690981619864342, "learning_rate": 1.5526358475263586e-06, "loss": 0.6424, "step": 24679 }, { "epoch": 0.7205629032728972, "grad_norm": 0.702119862984636, "learning_rate": 1.5524736415247366e-06, "loss": 0.6224, "step": 24680 }, { "epoch": 0.7205920995007445, "grad_norm": 0.7951668093663851, "learning_rate": 1.5523114355231144e-06, "loss": 0.6495, "step": 24681 }, { "epoch": 0.7206212957285919, "grad_norm": 0.7471879948263703, "learning_rate": 1.5521492295214925e-06, "loss": 0.6545, "step": 24682 }, { "epoch": 0.7206504919564393, "grad_norm": 0.7391666147385653, "learning_rate": 1.5519870235198702e-06, "loss": 0.6449, "step": 24683 }, { "epoch": 0.7206796881842866, "grad_norm": 0.770259759652792, "learning_rate": 1.5518248175182483e-06, "loss": 0.6894, "step": 24684 }, { "epoch": 0.720708884412134, "grad_norm": 0.7547463096757088, "learning_rate": 1.551662611516626e-06, "loss": 0.7721, "step": 24685 }, { "epoch": 0.7207380806399813, "grad_norm": 0.7741000565790954, "learning_rate": 1.5515004055150043e-06, "loss": 0.7065, "step": 24686 }, { "epoch": 0.7207672768678287, "grad_norm": 0.7520863625833106, "learning_rate": 1.5513381995133823e-06, "loss": 0.7017, "step": 24687 }, { "epoch": 0.7207964730956761, "grad_norm": 0.7081869229681398, "learning_rate": 1.55117599351176e-06, "loss": 0.6277, "step": 24688 }, { "epoch": 0.7208256693235234, "grad_norm": 0.7685414767545591, "learning_rate": 1.551013787510138e-06, "loss": 0.6796, "step": 24689 }, { "epoch": 0.7208548655513708, "grad_norm": 0.7022862428838873, "learning_rate": 1.5508515815085159e-06, "loss": 0.6378, "step": 24690 }, { "epoch": 0.7208840617792182, "grad_norm": 0.8222077077509757, "learning_rate": 1.5506893755068939e-06, "loss": 0.6984, "step": 24691 }, { "epoch": 0.7209132580070655, "grad_norm": 0.7308578953777115, "learning_rate": 1.5505271695052717e-06, "loss": 0.6565, "step": 24692 }, { "epoch": 0.7209424542349129, "grad_norm": 0.6994079068113995, "learning_rate": 1.5503649635036497e-06, "loss": 0.6227, "step": 24693 }, { "epoch": 0.7209716504627602, "grad_norm": 0.7412281674739276, "learning_rate": 1.5502027575020277e-06, "loss": 0.67, "step": 24694 }, { "epoch": 0.7210008466906076, "grad_norm": 0.6966979839694492, "learning_rate": 1.5500405515004057e-06, "loss": 0.5963, "step": 24695 }, { "epoch": 0.721030042918455, "grad_norm": 0.6612503277750041, "learning_rate": 1.5498783454987837e-06, "loss": 0.5611, "step": 24696 }, { "epoch": 0.7210592391463023, "grad_norm": 0.7186254487037945, "learning_rate": 1.5497161394971615e-06, "loss": 0.6413, "step": 24697 }, { "epoch": 0.7210884353741497, "grad_norm": 0.7246234154647547, "learning_rate": 1.5495539334955395e-06, "loss": 0.5999, "step": 24698 }, { "epoch": 0.721117631601997, "grad_norm": 0.7422373951795231, "learning_rate": 1.5493917274939175e-06, "loss": 0.7146, "step": 24699 }, { "epoch": 0.7211468278298444, "grad_norm": 0.6989678690086167, "learning_rate": 1.5492295214922953e-06, "loss": 0.6073, "step": 24700 }, { "epoch": 0.7211760240576918, "grad_norm": 0.7177152005221898, "learning_rate": 1.5490673154906733e-06, "loss": 0.6685, "step": 24701 }, { "epoch": 0.7212052202855391, "grad_norm": 0.7403935316249909, "learning_rate": 1.548905109489051e-06, "loss": 0.6812, "step": 24702 }, { "epoch": 0.7212344165133865, "grad_norm": 0.729959657640117, "learning_rate": 1.5487429034874291e-06, "loss": 0.6164, "step": 24703 }, { "epoch": 0.7212636127412339, "grad_norm": 0.7129425160138193, "learning_rate": 1.548580697485807e-06, "loss": 0.6598, "step": 24704 }, { "epoch": 0.7212928089690812, "grad_norm": 1.1582099002244926, "learning_rate": 1.5484184914841851e-06, "loss": 0.7341, "step": 24705 }, { "epoch": 0.7213220051969286, "grad_norm": 0.687306170225198, "learning_rate": 1.5482562854825631e-06, "loss": 0.589, "step": 24706 }, { "epoch": 0.7213512014247759, "grad_norm": 0.7037087551826888, "learning_rate": 1.548094079480941e-06, "loss": 0.6069, "step": 24707 }, { "epoch": 0.7213803976526233, "grad_norm": 0.7221034296714035, "learning_rate": 1.547931873479319e-06, "loss": 0.675, "step": 24708 }, { "epoch": 0.7214095938804707, "grad_norm": 0.644236398734685, "learning_rate": 1.5477696674776967e-06, "loss": 0.5586, "step": 24709 }, { "epoch": 0.721438790108318, "grad_norm": 0.6901815923321505, "learning_rate": 1.5476074614760747e-06, "loss": 0.5672, "step": 24710 }, { "epoch": 0.7214679863361654, "grad_norm": 0.7373653839952621, "learning_rate": 1.5474452554744525e-06, "loss": 0.6585, "step": 24711 }, { "epoch": 0.7214971825640127, "grad_norm": 0.701464219506736, "learning_rate": 1.5472830494728305e-06, "loss": 0.629, "step": 24712 }, { "epoch": 0.7215263787918601, "grad_norm": 0.7005764145728985, "learning_rate": 1.5471208434712085e-06, "loss": 0.6028, "step": 24713 }, { "epoch": 0.7215555750197075, "grad_norm": 0.7104893463928085, "learning_rate": 1.5469586374695866e-06, "loss": 0.6625, "step": 24714 }, { "epoch": 0.7215847712475548, "grad_norm": 0.7185831460422465, "learning_rate": 1.5467964314679646e-06, "loss": 0.6377, "step": 24715 }, { "epoch": 0.7216139674754022, "grad_norm": 0.7043578480956291, "learning_rate": 1.5466342254663424e-06, "loss": 0.5936, "step": 24716 }, { "epoch": 0.7216431637032495, "grad_norm": 0.7407582145220285, "learning_rate": 1.5464720194647204e-06, "loss": 0.652, "step": 24717 }, { "epoch": 0.7216723599310969, "grad_norm": 0.6998383490619969, "learning_rate": 1.5463098134630984e-06, "loss": 0.614, "step": 24718 }, { "epoch": 0.7217015561589443, "grad_norm": 0.7855957422141997, "learning_rate": 1.5461476074614762e-06, "loss": 0.682, "step": 24719 }, { "epoch": 0.7217307523867916, "grad_norm": 0.716451220995031, "learning_rate": 1.5459854014598542e-06, "loss": 0.6047, "step": 24720 }, { "epoch": 0.721759948614639, "grad_norm": 0.750154787924758, "learning_rate": 1.545823195458232e-06, "loss": 0.6992, "step": 24721 }, { "epoch": 0.7217891448424864, "grad_norm": 0.6878692076797217, "learning_rate": 1.54566098945661e-06, "loss": 0.5649, "step": 24722 }, { "epoch": 0.7218183410703337, "grad_norm": 0.7295463682421222, "learning_rate": 1.5454987834549878e-06, "loss": 0.6065, "step": 24723 }, { "epoch": 0.7218475372981811, "grad_norm": 0.776389289631637, "learning_rate": 1.545336577453366e-06, "loss": 0.7188, "step": 24724 }, { "epoch": 0.7218767335260284, "grad_norm": 0.7381002286993237, "learning_rate": 1.545174371451744e-06, "loss": 0.705, "step": 24725 }, { "epoch": 0.7219059297538758, "grad_norm": 0.7750975528256367, "learning_rate": 1.5450121654501218e-06, "loss": 0.7602, "step": 24726 }, { "epoch": 0.7219351259817232, "grad_norm": 0.7337470917794147, "learning_rate": 1.5448499594484998e-06, "loss": 0.6872, "step": 24727 }, { "epoch": 0.7219643222095705, "grad_norm": 0.7241725882310771, "learning_rate": 1.5446877534468776e-06, "loss": 0.6726, "step": 24728 }, { "epoch": 0.7219935184374179, "grad_norm": 0.7330045611697532, "learning_rate": 1.5445255474452556e-06, "loss": 0.6711, "step": 24729 }, { "epoch": 0.7220227146652652, "grad_norm": 0.6795334734290795, "learning_rate": 1.5443633414436334e-06, "loss": 0.5873, "step": 24730 }, { "epoch": 0.7220519108931126, "grad_norm": 0.6709450083766565, "learning_rate": 1.5442011354420114e-06, "loss": 0.5423, "step": 24731 }, { "epoch": 0.72208110712096, "grad_norm": 0.7508256210096674, "learning_rate": 1.5440389294403894e-06, "loss": 0.724, "step": 24732 }, { "epoch": 0.7221103033488073, "grad_norm": 0.7564339969369047, "learning_rate": 1.5438767234387674e-06, "loss": 0.7042, "step": 24733 }, { "epoch": 0.7221394995766547, "grad_norm": 0.690317525385648, "learning_rate": 1.5437145174371454e-06, "loss": 0.6129, "step": 24734 }, { "epoch": 0.722168695804502, "grad_norm": 0.6621475976298612, "learning_rate": 1.5435523114355232e-06, "loss": 0.5805, "step": 24735 }, { "epoch": 0.7221978920323494, "grad_norm": 0.7432664444198236, "learning_rate": 1.5433901054339012e-06, "loss": 0.6582, "step": 24736 }, { "epoch": 0.7222270882601968, "grad_norm": 0.728167964809493, "learning_rate": 1.5432278994322792e-06, "loss": 0.6227, "step": 24737 }, { "epoch": 0.7222562844880441, "grad_norm": 0.7630275984268908, "learning_rate": 1.543065693430657e-06, "loss": 0.7563, "step": 24738 }, { "epoch": 0.7222854807158915, "grad_norm": 0.6811229228424585, "learning_rate": 1.542903487429035e-06, "loss": 0.6001, "step": 24739 }, { "epoch": 0.7223146769437389, "grad_norm": 0.6879427950467663, "learning_rate": 1.5427412814274128e-06, "loss": 0.5884, "step": 24740 }, { "epoch": 0.7223438731715862, "grad_norm": 0.7019242290295962, "learning_rate": 1.5425790754257908e-06, "loss": 0.609, "step": 24741 }, { "epoch": 0.7223730693994336, "grad_norm": 0.8146514827300422, "learning_rate": 1.5424168694241686e-06, "loss": 0.6949, "step": 24742 }, { "epoch": 0.7224022656272809, "grad_norm": 0.7125214570550864, "learning_rate": 1.5422546634225468e-06, "loss": 0.6271, "step": 24743 }, { "epoch": 0.7224314618551283, "grad_norm": 0.6958818527393915, "learning_rate": 1.5420924574209248e-06, "loss": 0.6263, "step": 24744 }, { "epoch": 0.7224606580829757, "grad_norm": 0.7684339281898462, "learning_rate": 1.5419302514193026e-06, "loss": 0.722, "step": 24745 }, { "epoch": 0.722489854310823, "grad_norm": 0.7374429644397693, "learning_rate": 1.5417680454176807e-06, "loss": 0.6341, "step": 24746 }, { "epoch": 0.7225190505386704, "grad_norm": 0.7710105917249163, "learning_rate": 1.5416058394160584e-06, "loss": 0.6467, "step": 24747 }, { "epoch": 0.7225482467665177, "grad_norm": 0.692650317308715, "learning_rate": 1.5414436334144365e-06, "loss": 0.6145, "step": 24748 }, { "epoch": 0.7225774429943651, "grad_norm": 0.7716931160333157, "learning_rate": 1.5412814274128142e-06, "loss": 0.7242, "step": 24749 }, { "epoch": 0.7226066392222125, "grad_norm": 0.7531448249283086, "learning_rate": 1.5411192214111923e-06, "loss": 0.6779, "step": 24750 }, { "epoch": 0.7226358354500598, "grad_norm": 0.7114766711317037, "learning_rate": 1.5409570154095703e-06, "loss": 0.6764, "step": 24751 }, { "epoch": 0.7226650316779072, "grad_norm": 0.8654605350376321, "learning_rate": 1.5407948094079483e-06, "loss": 0.7018, "step": 24752 }, { "epoch": 0.7226942279057545, "grad_norm": 0.7908410667869237, "learning_rate": 1.5406326034063263e-06, "loss": 0.7599, "step": 24753 }, { "epoch": 0.7227234241336019, "grad_norm": 0.7219378358477296, "learning_rate": 1.540470397404704e-06, "loss": 0.6446, "step": 24754 }, { "epoch": 0.7227526203614493, "grad_norm": 0.7289281670862923, "learning_rate": 1.540308191403082e-06, "loss": 0.6775, "step": 24755 }, { "epoch": 0.7227818165892966, "grad_norm": 0.7905002974324714, "learning_rate": 1.54014598540146e-06, "loss": 0.6796, "step": 24756 }, { "epoch": 0.722811012817144, "grad_norm": 0.7306207232593129, "learning_rate": 1.5399837793998379e-06, "loss": 0.6108, "step": 24757 }, { "epoch": 0.7228402090449914, "grad_norm": 0.6837148299580025, "learning_rate": 1.5398215733982159e-06, "loss": 0.5339, "step": 24758 }, { "epoch": 0.7228694052728387, "grad_norm": 0.7370990106651779, "learning_rate": 1.5396593673965937e-06, "loss": 0.6587, "step": 24759 }, { "epoch": 0.7228986015006861, "grad_norm": 0.7476270769047292, "learning_rate": 1.5394971613949717e-06, "loss": 0.6779, "step": 24760 }, { "epoch": 0.7229277977285334, "grad_norm": 0.7464565606562815, "learning_rate": 1.53933495539335e-06, "loss": 0.6775, "step": 24761 }, { "epoch": 0.7229569939563808, "grad_norm": 0.7160050436092935, "learning_rate": 1.5391727493917277e-06, "loss": 0.5596, "step": 24762 }, { "epoch": 0.7229861901842282, "grad_norm": 0.7218222343840193, "learning_rate": 1.5390105433901057e-06, "loss": 0.652, "step": 24763 }, { "epoch": 0.7230153864120755, "grad_norm": 0.697524621141186, "learning_rate": 1.5388483373884835e-06, "loss": 0.5807, "step": 24764 }, { "epoch": 0.7230445826399229, "grad_norm": 0.7458632026927957, "learning_rate": 1.5386861313868615e-06, "loss": 0.5693, "step": 24765 }, { "epoch": 0.7230737788677702, "grad_norm": 0.7730943376740688, "learning_rate": 1.5385239253852393e-06, "loss": 0.6965, "step": 24766 }, { "epoch": 0.7231029750956176, "grad_norm": 0.7131083375525143, "learning_rate": 1.5383617193836173e-06, "loss": 0.6323, "step": 24767 }, { "epoch": 0.723132171323465, "grad_norm": 0.725442494160053, "learning_rate": 1.5381995133819951e-06, "loss": 0.6326, "step": 24768 }, { "epoch": 0.7231613675513123, "grad_norm": 0.7146983152363441, "learning_rate": 1.5380373073803731e-06, "loss": 0.6695, "step": 24769 }, { "epoch": 0.7231905637791597, "grad_norm": 0.6838617365567511, "learning_rate": 1.537875101378751e-06, "loss": 0.5925, "step": 24770 }, { "epoch": 0.723219760007007, "grad_norm": 0.7418946418966643, "learning_rate": 1.5377128953771291e-06, "loss": 0.6741, "step": 24771 }, { "epoch": 0.7232489562348544, "grad_norm": 0.7145947642495624, "learning_rate": 1.5375506893755071e-06, "loss": 0.6206, "step": 24772 }, { "epoch": 0.7232781524627018, "grad_norm": 0.7300359708133852, "learning_rate": 1.537388483373885e-06, "loss": 0.5621, "step": 24773 }, { "epoch": 0.7233073486905491, "grad_norm": 0.7562457107897238, "learning_rate": 1.537226277372263e-06, "loss": 0.6429, "step": 24774 }, { "epoch": 0.7233365449183965, "grad_norm": 0.7067889973765791, "learning_rate": 1.537064071370641e-06, "loss": 0.6313, "step": 24775 }, { "epoch": 0.7233657411462439, "grad_norm": 0.7590253249563607, "learning_rate": 1.5369018653690187e-06, "loss": 0.7207, "step": 24776 }, { "epoch": 0.7233949373740912, "grad_norm": 0.7056352580815257, "learning_rate": 1.5367396593673967e-06, "loss": 0.6275, "step": 24777 }, { "epoch": 0.7234241336019386, "grad_norm": 0.6962116056837916, "learning_rate": 1.5365774533657745e-06, "loss": 0.6331, "step": 24778 }, { "epoch": 0.7234533298297859, "grad_norm": 0.7823523678745136, "learning_rate": 1.5364152473641525e-06, "loss": 0.6582, "step": 24779 }, { "epoch": 0.7234825260576333, "grad_norm": 0.7359399966829786, "learning_rate": 1.5362530413625308e-06, "loss": 0.647, "step": 24780 }, { "epoch": 0.7235117222854807, "grad_norm": 0.7245507495866148, "learning_rate": 1.5360908353609086e-06, "loss": 0.6491, "step": 24781 }, { "epoch": 0.723540918513328, "grad_norm": 0.7353939994634636, "learning_rate": 1.5359286293592866e-06, "loss": 0.6331, "step": 24782 }, { "epoch": 0.7235701147411754, "grad_norm": 0.718118373550538, "learning_rate": 1.5357664233576644e-06, "loss": 0.6631, "step": 24783 }, { "epoch": 0.7235993109690229, "grad_norm": 0.6707031966396868, "learning_rate": 1.5356042173560424e-06, "loss": 0.5721, "step": 24784 }, { "epoch": 0.7236285071968702, "grad_norm": 0.7053917186548645, "learning_rate": 1.5354420113544202e-06, "loss": 0.6247, "step": 24785 }, { "epoch": 0.7236577034247176, "grad_norm": 0.6868393558512003, "learning_rate": 1.5352798053527982e-06, "loss": 0.5831, "step": 24786 }, { "epoch": 0.7236868996525649, "grad_norm": 0.6783835459298893, "learning_rate": 1.535117599351176e-06, "loss": 0.5338, "step": 24787 }, { "epoch": 0.7237160958804123, "grad_norm": 0.6729012857034119, "learning_rate": 1.534955393349554e-06, "loss": 0.5518, "step": 24788 }, { "epoch": 0.7237452921082597, "grad_norm": 0.6954500832820316, "learning_rate": 1.5347931873479318e-06, "loss": 0.6165, "step": 24789 }, { "epoch": 0.723774488336107, "grad_norm": 0.7014302677026074, "learning_rate": 1.53463098134631e-06, "loss": 0.5717, "step": 24790 }, { "epoch": 0.7238036845639544, "grad_norm": 0.764669554998381, "learning_rate": 1.534468775344688e-06, "loss": 0.6712, "step": 24791 }, { "epoch": 0.7238328807918017, "grad_norm": 0.7394132471050917, "learning_rate": 1.5343065693430658e-06, "loss": 0.6448, "step": 24792 }, { "epoch": 0.7238620770196491, "grad_norm": 0.8047050451597445, "learning_rate": 1.5341443633414438e-06, "loss": 0.622, "step": 24793 }, { "epoch": 0.7238912732474965, "grad_norm": 0.7110028143559127, "learning_rate": 1.5339821573398218e-06, "loss": 0.5828, "step": 24794 }, { "epoch": 0.7239204694753438, "grad_norm": 0.8085142526677741, "learning_rate": 1.5338199513381996e-06, "loss": 0.8032, "step": 24795 }, { "epoch": 0.7239496657031912, "grad_norm": 0.737097298576135, "learning_rate": 1.5336577453365776e-06, "loss": 0.6628, "step": 24796 }, { "epoch": 0.7239788619310386, "grad_norm": 0.7511244712372974, "learning_rate": 1.5334955393349554e-06, "loss": 0.647, "step": 24797 }, { "epoch": 0.7240080581588859, "grad_norm": 0.752663919323921, "learning_rate": 1.5333333333333334e-06, "loss": 0.6648, "step": 24798 }, { "epoch": 0.7240372543867333, "grad_norm": 0.7390458571541672, "learning_rate": 1.5331711273317116e-06, "loss": 0.6697, "step": 24799 }, { "epoch": 0.7240664506145806, "grad_norm": 0.8287023847708356, "learning_rate": 1.5330089213300894e-06, "loss": 0.7407, "step": 24800 }, { "epoch": 0.724095646842428, "grad_norm": 0.7686424766527892, "learning_rate": 1.5328467153284674e-06, "loss": 0.6574, "step": 24801 }, { "epoch": 0.7241248430702754, "grad_norm": 0.6964470620766147, "learning_rate": 1.5326845093268452e-06, "loss": 0.5744, "step": 24802 }, { "epoch": 0.7241540392981227, "grad_norm": 0.6703597924856332, "learning_rate": 1.5325223033252232e-06, "loss": 0.559, "step": 24803 }, { "epoch": 0.7241832355259701, "grad_norm": 0.6959221842924997, "learning_rate": 1.532360097323601e-06, "loss": 0.6222, "step": 24804 }, { "epoch": 0.7242124317538174, "grad_norm": 0.7495398570068099, "learning_rate": 1.532197891321979e-06, "loss": 0.6422, "step": 24805 }, { "epoch": 0.7242416279816648, "grad_norm": 0.7531571838151587, "learning_rate": 1.5320356853203568e-06, "loss": 0.6953, "step": 24806 }, { "epoch": 0.7242708242095122, "grad_norm": 0.7284719339904275, "learning_rate": 1.5318734793187348e-06, "loss": 0.638, "step": 24807 }, { "epoch": 0.7243000204373595, "grad_norm": 0.7045507900988249, "learning_rate": 1.5317112733171126e-06, "loss": 0.6343, "step": 24808 }, { "epoch": 0.7243292166652069, "grad_norm": 0.7651196111716305, "learning_rate": 1.5315490673154908e-06, "loss": 0.7008, "step": 24809 }, { "epoch": 0.7243584128930542, "grad_norm": 0.7326206768138914, "learning_rate": 1.5313868613138689e-06, "loss": 0.6132, "step": 24810 }, { "epoch": 0.7243876091209016, "grad_norm": 0.7608998835794617, "learning_rate": 1.5312246553122466e-06, "loss": 0.7098, "step": 24811 }, { "epoch": 0.724416805348749, "grad_norm": 0.7645761189197573, "learning_rate": 1.5310624493106247e-06, "loss": 0.6974, "step": 24812 }, { "epoch": 0.7244460015765963, "grad_norm": 0.7043000778973093, "learning_rate": 1.5309002433090024e-06, "loss": 0.6433, "step": 24813 }, { "epoch": 0.7244751978044437, "grad_norm": 0.7033855164629026, "learning_rate": 1.5307380373073805e-06, "loss": 0.6376, "step": 24814 }, { "epoch": 0.724504394032291, "grad_norm": 0.8440101641202961, "learning_rate": 1.5305758313057585e-06, "loss": 0.7142, "step": 24815 }, { "epoch": 0.7245335902601384, "grad_norm": 0.7192191986199424, "learning_rate": 1.5304136253041363e-06, "loss": 0.6219, "step": 24816 }, { "epoch": 0.7245627864879858, "grad_norm": 0.7274209208462377, "learning_rate": 1.5302514193025143e-06, "loss": 0.6742, "step": 24817 }, { "epoch": 0.7245919827158331, "grad_norm": 0.7270055817693886, "learning_rate": 1.5300892133008925e-06, "loss": 0.6486, "step": 24818 }, { "epoch": 0.7246211789436805, "grad_norm": 0.7105229751567018, "learning_rate": 1.5299270072992703e-06, "loss": 0.6351, "step": 24819 }, { "epoch": 0.7246503751715279, "grad_norm": 0.795956248992151, "learning_rate": 1.5297648012976483e-06, "loss": 0.7366, "step": 24820 }, { "epoch": 0.7246795713993752, "grad_norm": 0.6970275615863287, "learning_rate": 1.529602595296026e-06, "loss": 0.617, "step": 24821 }, { "epoch": 0.7247087676272226, "grad_norm": 0.754236723423551, "learning_rate": 1.529440389294404e-06, "loss": 0.6291, "step": 24822 }, { "epoch": 0.7247379638550699, "grad_norm": 0.722268898060337, "learning_rate": 1.5292781832927819e-06, "loss": 0.6532, "step": 24823 }, { "epoch": 0.7247671600829173, "grad_norm": 0.738883691943646, "learning_rate": 1.5291159772911599e-06, "loss": 0.6711, "step": 24824 }, { "epoch": 0.7247963563107647, "grad_norm": 0.8568553207963742, "learning_rate": 1.5289537712895377e-06, "loss": 0.6274, "step": 24825 }, { "epoch": 0.724825552538612, "grad_norm": 0.7497959495936583, "learning_rate": 1.5287915652879157e-06, "loss": 0.6672, "step": 24826 }, { "epoch": 0.7248547487664594, "grad_norm": 0.7350423543216219, "learning_rate": 1.5286293592862935e-06, "loss": 0.6692, "step": 24827 }, { "epoch": 0.7248839449943068, "grad_norm": 0.6868698868690504, "learning_rate": 1.5284671532846717e-06, "loss": 0.5932, "step": 24828 }, { "epoch": 0.7249131412221541, "grad_norm": 0.7542955910352798, "learning_rate": 1.5283049472830497e-06, "loss": 0.6238, "step": 24829 }, { "epoch": 0.7249423374500015, "grad_norm": 0.6890682773996298, "learning_rate": 1.5281427412814275e-06, "loss": 0.6049, "step": 24830 }, { "epoch": 0.7249715336778488, "grad_norm": 0.7384421400072371, "learning_rate": 1.5279805352798055e-06, "loss": 0.6563, "step": 24831 }, { "epoch": 0.7250007299056962, "grad_norm": 0.757903578465208, "learning_rate": 1.5278183292781833e-06, "loss": 0.6651, "step": 24832 }, { "epoch": 0.7250299261335436, "grad_norm": 0.7060748298660114, "learning_rate": 1.5276561232765613e-06, "loss": 0.6078, "step": 24833 }, { "epoch": 0.7250591223613909, "grad_norm": 0.7149890818850855, "learning_rate": 1.5274939172749393e-06, "loss": 0.6459, "step": 24834 }, { "epoch": 0.7250883185892383, "grad_norm": 0.6712688587048107, "learning_rate": 1.5273317112733171e-06, "loss": 0.5677, "step": 24835 }, { "epoch": 0.7251175148170856, "grad_norm": 0.6926041505943079, "learning_rate": 1.5271695052716951e-06, "loss": 0.5694, "step": 24836 }, { "epoch": 0.725146711044933, "grad_norm": 0.6777666192928942, "learning_rate": 1.5270072992700733e-06, "loss": 0.5292, "step": 24837 }, { "epoch": 0.7251759072727804, "grad_norm": 0.7921617961132374, "learning_rate": 1.5268450932684511e-06, "loss": 0.6859, "step": 24838 }, { "epoch": 0.7252051035006277, "grad_norm": 0.7185932351288362, "learning_rate": 1.5266828872668291e-06, "loss": 0.6814, "step": 24839 }, { "epoch": 0.7252342997284751, "grad_norm": 0.7581929974645518, "learning_rate": 1.526520681265207e-06, "loss": 0.6767, "step": 24840 }, { "epoch": 0.7252634959563224, "grad_norm": 0.8065205175684093, "learning_rate": 1.526358475263585e-06, "loss": 0.7074, "step": 24841 }, { "epoch": 0.7252926921841698, "grad_norm": 0.7440229885068232, "learning_rate": 1.5261962692619627e-06, "loss": 0.6987, "step": 24842 }, { "epoch": 0.7253218884120172, "grad_norm": 0.7327370282349516, "learning_rate": 1.5260340632603407e-06, "loss": 0.6442, "step": 24843 }, { "epoch": 0.7253510846398645, "grad_norm": 0.7049127980106237, "learning_rate": 1.5258718572587185e-06, "loss": 0.6004, "step": 24844 }, { "epoch": 0.7253802808677119, "grad_norm": 0.7414475163592582, "learning_rate": 1.5257096512570965e-06, "loss": 0.6848, "step": 24845 }, { "epoch": 0.7254094770955593, "grad_norm": 0.7088784405428826, "learning_rate": 1.5255474452554748e-06, "loss": 0.6211, "step": 24846 }, { "epoch": 0.7254386733234066, "grad_norm": 0.7316570658320554, "learning_rate": 1.5253852392538526e-06, "loss": 0.6534, "step": 24847 }, { "epoch": 0.725467869551254, "grad_norm": 0.7019697841803778, "learning_rate": 1.5252230332522306e-06, "loss": 0.6048, "step": 24848 }, { "epoch": 0.7254970657791013, "grad_norm": 0.7359581173639153, "learning_rate": 1.5250608272506084e-06, "loss": 0.6554, "step": 24849 }, { "epoch": 0.7255262620069487, "grad_norm": 0.7097024900547735, "learning_rate": 1.5248986212489864e-06, "loss": 0.599, "step": 24850 }, { "epoch": 0.7255554582347961, "grad_norm": 0.7103486322237635, "learning_rate": 1.5247364152473642e-06, "loss": 0.6006, "step": 24851 }, { "epoch": 0.7255846544626434, "grad_norm": 0.790014372777142, "learning_rate": 1.5245742092457422e-06, "loss": 0.6269, "step": 24852 }, { "epoch": 0.7256138506904908, "grad_norm": 0.7197305863519218, "learning_rate": 1.5244120032441202e-06, "loss": 0.6565, "step": 24853 }, { "epoch": 0.7256430469183381, "grad_norm": 0.7482512617445642, "learning_rate": 1.524249797242498e-06, "loss": 0.6744, "step": 24854 }, { "epoch": 0.7256722431461855, "grad_norm": 0.6888028593187255, "learning_rate": 1.524087591240876e-06, "loss": 0.5731, "step": 24855 }, { "epoch": 0.7257014393740329, "grad_norm": 0.7058514374384777, "learning_rate": 1.5239253852392542e-06, "loss": 0.6218, "step": 24856 }, { "epoch": 0.7257306356018802, "grad_norm": 0.6888716813881733, "learning_rate": 1.523763179237632e-06, "loss": 0.6192, "step": 24857 }, { "epoch": 0.7257598318297276, "grad_norm": 0.7797109523010078, "learning_rate": 1.52360097323601e-06, "loss": 0.6519, "step": 24858 }, { "epoch": 0.725789028057575, "grad_norm": 0.7266919266115648, "learning_rate": 1.5234387672343878e-06, "loss": 0.5955, "step": 24859 }, { "epoch": 0.7258182242854223, "grad_norm": 0.7096724320959754, "learning_rate": 1.5232765612327658e-06, "loss": 0.6528, "step": 24860 }, { "epoch": 0.7258474205132697, "grad_norm": 0.7379000258285145, "learning_rate": 1.5231143552311436e-06, "loss": 0.6255, "step": 24861 }, { "epoch": 0.725876616741117, "grad_norm": 0.698854959116064, "learning_rate": 1.5229521492295216e-06, "loss": 0.6323, "step": 24862 }, { "epoch": 0.7259058129689644, "grad_norm": 0.8402426468989804, "learning_rate": 1.5227899432278994e-06, "loss": 0.6994, "step": 24863 }, { "epoch": 0.7259350091968118, "grad_norm": 0.6534236043742965, "learning_rate": 1.5226277372262774e-06, "loss": 0.4733, "step": 24864 }, { "epoch": 0.7259642054246591, "grad_norm": 0.7984296336151573, "learning_rate": 1.5224655312246556e-06, "loss": 0.7276, "step": 24865 }, { "epoch": 0.7259934016525065, "grad_norm": 0.7734942284136105, "learning_rate": 1.5223033252230334e-06, "loss": 0.6363, "step": 24866 }, { "epoch": 0.7260225978803538, "grad_norm": 0.7204738703464982, "learning_rate": 1.5221411192214114e-06, "loss": 0.6617, "step": 24867 }, { "epoch": 0.7260517941082012, "grad_norm": 0.7533395291576455, "learning_rate": 1.5219789132197892e-06, "loss": 0.6445, "step": 24868 }, { "epoch": 0.7260809903360486, "grad_norm": 0.7418121482739233, "learning_rate": 1.5218167072181672e-06, "loss": 0.5672, "step": 24869 }, { "epoch": 0.7261101865638959, "grad_norm": 0.6620796863780255, "learning_rate": 1.521654501216545e-06, "loss": 0.5271, "step": 24870 }, { "epoch": 0.7261393827917433, "grad_norm": 0.657035527223647, "learning_rate": 1.521492295214923e-06, "loss": 0.5978, "step": 24871 }, { "epoch": 0.7261685790195906, "grad_norm": 0.6758476096487614, "learning_rate": 1.521330089213301e-06, "loss": 0.5742, "step": 24872 }, { "epoch": 0.726197775247438, "grad_norm": 0.6581983374604716, "learning_rate": 1.5211678832116788e-06, "loss": 0.5632, "step": 24873 }, { "epoch": 0.7262269714752854, "grad_norm": 0.6806609957768563, "learning_rate": 1.5210056772100568e-06, "loss": 0.5728, "step": 24874 }, { "epoch": 0.7262561677031327, "grad_norm": 0.761081888808374, "learning_rate": 1.5208434712084348e-06, "loss": 0.6887, "step": 24875 }, { "epoch": 0.7262853639309801, "grad_norm": 0.7122809970661644, "learning_rate": 1.5206812652068129e-06, "loss": 0.6199, "step": 24876 }, { "epoch": 0.7263145601588274, "grad_norm": 0.7339318796797337, "learning_rate": 1.5205190592051909e-06, "loss": 0.5788, "step": 24877 }, { "epoch": 0.7263437563866748, "grad_norm": 0.702200105714276, "learning_rate": 1.5203568532035687e-06, "loss": 0.5953, "step": 24878 }, { "epoch": 0.7263729526145222, "grad_norm": 0.7057339199026392, "learning_rate": 1.5201946472019467e-06, "loss": 0.6367, "step": 24879 }, { "epoch": 0.7264021488423695, "grad_norm": 0.7685530768832267, "learning_rate": 1.5200324412003245e-06, "loss": 0.7221, "step": 24880 }, { "epoch": 0.7264313450702169, "grad_norm": 0.7011021097472255, "learning_rate": 1.5198702351987025e-06, "loss": 0.5814, "step": 24881 }, { "epoch": 0.7264605412980643, "grad_norm": 0.721354637440585, "learning_rate": 1.5197080291970803e-06, "loss": 0.6775, "step": 24882 }, { "epoch": 0.7264897375259116, "grad_norm": 0.6805182552807192, "learning_rate": 1.5195458231954583e-06, "loss": 0.5824, "step": 24883 }, { "epoch": 0.726518933753759, "grad_norm": 0.6912319930704119, "learning_rate": 1.5193836171938365e-06, "loss": 0.6386, "step": 24884 }, { "epoch": 0.7265481299816063, "grad_norm": 0.717992249922885, "learning_rate": 1.5192214111922143e-06, "loss": 0.6158, "step": 24885 }, { "epoch": 0.7265773262094537, "grad_norm": 0.762056150083612, "learning_rate": 1.5190592051905923e-06, "loss": 0.6685, "step": 24886 }, { "epoch": 0.7266065224373011, "grad_norm": 0.7494134481103547, "learning_rate": 1.51889699918897e-06, "loss": 0.6804, "step": 24887 }, { "epoch": 0.7266357186651484, "grad_norm": 0.686575539657051, "learning_rate": 1.518734793187348e-06, "loss": 0.5973, "step": 24888 }, { "epoch": 0.7266649148929958, "grad_norm": 0.691641315840697, "learning_rate": 1.5185725871857259e-06, "loss": 0.5993, "step": 24889 }, { "epoch": 0.7266941111208431, "grad_norm": 0.8067549368272033, "learning_rate": 1.5184103811841039e-06, "loss": 0.6925, "step": 24890 }, { "epoch": 0.7267233073486905, "grad_norm": 0.7657859113364581, "learning_rate": 1.518248175182482e-06, "loss": 0.6755, "step": 24891 }, { "epoch": 0.7267525035765379, "grad_norm": 0.8559418177244736, "learning_rate": 1.5180859691808597e-06, "loss": 0.7284, "step": 24892 }, { "epoch": 0.7267816998043852, "grad_norm": 0.7731109128263188, "learning_rate": 1.5179237631792377e-06, "loss": 0.7304, "step": 24893 }, { "epoch": 0.7268108960322326, "grad_norm": 0.7254149214862946, "learning_rate": 1.5177615571776157e-06, "loss": 0.6872, "step": 24894 }, { "epoch": 0.72684009226008, "grad_norm": 0.7051676513029353, "learning_rate": 1.5175993511759937e-06, "loss": 0.62, "step": 24895 }, { "epoch": 0.7268692884879273, "grad_norm": 0.7477596802420907, "learning_rate": 1.5174371451743717e-06, "loss": 0.662, "step": 24896 }, { "epoch": 0.7268984847157747, "grad_norm": 0.6852567785543525, "learning_rate": 1.5172749391727495e-06, "loss": 0.5473, "step": 24897 }, { "epoch": 0.726927680943622, "grad_norm": 0.6755218492752347, "learning_rate": 1.5171127331711275e-06, "loss": 0.5799, "step": 24898 }, { "epoch": 0.7269568771714694, "grad_norm": 0.7308766836910339, "learning_rate": 1.5169505271695053e-06, "loss": 0.6206, "step": 24899 }, { "epoch": 0.7269860733993168, "grad_norm": 0.7124621047626936, "learning_rate": 1.5167883211678833e-06, "loss": 0.6302, "step": 24900 }, { "epoch": 0.7270152696271641, "grad_norm": 0.7079350911286483, "learning_rate": 1.5166261151662611e-06, "loss": 0.5878, "step": 24901 }, { "epoch": 0.7270444658550115, "grad_norm": 0.7033650473320588, "learning_rate": 1.5164639091646391e-06, "loss": 0.5909, "step": 24902 }, { "epoch": 0.7270736620828588, "grad_norm": 0.7311341330566242, "learning_rate": 1.5163017031630173e-06, "loss": 0.669, "step": 24903 }, { "epoch": 0.7271028583107062, "grad_norm": 0.7571989457455844, "learning_rate": 1.5161394971613951e-06, "loss": 0.6892, "step": 24904 }, { "epoch": 0.7271320545385537, "grad_norm": 0.7217706555793947, "learning_rate": 1.5159772911597731e-06, "loss": 0.6479, "step": 24905 }, { "epoch": 0.727161250766401, "grad_norm": 0.7667410734908687, "learning_rate": 1.515815085158151e-06, "loss": 0.715, "step": 24906 }, { "epoch": 0.7271904469942484, "grad_norm": 0.7569233104789832, "learning_rate": 1.515652879156529e-06, "loss": 0.6998, "step": 24907 }, { "epoch": 0.7272196432220958, "grad_norm": 0.675717744445625, "learning_rate": 1.5154906731549067e-06, "loss": 0.5768, "step": 24908 }, { "epoch": 0.7272488394499431, "grad_norm": 0.6942921420838043, "learning_rate": 1.5153284671532847e-06, "loss": 0.5669, "step": 24909 }, { "epoch": 0.7272780356777905, "grad_norm": 0.7148756382308494, "learning_rate": 1.5151662611516628e-06, "loss": 0.5812, "step": 24910 }, { "epoch": 0.7273072319056378, "grad_norm": 0.6560129346279666, "learning_rate": 1.5150040551500406e-06, "loss": 0.5735, "step": 24911 }, { "epoch": 0.7273364281334852, "grad_norm": 0.7277113345881906, "learning_rate": 1.5148418491484188e-06, "loss": 0.6596, "step": 24912 }, { "epoch": 0.7273656243613326, "grad_norm": 0.7419146464870356, "learning_rate": 1.5146796431467966e-06, "loss": 0.6601, "step": 24913 }, { "epoch": 0.7273948205891799, "grad_norm": 0.7539301927597708, "learning_rate": 1.5145174371451746e-06, "loss": 0.6864, "step": 24914 }, { "epoch": 0.7274240168170273, "grad_norm": 0.7559332865715466, "learning_rate": 1.5143552311435526e-06, "loss": 0.6153, "step": 24915 }, { "epoch": 0.7274532130448746, "grad_norm": 0.6829158869097193, "learning_rate": 1.5141930251419304e-06, "loss": 0.6039, "step": 24916 }, { "epoch": 0.727482409272722, "grad_norm": 0.7654022343859933, "learning_rate": 1.5140308191403084e-06, "loss": 0.5493, "step": 24917 }, { "epoch": 0.7275116055005694, "grad_norm": 0.710652274257028, "learning_rate": 1.5138686131386862e-06, "loss": 0.5993, "step": 24918 }, { "epoch": 0.7275408017284167, "grad_norm": 0.723386227803421, "learning_rate": 1.5137064071370642e-06, "loss": 0.6596, "step": 24919 }, { "epoch": 0.7275699979562641, "grad_norm": 0.7447144656083502, "learning_rate": 1.513544201135442e-06, "loss": 0.7248, "step": 24920 }, { "epoch": 0.7275991941841115, "grad_norm": 0.6901630858052124, "learning_rate": 1.51338199513382e-06, "loss": 0.6066, "step": 24921 }, { "epoch": 0.7276283904119588, "grad_norm": 0.692488551520726, "learning_rate": 1.5132197891321982e-06, "loss": 0.6028, "step": 24922 }, { "epoch": 0.7276575866398062, "grad_norm": 0.7198454665031692, "learning_rate": 1.513057583130576e-06, "loss": 0.6246, "step": 24923 }, { "epoch": 0.7276867828676535, "grad_norm": 0.7513053951733806, "learning_rate": 1.512895377128954e-06, "loss": 0.7169, "step": 24924 }, { "epoch": 0.7277159790955009, "grad_norm": 0.7195843063120401, "learning_rate": 1.5127331711273318e-06, "loss": 0.6728, "step": 24925 }, { "epoch": 0.7277451753233483, "grad_norm": 0.7618992452785122, "learning_rate": 1.5125709651257098e-06, "loss": 0.6427, "step": 24926 }, { "epoch": 0.7277743715511956, "grad_norm": 0.6834459792632553, "learning_rate": 1.5124087591240876e-06, "loss": 0.5796, "step": 24927 }, { "epoch": 0.727803567779043, "grad_norm": 0.7157273717897049, "learning_rate": 1.5122465531224656e-06, "loss": 0.6502, "step": 24928 }, { "epoch": 0.7278327640068903, "grad_norm": 0.7073953891143987, "learning_rate": 1.5120843471208436e-06, "loss": 0.6428, "step": 24929 }, { "epoch": 0.7278619602347377, "grad_norm": 0.7628255923233981, "learning_rate": 1.5119221411192214e-06, "loss": 0.7038, "step": 24930 }, { "epoch": 0.7278911564625851, "grad_norm": 0.6698786131708437, "learning_rate": 1.5117599351175996e-06, "loss": 0.5373, "step": 24931 }, { "epoch": 0.7279203526904324, "grad_norm": 0.7561865551214589, "learning_rate": 1.5115977291159774e-06, "loss": 0.6472, "step": 24932 }, { "epoch": 0.7279495489182798, "grad_norm": 0.7106248830765213, "learning_rate": 1.5114355231143554e-06, "loss": 0.6145, "step": 24933 }, { "epoch": 0.7279787451461271, "grad_norm": 0.7122745866958371, "learning_rate": 1.5112733171127334e-06, "loss": 0.6363, "step": 24934 }, { "epoch": 0.7280079413739745, "grad_norm": 0.7166069593553868, "learning_rate": 1.5111111111111112e-06, "loss": 0.6299, "step": 24935 }, { "epoch": 0.7280371376018219, "grad_norm": 0.7673401811478215, "learning_rate": 1.5109489051094892e-06, "loss": 0.6758, "step": 24936 }, { "epoch": 0.7280663338296692, "grad_norm": 0.6792355470247855, "learning_rate": 1.510786699107867e-06, "loss": 0.5804, "step": 24937 }, { "epoch": 0.7280955300575166, "grad_norm": 0.7006818727750207, "learning_rate": 1.510624493106245e-06, "loss": 0.6105, "step": 24938 }, { "epoch": 0.728124726285364, "grad_norm": 0.6840686273769122, "learning_rate": 1.5104622871046228e-06, "loss": 0.5299, "step": 24939 }, { "epoch": 0.7281539225132113, "grad_norm": 0.7220057705032754, "learning_rate": 1.5103000811030008e-06, "loss": 0.6418, "step": 24940 }, { "epoch": 0.7281831187410587, "grad_norm": 0.7330698260676795, "learning_rate": 1.510137875101379e-06, "loss": 0.7002, "step": 24941 }, { "epoch": 0.728212314968906, "grad_norm": 0.7423542908846593, "learning_rate": 1.5099756690997569e-06, "loss": 0.6449, "step": 24942 }, { "epoch": 0.7282415111967534, "grad_norm": 0.7904812814949699, "learning_rate": 1.5098134630981349e-06, "loss": 0.786, "step": 24943 }, { "epoch": 0.7282707074246008, "grad_norm": 0.697384296321634, "learning_rate": 1.5096512570965127e-06, "loss": 0.6109, "step": 24944 }, { "epoch": 0.7282999036524481, "grad_norm": 0.6624068176226279, "learning_rate": 1.5094890510948907e-06, "loss": 0.5325, "step": 24945 }, { "epoch": 0.7283290998802955, "grad_norm": 0.8080538287814499, "learning_rate": 1.5093268450932685e-06, "loss": 0.7539, "step": 24946 }, { "epoch": 0.7283582961081428, "grad_norm": 0.7127874509673231, "learning_rate": 1.5091646390916465e-06, "loss": 0.5736, "step": 24947 }, { "epoch": 0.7283874923359902, "grad_norm": 0.7243949872944295, "learning_rate": 1.5090024330900243e-06, "loss": 0.663, "step": 24948 }, { "epoch": 0.7284166885638376, "grad_norm": 0.8188326978137231, "learning_rate": 1.5088402270884023e-06, "loss": 0.7046, "step": 24949 }, { "epoch": 0.7284458847916849, "grad_norm": 0.7338818481325602, "learning_rate": 1.5086780210867805e-06, "loss": 0.64, "step": 24950 }, { "epoch": 0.7284750810195323, "grad_norm": 0.793170057038293, "learning_rate": 1.5085158150851583e-06, "loss": 0.7397, "step": 24951 }, { "epoch": 0.7285042772473797, "grad_norm": 0.7646819785456734, "learning_rate": 1.5083536090835363e-06, "loss": 0.7082, "step": 24952 }, { "epoch": 0.728533473475227, "grad_norm": 0.6663232403347584, "learning_rate": 1.5081914030819143e-06, "loss": 0.5686, "step": 24953 }, { "epoch": 0.7285626697030744, "grad_norm": 0.7307037973841274, "learning_rate": 1.508029197080292e-06, "loss": 0.6323, "step": 24954 }, { "epoch": 0.7285918659309217, "grad_norm": 0.6929656702312056, "learning_rate": 1.50786699107867e-06, "loss": 0.6091, "step": 24955 }, { "epoch": 0.7286210621587691, "grad_norm": 0.7364179234419891, "learning_rate": 1.5077047850770479e-06, "loss": 0.6418, "step": 24956 }, { "epoch": 0.7286502583866165, "grad_norm": 0.6992670486760025, "learning_rate": 1.507542579075426e-06, "loss": 0.6113, "step": 24957 }, { "epoch": 0.7286794546144638, "grad_norm": 0.7700515443221363, "learning_rate": 1.5073803730738037e-06, "loss": 0.6637, "step": 24958 }, { "epoch": 0.7287086508423112, "grad_norm": 0.759387244336279, "learning_rate": 1.5072181670721817e-06, "loss": 0.6598, "step": 24959 }, { "epoch": 0.7287378470701585, "grad_norm": 0.6716641573673092, "learning_rate": 1.50705596107056e-06, "loss": 0.5762, "step": 24960 }, { "epoch": 0.7287670432980059, "grad_norm": 0.7638149699796472, "learning_rate": 1.5068937550689377e-06, "loss": 0.6817, "step": 24961 }, { "epoch": 0.7287962395258533, "grad_norm": 0.7191581764311786, "learning_rate": 1.5067315490673157e-06, "loss": 0.663, "step": 24962 }, { "epoch": 0.7288254357537006, "grad_norm": 0.7271511797954473, "learning_rate": 1.5065693430656935e-06, "loss": 0.5988, "step": 24963 }, { "epoch": 0.728854631981548, "grad_norm": 0.7145848572389005, "learning_rate": 1.5064071370640715e-06, "loss": 0.6624, "step": 24964 }, { "epoch": 0.7288838282093953, "grad_norm": 0.7314680370293175, "learning_rate": 1.5062449310624493e-06, "loss": 0.6666, "step": 24965 }, { "epoch": 0.7289130244372427, "grad_norm": 0.7645099714282956, "learning_rate": 1.5060827250608273e-06, "loss": 0.6931, "step": 24966 }, { "epoch": 0.7289422206650901, "grad_norm": 0.702390181429305, "learning_rate": 1.5059205190592051e-06, "loss": 0.6159, "step": 24967 }, { "epoch": 0.7289714168929374, "grad_norm": 0.7603284995777396, "learning_rate": 1.5057583130575831e-06, "loss": 0.6746, "step": 24968 }, { "epoch": 0.7290006131207848, "grad_norm": 0.7705853411179769, "learning_rate": 1.5055961070559613e-06, "loss": 0.7166, "step": 24969 }, { "epoch": 0.7290298093486322, "grad_norm": 0.7521559533400207, "learning_rate": 1.5054339010543391e-06, "loss": 0.6982, "step": 24970 }, { "epoch": 0.7290590055764795, "grad_norm": 0.6952105150207426, "learning_rate": 1.5052716950527171e-06, "loss": 0.5694, "step": 24971 }, { "epoch": 0.7290882018043269, "grad_norm": 0.6938312829687576, "learning_rate": 1.5051094890510952e-06, "loss": 0.5553, "step": 24972 }, { "epoch": 0.7291173980321742, "grad_norm": 0.7095192190869676, "learning_rate": 1.504947283049473e-06, "loss": 0.5923, "step": 24973 }, { "epoch": 0.7291465942600216, "grad_norm": 0.7115818083858375, "learning_rate": 1.504785077047851e-06, "loss": 0.5933, "step": 24974 }, { "epoch": 0.729175790487869, "grad_norm": 0.7021218569175073, "learning_rate": 1.5046228710462288e-06, "loss": 0.5985, "step": 24975 }, { "epoch": 0.7292049867157163, "grad_norm": 0.733260227801982, "learning_rate": 1.5044606650446068e-06, "loss": 0.658, "step": 24976 }, { "epoch": 0.7292341829435637, "grad_norm": 0.6764163455126734, "learning_rate": 1.5042984590429846e-06, "loss": 0.5165, "step": 24977 }, { "epoch": 0.729263379171411, "grad_norm": 0.8160343652891747, "learning_rate": 1.5041362530413626e-06, "loss": 0.7226, "step": 24978 }, { "epoch": 0.7292925753992584, "grad_norm": 0.7012075267585693, "learning_rate": 1.5039740470397408e-06, "loss": 0.6461, "step": 24979 }, { "epoch": 0.7293217716271058, "grad_norm": 0.6972255629077363, "learning_rate": 1.5038118410381186e-06, "loss": 0.5447, "step": 24980 }, { "epoch": 0.7293509678549531, "grad_norm": 0.6866333936470755, "learning_rate": 1.5036496350364966e-06, "loss": 0.5914, "step": 24981 }, { "epoch": 0.7293801640828005, "grad_norm": 0.6833696390608238, "learning_rate": 1.5034874290348744e-06, "loss": 0.5442, "step": 24982 }, { "epoch": 0.7294093603106478, "grad_norm": 0.6972397125441311, "learning_rate": 1.5033252230332524e-06, "loss": 0.6233, "step": 24983 }, { "epoch": 0.7294385565384952, "grad_norm": 0.7408590179700328, "learning_rate": 1.5031630170316302e-06, "loss": 0.6427, "step": 24984 }, { "epoch": 0.7294677527663426, "grad_norm": 0.6763259593950465, "learning_rate": 1.5030008110300082e-06, "loss": 0.5765, "step": 24985 }, { "epoch": 0.7294969489941899, "grad_norm": 0.6892223035331787, "learning_rate": 1.502838605028386e-06, "loss": 0.6181, "step": 24986 }, { "epoch": 0.7295261452220373, "grad_norm": 0.7322144055382553, "learning_rate": 1.502676399026764e-06, "loss": 0.6554, "step": 24987 }, { "epoch": 0.7295553414498847, "grad_norm": 0.877784678280765, "learning_rate": 1.5025141930251422e-06, "loss": 0.6291, "step": 24988 }, { "epoch": 0.729584537677732, "grad_norm": 0.781453708740083, "learning_rate": 1.50235198702352e-06, "loss": 0.7149, "step": 24989 }, { "epoch": 0.7296137339055794, "grad_norm": 0.7671430464661221, "learning_rate": 1.502189781021898e-06, "loss": 0.6957, "step": 24990 }, { "epoch": 0.7296429301334267, "grad_norm": 0.7083324289334496, "learning_rate": 1.502027575020276e-06, "loss": 0.6361, "step": 24991 }, { "epoch": 0.7296721263612741, "grad_norm": 0.7615558046565594, "learning_rate": 1.5018653690186538e-06, "loss": 0.6763, "step": 24992 }, { "epoch": 0.7297013225891215, "grad_norm": 0.7095494400775969, "learning_rate": 1.5017031630170318e-06, "loss": 0.6224, "step": 24993 }, { "epoch": 0.7297305188169688, "grad_norm": 0.7541644783893647, "learning_rate": 1.5015409570154096e-06, "loss": 0.6636, "step": 24994 }, { "epoch": 0.7297597150448162, "grad_norm": 0.727226531201642, "learning_rate": 1.5013787510137876e-06, "loss": 0.6333, "step": 24995 }, { "epoch": 0.7297889112726635, "grad_norm": 0.7515874366012157, "learning_rate": 1.5012165450121654e-06, "loss": 0.652, "step": 24996 }, { "epoch": 0.7298181075005109, "grad_norm": 0.7197783177086825, "learning_rate": 1.5010543390105436e-06, "loss": 0.6422, "step": 24997 }, { "epoch": 0.7298473037283583, "grad_norm": 0.7101007042623031, "learning_rate": 1.5008921330089216e-06, "loss": 0.5576, "step": 24998 }, { "epoch": 0.7298764999562056, "grad_norm": 0.7245348277581739, "learning_rate": 1.5007299270072994e-06, "loss": 0.6375, "step": 24999 }, { "epoch": 0.729905696184053, "grad_norm": 0.7721003477360354, "learning_rate": 1.5005677210056774e-06, "loss": 0.7105, "step": 25000 }, { "epoch": 0.7299348924119003, "grad_norm": 0.6928121908477147, "learning_rate": 1.5004055150040552e-06, "loss": 0.5981, "step": 25001 }, { "epoch": 0.7299640886397477, "grad_norm": 0.704446740396765, "learning_rate": 1.5002433090024332e-06, "loss": 0.6364, "step": 25002 }, { "epoch": 0.7299932848675951, "grad_norm": 0.6803105098815393, "learning_rate": 1.500081103000811e-06, "loss": 0.5838, "step": 25003 }, { "epoch": 0.7300224810954424, "grad_norm": 0.755882998566825, "learning_rate": 1.499918896999189e-06, "loss": 0.6706, "step": 25004 }, { "epoch": 0.7300516773232898, "grad_norm": 0.7079158800627142, "learning_rate": 1.4997566909975668e-06, "loss": 0.6061, "step": 25005 }, { "epoch": 0.7300808735511372, "grad_norm": 0.7222440041594985, "learning_rate": 1.4995944849959448e-06, "loss": 0.6834, "step": 25006 }, { "epoch": 0.7301100697789845, "grad_norm": 0.744883329611979, "learning_rate": 1.499432278994323e-06, "loss": 0.686, "step": 25007 }, { "epoch": 0.7301392660068319, "grad_norm": 0.6923078916660126, "learning_rate": 1.4992700729927009e-06, "loss": 0.6275, "step": 25008 }, { "epoch": 0.7301684622346792, "grad_norm": 0.7011521117607804, "learning_rate": 1.4991078669910789e-06, "loss": 0.6017, "step": 25009 }, { "epoch": 0.7301976584625266, "grad_norm": 0.8184160309079412, "learning_rate": 1.4989456609894567e-06, "loss": 0.7524, "step": 25010 }, { "epoch": 0.730226854690374, "grad_norm": 0.6659738315470988, "learning_rate": 1.4987834549878347e-06, "loss": 0.5159, "step": 25011 }, { "epoch": 0.7302560509182213, "grad_norm": 0.7867940595789316, "learning_rate": 1.4986212489862127e-06, "loss": 0.7427, "step": 25012 }, { "epoch": 0.7302852471460687, "grad_norm": 0.7268636468807879, "learning_rate": 1.4984590429845905e-06, "loss": 0.6838, "step": 25013 }, { "epoch": 0.730314443373916, "grad_norm": 0.7405315416551174, "learning_rate": 1.4982968369829685e-06, "loss": 0.6336, "step": 25014 }, { "epoch": 0.7303436396017634, "grad_norm": 0.7450411475202523, "learning_rate": 1.4981346309813463e-06, "loss": 0.6622, "step": 25015 }, { "epoch": 0.7303728358296108, "grad_norm": 0.6265060618504323, "learning_rate": 1.4979724249797245e-06, "loss": 0.4838, "step": 25016 }, { "epoch": 0.7304020320574581, "grad_norm": 0.727549032015597, "learning_rate": 1.4978102189781025e-06, "loss": 0.6236, "step": 25017 }, { "epoch": 0.7304312282853055, "grad_norm": 0.7837020914876865, "learning_rate": 1.4976480129764803e-06, "loss": 0.7266, "step": 25018 }, { "epoch": 0.7304604245131529, "grad_norm": 0.7137625601001387, "learning_rate": 1.4974858069748583e-06, "loss": 0.6024, "step": 25019 }, { "epoch": 0.7304896207410002, "grad_norm": 0.7563124990542727, "learning_rate": 1.497323600973236e-06, "loss": 0.6583, "step": 25020 }, { "epoch": 0.7305188169688476, "grad_norm": 0.696419247388369, "learning_rate": 1.497161394971614e-06, "loss": 0.5985, "step": 25021 }, { "epoch": 0.7305480131966949, "grad_norm": 0.7267274719014648, "learning_rate": 1.4969991889699919e-06, "loss": 0.7236, "step": 25022 }, { "epoch": 0.7305772094245423, "grad_norm": 0.7172498539343207, "learning_rate": 1.49683698296837e-06, "loss": 0.5838, "step": 25023 }, { "epoch": 0.7306064056523897, "grad_norm": 0.7457003790941511, "learning_rate": 1.4966747769667477e-06, "loss": 0.6753, "step": 25024 }, { "epoch": 0.7306356018802371, "grad_norm": 0.7159786689107719, "learning_rate": 1.4965125709651257e-06, "loss": 0.6298, "step": 25025 }, { "epoch": 0.7306647981080845, "grad_norm": 0.7029796204341006, "learning_rate": 1.496350364963504e-06, "loss": 0.6391, "step": 25026 }, { "epoch": 0.7306939943359319, "grad_norm": 0.7096934329916534, "learning_rate": 1.4961881589618817e-06, "loss": 0.5766, "step": 25027 }, { "epoch": 0.7307231905637792, "grad_norm": 0.7554811558265512, "learning_rate": 1.4960259529602597e-06, "loss": 0.6308, "step": 25028 }, { "epoch": 0.7307523867916266, "grad_norm": 0.7122928320806603, "learning_rate": 1.4958637469586375e-06, "loss": 0.6602, "step": 25029 }, { "epoch": 0.7307815830194739, "grad_norm": 0.6806464823510299, "learning_rate": 1.4957015409570155e-06, "loss": 0.5908, "step": 25030 }, { "epoch": 0.7308107792473213, "grad_norm": 0.7569085276173272, "learning_rate": 1.4955393349553935e-06, "loss": 0.7034, "step": 25031 }, { "epoch": 0.7308399754751687, "grad_norm": 0.7393135737192977, "learning_rate": 1.4953771289537713e-06, "loss": 0.6427, "step": 25032 }, { "epoch": 0.730869171703016, "grad_norm": 0.7228629908459733, "learning_rate": 1.4952149229521493e-06, "loss": 0.6565, "step": 25033 }, { "epoch": 0.7308983679308634, "grad_norm": 0.8230583323741336, "learning_rate": 1.4950527169505271e-06, "loss": 0.633, "step": 25034 }, { "epoch": 0.7309275641587107, "grad_norm": 0.7320436799170666, "learning_rate": 1.4948905109489053e-06, "loss": 0.6494, "step": 25035 }, { "epoch": 0.7309567603865581, "grad_norm": 0.6928567465682984, "learning_rate": 1.4947283049472834e-06, "loss": 0.5885, "step": 25036 }, { "epoch": 0.7309859566144055, "grad_norm": 1.682758593304944, "learning_rate": 1.4945660989456611e-06, "loss": 0.6901, "step": 25037 }, { "epoch": 0.7310151528422528, "grad_norm": 0.7413678154647919, "learning_rate": 1.4944038929440392e-06, "loss": 0.695, "step": 25038 }, { "epoch": 0.7310443490701002, "grad_norm": 0.743038118646573, "learning_rate": 1.494241686942417e-06, "loss": 0.6479, "step": 25039 }, { "epoch": 0.7310735452979475, "grad_norm": 0.7997446697948347, "learning_rate": 1.494079480940795e-06, "loss": 0.7368, "step": 25040 }, { "epoch": 0.7311027415257949, "grad_norm": 0.6815230309347151, "learning_rate": 1.4939172749391728e-06, "loss": 0.6151, "step": 25041 }, { "epoch": 0.7311319377536423, "grad_norm": 0.7687123531158676, "learning_rate": 1.4937550689375508e-06, "loss": 0.7285, "step": 25042 }, { "epoch": 0.7311611339814896, "grad_norm": 0.71041973026008, "learning_rate": 1.4935928629359286e-06, "loss": 0.6233, "step": 25043 }, { "epoch": 0.731190330209337, "grad_norm": 0.7553155158503964, "learning_rate": 1.4934306569343066e-06, "loss": 0.7101, "step": 25044 }, { "epoch": 0.7312195264371844, "grad_norm": 0.6858882240805858, "learning_rate": 1.4932684509326848e-06, "loss": 0.5857, "step": 25045 }, { "epoch": 0.7312487226650317, "grad_norm": 0.7124343277616407, "learning_rate": 1.4931062449310626e-06, "loss": 0.647, "step": 25046 }, { "epoch": 0.7312779188928791, "grad_norm": 0.6738135331399429, "learning_rate": 1.4929440389294406e-06, "loss": 0.5628, "step": 25047 }, { "epoch": 0.7313071151207264, "grad_norm": 0.7285830054815191, "learning_rate": 1.4927818329278184e-06, "loss": 0.6075, "step": 25048 }, { "epoch": 0.7313363113485738, "grad_norm": 0.7959862863288089, "learning_rate": 1.4926196269261964e-06, "loss": 0.7281, "step": 25049 }, { "epoch": 0.7313655075764212, "grad_norm": 0.6590473405351156, "learning_rate": 1.4924574209245744e-06, "loss": 0.5616, "step": 25050 }, { "epoch": 0.7313947038042685, "grad_norm": 0.7285055462821713, "learning_rate": 1.4922952149229522e-06, "loss": 0.5926, "step": 25051 }, { "epoch": 0.7314239000321159, "grad_norm": 0.7769075583601353, "learning_rate": 1.4921330089213302e-06, "loss": 0.7099, "step": 25052 }, { "epoch": 0.7314530962599632, "grad_norm": 0.7105574615363796, "learning_rate": 1.491970802919708e-06, "loss": 0.6126, "step": 25053 }, { "epoch": 0.7314822924878106, "grad_norm": 0.6593657597305252, "learning_rate": 1.4918085969180862e-06, "loss": 0.5641, "step": 25054 }, { "epoch": 0.731511488715658, "grad_norm": 0.714667316133545, "learning_rate": 1.4916463909164642e-06, "loss": 0.6114, "step": 25055 }, { "epoch": 0.7315406849435053, "grad_norm": 0.6775638694519361, "learning_rate": 1.491484184914842e-06, "loss": 0.6157, "step": 25056 }, { "epoch": 0.7315698811713527, "grad_norm": 0.7157653306576667, "learning_rate": 1.49132197891322e-06, "loss": 0.6598, "step": 25057 }, { "epoch": 0.7315990773992, "grad_norm": 0.6990725836456716, "learning_rate": 1.4911597729115978e-06, "loss": 0.5824, "step": 25058 }, { "epoch": 0.7316282736270474, "grad_norm": 0.7189320819213391, "learning_rate": 1.4909975669099758e-06, "loss": 0.6097, "step": 25059 }, { "epoch": 0.7316574698548948, "grad_norm": 0.6988544877044879, "learning_rate": 1.4908353609083536e-06, "loss": 0.6056, "step": 25060 }, { "epoch": 0.7316866660827421, "grad_norm": 0.7472989993810089, "learning_rate": 1.4906731549067316e-06, "loss": 0.7009, "step": 25061 }, { "epoch": 0.7317158623105895, "grad_norm": 0.7269607594298583, "learning_rate": 1.4905109489051094e-06, "loss": 0.6779, "step": 25062 }, { "epoch": 0.7317450585384369, "grad_norm": 0.7047405758311344, "learning_rate": 1.4903487429034876e-06, "loss": 0.5349, "step": 25063 }, { "epoch": 0.7317742547662842, "grad_norm": 0.7102506910509495, "learning_rate": 1.4901865369018656e-06, "loss": 0.5993, "step": 25064 }, { "epoch": 0.7318034509941316, "grad_norm": 0.6845159296654525, "learning_rate": 1.4900243309002434e-06, "loss": 0.5618, "step": 25065 }, { "epoch": 0.7318326472219789, "grad_norm": 0.6727497609529633, "learning_rate": 1.4898621248986214e-06, "loss": 0.5783, "step": 25066 }, { "epoch": 0.7318618434498263, "grad_norm": 0.7420213708301653, "learning_rate": 1.4896999188969992e-06, "loss": 0.6425, "step": 25067 }, { "epoch": 0.7318910396776737, "grad_norm": 0.6834018319205657, "learning_rate": 1.4895377128953772e-06, "loss": 0.5578, "step": 25068 }, { "epoch": 0.731920235905521, "grad_norm": 0.7049141513395166, "learning_rate": 1.4893755068937552e-06, "loss": 0.6439, "step": 25069 }, { "epoch": 0.7319494321333684, "grad_norm": 0.7499499547022285, "learning_rate": 1.489213300892133e-06, "loss": 0.6808, "step": 25070 }, { "epoch": 0.7319786283612157, "grad_norm": 0.74182743709995, "learning_rate": 1.489051094890511e-06, "loss": 0.7229, "step": 25071 }, { "epoch": 0.7320078245890631, "grad_norm": 0.6951963842382506, "learning_rate": 1.4888888888888888e-06, "loss": 0.6342, "step": 25072 }, { "epoch": 0.7320370208169105, "grad_norm": 0.6958094743466959, "learning_rate": 1.488726682887267e-06, "loss": 0.5648, "step": 25073 }, { "epoch": 0.7320662170447578, "grad_norm": 0.7099315693422519, "learning_rate": 1.488564476885645e-06, "loss": 0.6183, "step": 25074 }, { "epoch": 0.7320954132726052, "grad_norm": 0.6868672005751378, "learning_rate": 1.4884022708840229e-06, "loss": 0.5764, "step": 25075 }, { "epoch": 0.7321246095004526, "grad_norm": 0.8023325849102807, "learning_rate": 1.4882400648824009e-06, "loss": 0.7602, "step": 25076 }, { "epoch": 0.7321538057282999, "grad_norm": 0.6867039014810963, "learning_rate": 1.4880778588807787e-06, "loss": 0.5647, "step": 25077 }, { "epoch": 0.7321830019561473, "grad_norm": 0.7503172149423663, "learning_rate": 1.4879156528791567e-06, "loss": 0.6917, "step": 25078 }, { "epoch": 0.7322121981839946, "grad_norm": 0.7759321727849142, "learning_rate": 1.4877534468775345e-06, "loss": 0.7152, "step": 25079 }, { "epoch": 0.732241394411842, "grad_norm": 0.70912272711303, "learning_rate": 1.4875912408759125e-06, "loss": 0.6246, "step": 25080 }, { "epoch": 0.7322705906396894, "grad_norm": 0.728837508367394, "learning_rate": 1.4874290348742903e-06, "loss": 0.6493, "step": 25081 }, { "epoch": 0.7322997868675367, "grad_norm": 0.7257484051715068, "learning_rate": 1.4872668288726685e-06, "loss": 0.5984, "step": 25082 }, { "epoch": 0.7323289830953841, "grad_norm": 0.7339341668718576, "learning_rate": 1.4871046228710465e-06, "loss": 0.6487, "step": 25083 }, { "epoch": 0.7323581793232314, "grad_norm": 0.8028155093238621, "learning_rate": 1.4869424168694243e-06, "loss": 0.7266, "step": 25084 }, { "epoch": 0.7323873755510788, "grad_norm": 0.6960944696204362, "learning_rate": 1.4867802108678023e-06, "loss": 0.5901, "step": 25085 }, { "epoch": 0.7324165717789262, "grad_norm": 0.7598652141462474, "learning_rate": 1.48661800486618e-06, "loss": 0.6603, "step": 25086 }, { "epoch": 0.7324457680067735, "grad_norm": 0.7616076096681478, "learning_rate": 1.486455798864558e-06, "loss": 0.6572, "step": 25087 }, { "epoch": 0.7324749642346209, "grad_norm": 0.748172487950983, "learning_rate": 1.4862935928629361e-06, "loss": 0.7027, "step": 25088 }, { "epoch": 0.7325041604624682, "grad_norm": 0.7012890026949895, "learning_rate": 1.486131386861314e-06, "loss": 0.615, "step": 25089 }, { "epoch": 0.7325333566903156, "grad_norm": 0.6908584423534067, "learning_rate": 1.485969180859692e-06, "loss": 0.5855, "step": 25090 }, { "epoch": 0.732562552918163, "grad_norm": 0.6883927483118024, "learning_rate": 1.4858069748580697e-06, "loss": 0.612, "step": 25091 }, { "epoch": 0.7325917491460103, "grad_norm": 0.7057302632974065, "learning_rate": 1.485644768856448e-06, "loss": 0.6434, "step": 25092 }, { "epoch": 0.7326209453738577, "grad_norm": 0.6562136793195367, "learning_rate": 1.485482562854826e-06, "loss": 0.5303, "step": 25093 }, { "epoch": 0.732650141601705, "grad_norm": 0.7094745447240827, "learning_rate": 1.4853203568532037e-06, "loss": 0.5433, "step": 25094 }, { "epoch": 0.7326793378295524, "grad_norm": 0.7059853285617914, "learning_rate": 1.4851581508515817e-06, "loss": 0.5886, "step": 25095 }, { "epoch": 0.7327085340573998, "grad_norm": 0.7360821937388243, "learning_rate": 1.4849959448499595e-06, "loss": 0.7283, "step": 25096 }, { "epoch": 0.7327377302852471, "grad_norm": 0.6800429081643515, "learning_rate": 1.4848337388483375e-06, "loss": 0.5617, "step": 25097 }, { "epoch": 0.7327669265130945, "grad_norm": 0.7406665011005688, "learning_rate": 1.4846715328467153e-06, "loss": 0.6788, "step": 25098 }, { "epoch": 0.7327961227409419, "grad_norm": 0.7605638390613236, "learning_rate": 1.4845093268450933e-06, "loss": 0.7145, "step": 25099 }, { "epoch": 0.7328253189687892, "grad_norm": 0.7105602317636549, "learning_rate": 1.4843471208434711e-06, "loss": 0.6217, "step": 25100 }, { "epoch": 0.7328545151966366, "grad_norm": 0.7374865609463787, "learning_rate": 1.4841849148418493e-06, "loss": 0.7229, "step": 25101 }, { "epoch": 0.7328837114244839, "grad_norm": 0.7543398659553149, "learning_rate": 1.4840227088402274e-06, "loss": 0.6543, "step": 25102 }, { "epoch": 0.7329129076523313, "grad_norm": 0.6444194578172278, "learning_rate": 1.4838605028386051e-06, "loss": 0.5205, "step": 25103 }, { "epoch": 0.7329421038801787, "grad_norm": 0.7401397579140855, "learning_rate": 1.4836982968369832e-06, "loss": 0.6683, "step": 25104 }, { "epoch": 0.732971300108026, "grad_norm": 0.7166866029496749, "learning_rate": 1.483536090835361e-06, "loss": 0.6341, "step": 25105 }, { "epoch": 0.7330004963358734, "grad_norm": 0.7500784856341893, "learning_rate": 1.483373884833739e-06, "loss": 0.7216, "step": 25106 }, { "epoch": 0.7330296925637207, "grad_norm": 0.7319332928017946, "learning_rate": 1.483211678832117e-06, "loss": 0.6685, "step": 25107 }, { "epoch": 0.7330588887915681, "grad_norm": 0.6917728504723527, "learning_rate": 1.4830494728304948e-06, "loss": 0.6009, "step": 25108 }, { "epoch": 0.7330880850194155, "grad_norm": 0.7243293412514253, "learning_rate": 1.4828872668288728e-06, "loss": 0.6237, "step": 25109 }, { "epoch": 0.7331172812472628, "grad_norm": 0.7745282314527026, "learning_rate": 1.4827250608272506e-06, "loss": 0.7386, "step": 25110 }, { "epoch": 0.7331464774751102, "grad_norm": 0.73023960431687, "learning_rate": 1.4825628548256288e-06, "loss": 0.6794, "step": 25111 }, { "epoch": 0.7331756737029576, "grad_norm": 0.8246488552846093, "learning_rate": 1.4824006488240068e-06, "loss": 0.6187, "step": 25112 }, { "epoch": 0.7332048699308049, "grad_norm": 0.7090020804023597, "learning_rate": 1.4822384428223846e-06, "loss": 0.5919, "step": 25113 }, { "epoch": 0.7332340661586523, "grad_norm": 0.7134470580097464, "learning_rate": 1.4820762368207626e-06, "loss": 0.6296, "step": 25114 }, { "epoch": 0.7332632623864996, "grad_norm": 0.7353785848313756, "learning_rate": 1.4819140308191404e-06, "loss": 0.6365, "step": 25115 }, { "epoch": 0.733292458614347, "grad_norm": 0.7606729398633575, "learning_rate": 1.4817518248175184e-06, "loss": 0.6802, "step": 25116 }, { "epoch": 0.7333216548421944, "grad_norm": 0.7093705105868857, "learning_rate": 1.4815896188158962e-06, "loss": 0.6349, "step": 25117 }, { "epoch": 0.7333508510700417, "grad_norm": 0.7133943393940225, "learning_rate": 1.4814274128142742e-06, "loss": 0.6419, "step": 25118 }, { "epoch": 0.7333800472978891, "grad_norm": 0.723306133456772, "learning_rate": 1.481265206812652e-06, "loss": 0.625, "step": 25119 }, { "epoch": 0.7334092435257364, "grad_norm": 0.7015251411670284, "learning_rate": 1.4811030008110302e-06, "loss": 0.6357, "step": 25120 }, { "epoch": 0.7334384397535838, "grad_norm": 0.7248774556020405, "learning_rate": 1.4809407948094082e-06, "loss": 0.6727, "step": 25121 }, { "epoch": 0.7334676359814312, "grad_norm": 0.7465263664670899, "learning_rate": 1.480778588807786e-06, "loss": 0.7075, "step": 25122 }, { "epoch": 0.7334968322092785, "grad_norm": 0.7231869493535124, "learning_rate": 1.480616382806164e-06, "loss": 0.6591, "step": 25123 }, { "epoch": 0.7335260284371259, "grad_norm": 0.7297772130317319, "learning_rate": 1.4804541768045418e-06, "loss": 0.6459, "step": 25124 }, { "epoch": 0.7335552246649732, "grad_norm": 0.7024079767871065, "learning_rate": 1.4802919708029198e-06, "loss": 0.6177, "step": 25125 }, { "epoch": 0.7335844208928206, "grad_norm": 0.7392302859109412, "learning_rate": 1.4801297648012978e-06, "loss": 0.6353, "step": 25126 }, { "epoch": 0.733613617120668, "grad_norm": 0.6894591134991228, "learning_rate": 1.4799675587996756e-06, "loss": 0.6374, "step": 25127 }, { "epoch": 0.7336428133485153, "grad_norm": 0.7110349525688879, "learning_rate": 1.4798053527980536e-06, "loss": 0.5688, "step": 25128 }, { "epoch": 0.7336720095763627, "grad_norm": 0.6572623421796279, "learning_rate": 1.4796431467964314e-06, "loss": 0.502, "step": 25129 }, { "epoch": 0.73370120580421, "grad_norm": 0.7204004752061943, "learning_rate": 1.4794809407948096e-06, "loss": 0.6632, "step": 25130 }, { "epoch": 0.7337304020320574, "grad_norm": 0.6711831285629009, "learning_rate": 1.4793187347931876e-06, "loss": 0.531, "step": 25131 }, { "epoch": 0.7337595982599048, "grad_norm": 0.7772782549287517, "learning_rate": 1.4791565287915654e-06, "loss": 0.6734, "step": 25132 }, { "epoch": 0.7337887944877521, "grad_norm": 0.7179622259051348, "learning_rate": 1.4789943227899434e-06, "loss": 0.6597, "step": 25133 }, { "epoch": 0.7338179907155995, "grad_norm": 0.7736085246324732, "learning_rate": 1.4788321167883212e-06, "loss": 0.7472, "step": 25134 }, { "epoch": 0.7338471869434469, "grad_norm": 0.7368333647463177, "learning_rate": 1.4786699107866992e-06, "loss": 0.64, "step": 25135 }, { "epoch": 0.7338763831712942, "grad_norm": 0.7569116146750914, "learning_rate": 1.478507704785077e-06, "loss": 0.7139, "step": 25136 }, { "epoch": 0.7339055793991416, "grad_norm": 0.7120042762109974, "learning_rate": 1.478345498783455e-06, "loss": 0.6646, "step": 25137 }, { "epoch": 0.7339347756269889, "grad_norm": 0.7469368640652655, "learning_rate": 1.4781832927818328e-06, "loss": 0.6436, "step": 25138 }, { "epoch": 0.7339639718548363, "grad_norm": 0.6869663854627618, "learning_rate": 1.478021086780211e-06, "loss": 0.6417, "step": 25139 }, { "epoch": 0.7339931680826837, "grad_norm": 0.7899148972472624, "learning_rate": 1.477858880778589e-06, "loss": 0.5522, "step": 25140 }, { "epoch": 0.734022364310531, "grad_norm": 0.760417464538125, "learning_rate": 1.4776966747769669e-06, "loss": 0.6871, "step": 25141 }, { "epoch": 0.7340515605383784, "grad_norm": 0.7263741477494065, "learning_rate": 1.4775344687753449e-06, "loss": 0.6178, "step": 25142 }, { "epoch": 0.7340807567662258, "grad_norm": 0.7308836006808423, "learning_rate": 1.4773722627737227e-06, "loss": 0.6396, "step": 25143 }, { "epoch": 0.7341099529940731, "grad_norm": 0.7248536352643541, "learning_rate": 1.4772100567721007e-06, "loss": 0.6256, "step": 25144 }, { "epoch": 0.7341391492219205, "grad_norm": 0.70741755586281, "learning_rate": 1.4770478507704785e-06, "loss": 0.5585, "step": 25145 }, { "epoch": 0.734168345449768, "grad_norm": 0.7148849537686277, "learning_rate": 1.4768856447688565e-06, "loss": 0.6527, "step": 25146 }, { "epoch": 0.7341975416776153, "grad_norm": 0.7755626433386987, "learning_rate": 1.4767234387672345e-06, "loss": 0.6756, "step": 25147 }, { "epoch": 0.7342267379054627, "grad_norm": 0.6923867432557625, "learning_rate": 1.4765612327656125e-06, "loss": 0.5542, "step": 25148 }, { "epoch": 0.73425593413331, "grad_norm": 0.686798680276082, "learning_rate": 1.4763990267639905e-06, "loss": 0.5377, "step": 25149 }, { "epoch": 0.7342851303611574, "grad_norm": 0.7865175392760073, "learning_rate": 1.4762368207623685e-06, "loss": 0.7189, "step": 25150 }, { "epoch": 0.7343143265890048, "grad_norm": 0.7303765927500756, "learning_rate": 1.4760746147607463e-06, "loss": 0.6466, "step": 25151 }, { "epoch": 0.7343435228168521, "grad_norm": 0.7683021851793888, "learning_rate": 1.4759124087591243e-06, "loss": 0.7296, "step": 25152 }, { "epoch": 0.7343727190446995, "grad_norm": 0.7862185144700178, "learning_rate": 1.475750202757502e-06, "loss": 0.7698, "step": 25153 }, { "epoch": 0.7344019152725468, "grad_norm": 0.709772607118005, "learning_rate": 1.4755879967558801e-06, "loss": 0.5912, "step": 25154 }, { "epoch": 0.7344311115003942, "grad_norm": 0.6715883567520762, "learning_rate": 1.475425790754258e-06, "loss": 0.5861, "step": 25155 }, { "epoch": 0.7344603077282416, "grad_norm": 0.7923388131519773, "learning_rate": 1.475263584752636e-06, "loss": 0.6066, "step": 25156 }, { "epoch": 0.7344895039560889, "grad_norm": 0.7329400927504476, "learning_rate": 1.4751013787510137e-06, "loss": 0.6957, "step": 25157 }, { "epoch": 0.7345187001839363, "grad_norm": 0.7863229522758669, "learning_rate": 1.474939172749392e-06, "loss": 0.6109, "step": 25158 }, { "epoch": 0.7345478964117836, "grad_norm": 0.7545254343776802, "learning_rate": 1.47477696674777e-06, "loss": 0.6397, "step": 25159 }, { "epoch": 0.734577092639631, "grad_norm": 0.8192685047369561, "learning_rate": 1.4746147607461477e-06, "loss": 0.6964, "step": 25160 }, { "epoch": 0.7346062888674784, "grad_norm": 0.7271977357547642, "learning_rate": 1.4744525547445257e-06, "loss": 0.643, "step": 25161 }, { "epoch": 0.7346354850953257, "grad_norm": 0.7368141374210567, "learning_rate": 1.4742903487429035e-06, "loss": 0.6751, "step": 25162 }, { "epoch": 0.7346646813231731, "grad_norm": 0.7278587879557337, "learning_rate": 1.4741281427412815e-06, "loss": 0.6535, "step": 25163 }, { "epoch": 0.7346938775510204, "grad_norm": 0.6875774179156235, "learning_rate": 1.4739659367396593e-06, "loss": 0.5947, "step": 25164 }, { "epoch": 0.7347230737788678, "grad_norm": 0.7176803082115013, "learning_rate": 1.4738037307380373e-06, "loss": 0.6288, "step": 25165 }, { "epoch": 0.7347522700067152, "grad_norm": 0.7202622094655675, "learning_rate": 1.4736415247364153e-06, "loss": 0.6691, "step": 25166 }, { "epoch": 0.7347814662345625, "grad_norm": 0.7310075815433459, "learning_rate": 1.4734793187347933e-06, "loss": 0.5605, "step": 25167 }, { "epoch": 0.7348106624624099, "grad_norm": 0.7555034713448715, "learning_rate": 1.4733171127331714e-06, "loss": 0.6623, "step": 25168 }, { "epoch": 0.7348398586902573, "grad_norm": 0.7625999372560688, "learning_rate": 1.4731549067315494e-06, "loss": 0.6178, "step": 25169 }, { "epoch": 0.7348690549181046, "grad_norm": 0.7460371770753234, "learning_rate": 1.4729927007299272e-06, "loss": 0.6342, "step": 25170 }, { "epoch": 0.734898251145952, "grad_norm": 0.768822188208592, "learning_rate": 1.4728304947283052e-06, "loss": 0.6608, "step": 25171 }, { "epoch": 0.7349274473737993, "grad_norm": 0.7540337708582355, "learning_rate": 1.472668288726683e-06, "loss": 0.6873, "step": 25172 }, { "epoch": 0.7349566436016467, "grad_norm": 0.7185415114818364, "learning_rate": 1.472506082725061e-06, "loss": 0.6513, "step": 25173 }, { "epoch": 0.7349858398294941, "grad_norm": 0.7055282781486772, "learning_rate": 1.4723438767234388e-06, "loss": 0.622, "step": 25174 }, { "epoch": 0.7350150360573414, "grad_norm": 0.6749361955048074, "learning_rate": 1.4721816707218168e-06, "loss": 0.5669, "step": 25175 }, { "epoch": 0.7350442322851888, "grad_norm": 0.7867555989019437, "learning_rate": 1.4720194647201946e-06, "loss": 0.723, "step": 25176 }, { "epoch": 0.7350734285130361, "grad_norm": 0.7336608525767113, "learning_rate": 1.4718572587185728e-06, "loss": 0.6692, "step": 25177 }, { "epoch": 0.7351026247408835, "grad_norm": 0.7524320155540687, "learning_rate": 1.4716950527169508e-06, "loss": 0.6754, "step": 25178 }, { "epoch": 0.7351318209687309, "grad_norm": 0.8972661525306086, "learning_rate": 1.4715328467153286e-06, "loss": 0.667, "step": 25179 }, { "epoch": 0.7351610171965782, "grad_norm": 0.8185271541186103, "learning_rate": 1.4713706407137066e-06, "loss": 0.7255, "step": 25180 }, { "epoch": 0.7351902134244256, "grad_norm": 0.7603610524077887, "learning_rate": 1.4712084347120844e-06, "loss": 0.6891, "step": 25181 }, { "epoch": 0.735219409652273, "grad_norm": 0.6906798635755089, "learning_rate": 1.4710462287104624e-06, "loss": 0.61, "step": 25182 }, { "epoch": 0.7352486058801203, "grad_norm": 0.7556842323793813, "learning_rate": 1.4708840227088402e-06, "loss": 0.713, "step": 25183 }, { "epoch": 0.7352778021079677, "grad_norm": 0.6753022171670928, "learning_rate": 1.4707218167072182e-06, "loss": 0.5626, "step": 25184 }, { "epoch": 0.735306998335815, "grad_norm": 0.7642420388033461, "learning_rate": 1.4705596107055962e-06, "loss": 0.6752, "step": 25185 }, { "epoch": 0.7353361945636624, "grad_norm": 0.804100303846822, "learning_rate": 1.4703974047039742e-06, "loss": 0.569, "step": 25186 }, { "epoch": 0.7353653907915098, "grad_norm": 0.7162375674790652, "learning_rate": 1.4702351987023522e-06, "loss": 0.6676, "step": 25187 }, { "epoch": 0.7353945870193571, "grad_norm": 0.8181784190283871, "learning_rate": 1.4700729927007302e-06, "loss": 0.7201, "step": 25188 }, { "epoch": 0.7354237832472045, "grad_norm": 0.7126550137005371, "learning_rate": 1.469910786699108e-06, "loss": 0.6402, "step": 25189 }, { "epoch": 0.7354529794750518, "grad_norm": 0.7106138183396044, "learning_rate": 1.469748580697486e-06, "loss": 0.6188, "step": 25190 }, { "epoch": 0.7354821757028992, "grad_norm": 0.8259452474880096, "learning_rate": 1.4695863746958638e-06, "loss": 0.7016, "step": 25191 }, { "epoch": 0.7355113719307466, "grad_norm": 0.721350179655978, "learning_rate": 1.4694241686942418e-06, "loss": 0.6373, "step": 25192 }, { "epoch": 0.7355405681585939, "grad_norm": 0.7448268512973975, "learning_rate": 1.4692619626926196e-06, "loss": 0.7181, "step": 25193 }, { "epoch": 0.7355697643864413, "grad_norm": 0.6861098850801128, "learning_rate": 1.4690997566909976e-06, "loss": 0.6226, "step": 25194 }, { "epoch": 0.7355989606142886, "grad_norm": 0.91080498770412, "learning_rate": 1.4689375506893754e-06, "loss": 0.6856, "step": 25195 }, { "epoch": 0.735628156842136, "grad_norm": 0.7368233196974541, "learning_rate": 1.4687753446877536e-06, "loss": 0.677, "step": 25196 }, { "epoch": 0.7356573530699834, "grad_norm": 0.7414857174322209, "learning_rate": 1.4686131386861316e-06, "loss": 0.6293, "step": 25197 }, { "epoch": 0.7356865492978307, "grad_norm": 0.7120519902062289, "learning_rate": 1.4684509326845094e-06, "loss": 0.5907, "step": 25198 }, { "epoch": 0.7357157455256781, "grad_norm": 0.7188322285149161, "learning_rate": 1.4682887266828874e-06, "loss": 0.6134, "step": 25199 }, { "epoch": 0.7357449417535254, "grad_norm": 0.7254962268305569, "learning_rate": 1.4681265206812652e-06, "loss": 0.6361, "step": 25200 }, { "epoch": 0.7357741379813728, "grad_norm": 0.8324618235374305, "learning_rate": 1.4679643146796433e-06, "loss": 0.7712, "step": 25201 }, { "epoch": 0.7358033342092202, "grad_norm": 0.7764213402605311, "learning_rate": 1.467802108678021e-06, "loss": 0.7134, "step": 25202 }, { "epoch": 0.7358325304370675, "grad_norm": 0.730056836586693, "learning_rate": 1.467639902676399e-06, "loss": 0.5711, "step": 25203 }, { "epoch": 0.7358617266649149, "grad_norm": 0.7380131911304039, "learning_rate": 1.467477696674777e-06, "loss": 0.6752, "step": 25204 }, { "epoch": 0.7358909228927623, "grad_norm": 0.6890242177431825, "learning_rate": 1.467315490673155e-06, "loss": 0.5902, "step": 25205 }, { "epoch": 0.7359201191206096, "grad_norm": 0.7133249266317876, "learning_rate": 1.467153284671533e-06, "loss": 0.6186, "step": 25206 }, { "epoch": 0.735949315348457, "grad_norm": 0.7577255800009309, "learning_rate": 1.4669910786699109e-06, "loss": 0.626, "step": 25207 }, { "epoch": 0.7359785115763043, "grad_norm": 0.6817689308095136, "learning_rate": 1.4668288726682889e-06, "loss": 0.5662, "step": 25208 }, { "epoch": 0.7360077078041517, "grad_norm": 0.7171548370026635, "learning_rate": 1.4666666666666669e-06, "loss": 0.6105, "step": 25209 }, { "epoch": 0.7360369040319991, "grad_norm": 0.7633914646646155, "learning_rate": 1.4665044606650447e-06, "loss": 0.6847, "step": 25210 }, { "epoch": 0.7360661002598464, "grad_norm": 0.7559659328645761, "learning_rate": 1.4663422546634227e-06, "loss": 0.7001, "step": 25211 }, { "epoch": 0.7360952964876938, "grad_norm": 0.7258340349924927, "learning_rate": 1.4661800486618005e-06, "loss": 0.6934, "step": 25212 }, { "epoch": 0.7361244927155411, "grad_norm": 0.7021292625421198, "learning_rate": 1.4660178426601785e-06, "loss": 0.588, "step": 25213 }, { "epoch": 0.7361536889433885, "grad_norm": 0.740102611519373, "learning_rate": 1.4658556366585567e-06, "loss": 0.718, "step": 25214 }, { "epoch": 0.7361828851712359, "grad_norm": 0.7219561599918592, "learning_rate": 1.4656934306569345e-06, "loss": 0.6257, "step": 25215 }, { "epoch": 0.7362120813990832, "grad_norm": 0.6827474515137438, "learning_rate": 1.4655312246553125e-06, "loss": 0.5751, "step": 25216 }, { "epoch": 0.7362412776269306, "grad_norm": 0.7629314100053125, "learning_rate": 1.4653690186536903e-06, "loss": 0.6739, "step": 25217 }, { "epoch": 0.736270473854778, "grad_norm": 0.7288528629769297, "learning_rate": 1.4652068126520683e-06, "loss": 0.6489, "step": 25218 }, { "epoch": 0.7362996700826253, "grad_norm": 0.7392435623379724, "learning_rate": 1.465044606650446e-06, "loss": 0.6403, "step": 25219 }, { "epoch": 0.7363288663104727, "grad_norm": 0.7112941866862659, "learning_rate": 1.4648824006488241e-06, "loss": 0.6697, "step": 25220 }, { "epoch": 0.73635806253832, "grad_norm": 0.7017121900870196, "learning_rate": 1.464720194647202e-06, "loss": 0.5972, "step": 25221 }, { "epoch": 0.7363872587661674, "grad_norm": 0.7447536953414734, "learning_rate": 1.46455798864558e-06, "loss": 0.6531, "step": 25222 }, { "epoch": 0.7364164549940148, "grad_norm": 0.7396901164324254, "learning_rate": 1.464395782643958e-06, "loss": 0.7217, "step": 25223 }, { "epoch": 0.7364456512218621, "grad_norm": 0.6801052454522731, "learning_rate": 1.464233576642336e-06, "loss": 0.5933, "step": 25224 }, { "epoch": 0.7364748474497095, "grad_norm": 0.7205243265660891, "learning_rate": 1.464071370640714e-06, "loss": 0.6363, "step": 25225 }, { "epoch": 0.7365040436775568, "grad_norm": 0.7182449014273973, "learning_rate": 1.4639091646390917e-06, "loss": 0.6206, "step": 25226 }, { "epoch": 0.7365332399054042, "grad_norm": 0.7267674079336565, "learning_rate": 1.4637469586374697e-06, "loss": 0.6066, "step": 25227 }, { "epoch": 0.7365624361332516, "grad_norm": 0.7253404201498878, "learning_rate": 1.4635847526358477e-06, "loss": 0.6776, "step": 25228 }, { "epoch": 0.7365916323610989, "grad_norm": 0.712645311466041, "learning_rate": 1.4634225466342255e-06, "loss": 0.5935, "step": 25229 }, { "epoch": 0.7366208285889463, "grad_norm": 0.7851674020117256, "learning_rate": 1.4632603406326035e-06, "loss": 0.7634, "step": 25230 }, { "epoch": 0.7366500248167936, "grad_norm": 0.7297180857312467, "learning_rate": 1.4630981346309813e-06, "loss": 0.6422, "step": 25231 }, { "epoch": 0.736679221044641, "grad_norm": 0.7390005039527461, "learning_rate": 1.4629359286293593e-06, "loss": 0.6537, "step": 25232 }, { "epoch": 0.7367084172724884, "grad_norm": 0.6734578030416413, "learning_rate": 1.4627737226277376e-06, "loss": 0.5692, "step": 25233 }, { "epoch": 0.7367376135003357, "grad_norm": 0.6881549253265138, "learning_rate": 1.4626115166261154e-06, "loss": 0.604, "step": 25234 }, { "epoch": 0.7367668097281831, "grad_norm": 0.7551701415584391, "learning_rate": 1.4624493106244934e-06, "loss": 0.6439, "step": 25235 }, { "epoch": 0.7367960059560305, "grad_norm": 0.7512252778257966, "learning_rate": 1.4622871046228712e-06, "loss": 0.6973, "step": 25236 }, { "epoch": 0.7368252021838778, "grad_norm": 0.7327031526457443, "learning_rate": 1.4621248986212492e-06, "loss": 0.6777, "step": 25237 }, { "epoch": 0.7368543984117252, "grad_norm": 0.7057798884710199, "learning_rate": 1.461962692619627e-06, "loss": 0.6493, "step": 25238 }, { "epoch": 0.7368835946395725, "grad_norm": 0.7391321620985637, "learning_rate": 1.461800486618005e-06, "loss": 0.6458, "step": 25239 }, { "epoch": 0.7369127908674199, "grad_norm": 0.6881247418563311, "learning_rate": 1.4616382806163828e-06, "loss": 0.5788, "step": 25240 }, { "epoch": 0.7369419870952673, "grad_norm": 0.8189188480146141, "learning_rate": 1.4614760746147608e-06, "loss": 0.7238, "step": 25241 }, { "epoch": 0.7369711833231146, "grad_norm": 0.7733921983284613, "learning_rate": 1.4613138686131388e-06, "loss": 0.7122, "step": 25242 }, { "epoch": 0.737000379550962, "grad_norm": 0.663692908892606, "learning_rate": 1.4611516626115168e-06, "loss": 0.516, "step": 25243 }, { "epoch": 0.7370295757788093, "grad_norm": 0.7219834497492182, "learning_rate": 1.4609894566098948e-06, "loss": 0.6263, "step": 25244 }, { "epoch": 0.7370587720066567, "grad_norm": 0.650123574709964, "learning_rate": 1.4608272506082726e-06, "loss": 0.5624, "step": 25245 }, { "epoch": 0.7370879682345041, "grad_norm": 0.7693881032097973, "learning_rate": 1.4606650446066506e-06, "loss": 0.7171, "step": 25246 }, { "epoch": 0.7371171644623514, "grad_norm": 0.7047425180620708, "learning_rate": 1.4605028386050286e-06, "loss": 0.6294, "step": 25247 }, { "epoch": 0.7371463606901988, "grad_norm": 0.6861985294208799, "learning_rate": 1.4603406326034064e-06, "loss": 0.6409, "step": 25248 }, { "epoch": 0.7371755569180461, "grad_norm": 0.7364678135106862, "learning_rate": 1.4601784266017844e-06, "loss": 0.6399, "step": 25249 }, { "epoch": 0.7372047531458935, "grad_norm": 0.7337420789758033, "learning_rate": 1.4600162206001622e-06, "loss": 0.6646, "step": 25250 }, { "epoch": 0.7372339493737409, "grad_norm": 0.7327312987659466, "learning_rate": 1.4598540145985402e-06, "loss": 0.6757, "step": 25251 }, { "epoch": 0.7372631456015882, "grad_norm": 0.7381943015701855, "learning_rate": 1.4596918085969184e-06, "loss": 0.6959, "step": 25252 }, { "epoch": 0.7372923418294356, "grad_norm": 0.7274808315572074, "learning_rate": 1.4595296025952962e-06, "loss": 0.6559, "step": 25253 }, { "epoch": 0.737321538057283, "grad_norm": 0.6714190076347674, "learning_rate": 1.4593673965936742e-06, "loss": 0.5241, "step": 25254 }, { "epoch": 0.7373507342851303, "grad_norm": 0.686998722814871, "learning_rate": 1.459205190592052e-06, "loss": 0.5824, "step": 25255 }, { "epoch": 0.7373799305129777, "grad_norm": 0.780426488630675, "learning_rate": 1.45904298459043e-06, "loss": 0.7375, "step": 25256 }, { "epoch": 0.737409126740825, "grad_norm": 0.7453478859957092, "learning_rate": 1.4588807785888078e-06, "loss": 0.6217, "step": 25257 }, { "epoch": 0.7374383229686724, "grad_norm": 0.8307930994112322, "learning_rate": 1.4587185725871858e-06, "loss": 0.7614, "step": 25258 }, { "epoch": 0.7374675191965198, "grad_norm": 0.7482559250346877, "learning_rate": 1.4585563665855636e-06, "loss": 0.6454, "step": 25259 }, { "epoch": 0.7374967154243671, "grad_norm": 0.7300202367596766, "learning_rate": 1.4583941605839416e-06, "loss": 0.6612, "step": 25260 }, { "epoch": 0.7375259116522145, "grad_norm": 0.8015990428489964, "learning_rate": 1.4582319545823196e-06, "loss": 0.7037, "step": 25261 }, { "epoch": 0.7375551078800618, "grad_norm": 0.6913701971202618, "learning_rate": 1.4580697485806976e-06, "loss": 0.5885, "step": 25262 }, { "epoch": 0.7375843041079092, "grad_norm": 0.7668476095279636, "learning_rate": 1.4579075425790756e-06, "loss": 0.66, "step": 25263 }, { "epoch": 0.7376135003357566, "grad_norm": 0.6989018435636954, "learning_rate": 1.4577453365774534e-06, "loss": 0.6261, "step": 25264 }, { "epoch": 0.7376426965636039, "grad_norm": 0.682428857881062, "learning_rate": 1.4575831305758315e-06, "loss": 0.5823, "step": 25265 }, { "epoch": 0.7376718927914513, "grad_norm": 0.7265948136369461, "learning_rate": 1.4574209245742095e-06, "loss": 0.6551, "step": 25266 }, { "epoch": 0.7377010890192988, "grad_norm": 0.7533619790059521, "learning_rate": 1.4572587185725873e-06, "loss": 0.6907, "step": 25267 }, { "epoch": 0.7377302852471461, "grad_norm": 0.8183218812638041, "learning_rate": 1.4570965125709653e-06, "loss": 0.6483, "step": 25268 }, { "epoch": 0.7377594814749935, "grad_norm": 0.7432716381904126, "learning_rate": 1.456934306569343e-06, "loss": 0.621, "step": 25269 }, { "epoch": 0.7377886777028408, "grad_norm": 0.7075270073831634, "learning_rate": 1.456772100567721e-06, "loss": 0.6625, "step": 25270 }, { "epoch": 0.7378178739306882, "grad_norm": 0.7439350166586471, "learning_rate": 1.4566098945660993e-06, "loss": 0.6791, "step": 25271 }, { "epoch": 0.7378470701585356, "grad_norm": 0.7498980180825368, "learning_rate": 1.456447688564477e-06, "loss": 0.6414, "step": 25272 }, { "epoch": 0.7378762663863829, "grad_norm": 0.7317493416186629, "learning_rate": 1.456285482562855e-06, "loss": 0.6385, "step": 25273 }, { "epoch": 0.7379054626142303, "grad_norm": 0.7513785108386798, "learning_rate": 1.4561232765612329e-06, "loss": 0.6802, "step": 25274 }, { "epoch": 0.7379346588420777, "grad_norm": 0.6714750271185652, "learning_rate": 1.4559610705596109e-06, "loss": 0.5702, "step": 25275 }, { "epoch": 0.737963855069925, "grad_norm": 0.6945778044882075, "learning_rate": 1.4557988645579887e-06, "loss": 0.6283, "step": 25276 }, { "epoch": 0.7379930512977724, "grad_norm": 0.7453609717136227, "learning_rate": 1.4556366585563667e-06, "loss": 0.6975, "step": 25277 }, { "epoch": 0.7380222475256197, "grad_norm": 0.740262574508503, "learning_rate": 1.4554744525547445e-06, "loss": 0.6904, "step": 25278 }, { "epoch": 0.7380514437534671, "grad_norm": 0.7057100689772636, "learning_rate": 1.4553122465531225e-06, "loss": 0.6059, "step": 25279 }, { "epoch": 0.7380806399813145, "grad_norm": 0.8521687922918321, "learning_rate": 1.4551500405515003e-06, "loss": 0.6626, "step": 25280 }, { "epoch": 0.7381098362091618, "grad_norm": 0.7554611426551396, "learning_rate": 1.4549878345498785e-06, "loss": 0.6537, "step": 25281 }, { "epoch": 0.7381390324370092, "grad_norm": 0.7595394927716667, "learning_rate": 1.4548256285482565e-06, "loss": 0.6611, "step": 25282 }, { "epoch": 0.7381682286648565, "grad_norm": 0.7652276420007641, "learning_rate": 1.4546634225466343e-06, "loss": 0.7032, "step": 25283 }, { "epoch": 0.7381974248927039, "grad_norm": 0.7313435225893592, "learning_rate": 1.4545012165450123e-06, "loss": 0.6218, "step": 25284 }, { "epoch": 0.7382266211205513, "grad_norm": 0.6969786673027203, "learning_rate": 1.4543390105433903e-06, "loss": 0.5696, "step": 25285 }, { "epoch": 0.7382558173483986, "grad_norm": 0.7173228833886826, "learning_rate": 1.4541768045417681e-06, "loss": 0.6604, "step": 25286 }, { "epoch": 0.738285013576246, "grad_norm": 0.7665797982456392, "learning_rate": 1.4540145985401461e-06, "loss": 0.7106, "step": 25287 }, { "epoch": 0.7383142098040933, "grad_norm": 0.7719979961771765, "learning_rate": 1.453852392538524e-06, "loss": 0.688, "step": 25288 }, { "epoch": 0.7383434060319407, "grad_norm": 0.6876754385241969, "learning_rate": 1.453690186536902e-06, "loss": 0.5562, "step": 25289 }, { "epoch": 0.7383726022597881, "grad_norm": 0.659534390495919, "learning_rate": 1.4535279805352801e-06, "loss": 0.5314, "step": 25290 }, { "epoch": 0.7384017984876354, "grad_norm": 0.7150330638838587, "learning_rate": 1.453365774533658e-06, "loss": 0.6392, "step": 25291 }, { "epoch": 0.7384309947154828, "grad_norm": 0.6871759530666111, "learning_rate": 1.453203568532036e-06, "loss": 0.5987, "step": 25292 }, { "epoch": 0.7384601909433302, "grad_norm": 0.7022806119368107, "learning_rate": 1.4530413625304137e-06, "loss": 0.6152, "step": 25293 }, { "epoch": 0.7384893871711775, "grad_norm": 0.6568392965999938, "learning_rate": 1.4528791565287917e-06, "loss": 0.5373, "step": 25294 }, { "epoch": 0.7385185833990249, "grad_norm": 0.7141478142258656, "learning_rate": 1.4527169505271695e-06, "loss": 0.6259, "step": 25295 }, { "epoch": 0.7385477796268722, "grad_norm": 0.8927530597209451, "learning_rate": 1.4525547445255475e-06, "loss": 0.6675, "step": 25296 }, { "epoch": 0.7385769758547196, "grad_norm": 0.696781371119165, "learning_rate": 1.4523925385239253e-06, "loss": 0.6226, "step": 25297 }, { "epoch": 0.738606172082567, "grad_norm": 0.7171551291532822, "learning_rate": 1.4522303325223033e-06, "loss": 0.6764, "step": 25298 }, { "epoch": 0.7386353683104143, "grad_norm": 0.7416617233272538, "learning_rate": 1.4520681265206816e-06, "loss": 0.6791, "step": 25299 }, { "epoch": 0.7386645645382617, "grad_norm": 0.8430250492843064, "learning_rate": 1.4519059205190594e-06, "loss": 0.651, "step": 25300 }, { "epoch": 0.738693760766109, "grad_norm": 0.7348677898337144, "learning_rate": 1.4517437145174374e-06, "loss": 0.6796, "step": 25301 }, { "epoch": 0.7387229569939564, "grad_norm": 0.6638379513213767, "learning_rate": 1.4515815085158152e-06, "loss": 0.5893, "step": 25302 }, { "epoch": 0.7387521532218038, "grad_norm": 0.7501686008797672, "learning_rate": 1.4514193025141932e-06, "loss": 0.6823, "step": 25303 }, { "epoch": 0.7387813494496511, "grad_norm": 0.706950604655602, "learning_rate": 1.4512570965125712e-06, "loss": 0.6104, "step": 25304 }, { "epoch": 0.7388105456774985, "grad_norm": 0.7071466345759271, "learning_rate": 1.451094890510949e-06, "loss": 0.6237, "step": 25305 }, { "epoch": 0.7388397419053458, "grad_norm": 0.7185442731255948, "learning_rate": 1.450932684509327e-06, "loss": 0.6151, "step": 25306 }, { "epoch": 0.7388689381331932, "grad_norm": 0.6858867474465291, "learning_rate": 1.4507704785077048e-06, "loss": 0.5521, "step": 25307 }, { "epoch": 0.7388981343610406, "grad_norm": 0.7111537274722811, "learning_rate": 1.4506082725060828e-06, "loss": 0.6112, "step": 25308 }, { "epoch": 0.7389273305888879, "grad_norm": 0.694450362607748, "learning_rate": 1.450446066504461e-06, "loss": 0.6186, "step": 25309 }, { "epoch": 0.7389565268167353, "grad_norm": 0.6640378900830604, "learning_rate": 1.4502838605028388e-06, "loss": 0.5436, "step": 25310 }, { "epoch": 0.7389857230445827, "grad_norm": 0.7584327851424796, "learning_rate": 1.4501216545012168e-06, "loss": 0.661, "step": 25311 }, { "epoch": 0.73901491927243, "grad_norm": 0.7251714753701853, "learning_rate": 1.4499594484995946e-06, "loss": 0.651, "step": 25312 }, { "epoch": 0.7390441155002774, "grad_norm": 0.7003236640133913, "learning_rate": 1.4497972424979726e-06, "loss": 0.6475, "step": 25313 }, { "epoch": 0.7390733117281247, "grad_norm": 0.726757085695124, "learning_rate": 1.4496350364963504e-06, "loss": 0.68, "step": 25314 }, { "epoch": 0.7391025079559721, "grad_norm": 0.733894933077135, "learning_rate": 1.4494728304947284e-06, "loss": 0.6826, "step": 25315 }, { "epoch": 0.7391317041838195, "grad_norm": 0.7184879586493893, "learning_rate": 1.4493106244931062e-06, "loss": 0.6225, "step": 25316 }, { "epoch": 0.7391609004116668, "grad_norm": 0.7349361125838236, "learning_rate": 1.4491484184914842e-06, "loss": 0.6348, "step": 25317 }, { "epoch": 0.7391900966395142, "grad_norm": 0.6943635785353106, "learning_rate": 1.4489862124898624e-06, "loss": 0.6058, "step": 25318 }, { "epoch": 0.7392192928673615, "grad_norm": 0.737879216408499, "learning_rate": 1.4488240064882402e-06, "loss": 0.6654, "step": 25319 }, { "epoch": 0.7392484890952089, "grad_norm": 0.7612830791860414, "learning_rate": 1.4486618004866182e-06, "loss": 0.6677, "step": 25320 }, { "epoch": 0.7392776853230563, "grad_norm": 0.6940920190259097, "learning_rate": 1.448499594484996e-06, "loss": 0.6077, "step": 25321 }, { "epoch": 0.7393068815509036, "grad_norm": 0.713234274034363, "learning_rate": 1.448337388483374e-06, "loss": 0.6606, "step": 25322 }, { "epoch": 0.739336077778751, "grad_norm": 0.7347449829608952, "learning_rate": 1.448175182481752e-06, "loss": 0.6494, "step": 25323 }, { "epoch": 0.7393652740065983, "grad_norm": 0.7131486307503819, "learning_rate": 1.4480129764801298e-06, "loss": 0.5938, "step": 25324 }, { "epoch": 0.7393944702344457, "grad_norm": 0.7256136924636484, "learning_rate": 1.4478507704785078e-06, "loss": 0.6272, "step": 25325 }, { "epoch": 0.7394236664622931, "grad_norm": 0.673719269223515, "learning_rate": 1.4476885644768856e-06, "loss": 0.588, "step": 25326 }, { "epoch": 0.7394528626901404, "grad_norm": 0.7181632899531549, "learning_rate": 1.4475263584752636e-06, "loss": 0.6148, "step": 25327 }, { "epoch": 0.7394820589179878, "grad_norm": 0.6892333060202435, "learning_rate": 1.4473641524736419e-06, "loss": 0.5998, "step": 25328 }, { "epoch": 0.7395112551458352, "grad_norm": 0.709234316471141, "learning_rate": 1.4472019464720197e-06, "loss": 0.5902, "step": 25329 }, { "epoch": 0.7395404513736825, "grad_norm": 0.7800106807359666, "learning_rate": 1.4470397404703977e-06, "loss": 0.6579, "step": 25330 }, { "epoch": 0.7395696476015299, "grad_norm": 0.7041430147642483, "learning_rate": 1.4468775344687755e-06, "loss": 0.6085, "step": 25331 }, { "epoch": 0.7395988438293772, "grad_norm": 0.7144965310939816, "learning_rate": 1.4467153284671535e-06, "loss": 0.6305, "step": 25332 }, { "epoch": 0.7396280400572246, "grad_norm": 0.7084691411336668, "learning_rate": 1.4465531224655313e-06, "loss": 0.621, "step": 25333 }, { "epoch": 0.739657236285072, "grad_norm": 0.7123240426168137, "learning_rate": 1.4463909164639093e-06, "loss": 0.6511, "step": 25334 }, { "epoch": 0.7396864325129193, "grad_norm": 0.783302397025019, "learning_rate": 1.446228710462287e-06, "loss": 0.6765, "step": 25335 }, { "epoch": 0.7397156287407667, "grad_norm": 0.7313371830970672, "learning_rate": 1.446066504460665e-06, "loss": 0.6944, "step": 25336 }, { "epoch": 0.739744824968614, "grad_norm": 0.6892036787226055, "learning_rate": 1.4459042984590433e-06, "loss": 0.5797, "step": 25337 }, { "epoch": 0.7397740211964614, "grad_norm": 0.7145742892261485, "learning_rate": 1.445742092457421e-06, "loss": 0.6241, "step": 25338 }, { "epoch": 0.7398032174243088, "grad_norm": 0.7679601052489985, "learning_rate": 1.445579886455799e-06, "loss": 0.6941, "step": 25339 }, { "epoch": 0.7398324136521561, "grad_norm": 0.7857376704131508, "learning_rate": 1.4454176804541769e-06, "loss": 0.6718, "step": 25340 }, { "epoch": 0.7398616098800035, "grad_norm": 0.7471503133846759, "learning_rate": 1.4452554744525549e-06, "loss": 0.7207, "step": 25341 }, { "epoch": 0.7398908061078509, "grad_norm": 0.7414771011251349, "learning_rate": 1.4450932684509327e-06, "loss": 0.673, "step": 25342 }, { "epoch": 0.7399200023356982, "grad_norm": 0.7193821580299568, "learning_rate": 1.4449310624493107e-06, "loss": 0.6545, "step": 25343 }, { "epoch": 0.7399491985635456, "grad_norm": 0.6893217420571323, "learning_rate": 1.4447688564476887e-06, "loss": 0.5313, "step": 25344 }, { "epoch": 0.7399783947913929, "grad_norm": 0.7804305006345439, "learning_rate": 1.4446066504460665e-06, "loss": 0.6054, "step": 25345 }, { "epoch": 0.7400075910192403, "grad_norm": 0.7010617904924892, "learning_rate": 1.4444444444444445e-06, "loss": 0.645, "step": 25346 }, { "epoch": 0.7400367872470877, "grad_norm": 0.6844706625972549, "learning_rate": 1.4442822384428227e-06, "loss": 0.5972, "step": 25347 }, { "epoch": 0.740065983474935, "grad_norm": 0.7433896819407578, "learning_rate": 1.4441200324412005e-06, "loss": 0.6617, "step": 25348 }, { "epoch": 0.7400951797027824, "grad_norm": 0.7693675266204673, "learning_rate": 1.4439578264395785e-06, "loss": 0.7136, "step": 25349 }, { "epoch": 0.7401243759306297, "grad_norm": 0.6941352996526665, "learning_rate": 1.4437956204379563e-06, "loss": 0.6029, "step": 25350 }, { "epoch": 0.7401535721584771, "grad_norm": 0.7224050823050329, "learning_rate": 1.4436334144363343e-06, "loss": 0.5683, "step": 25351 }, { "epoch": 0.7401827683863245, "grad_norm": 0.8984583247999236, "learning_rate": 1.4434712084347121e-06, "loss": 0.6778, "step": 25352 }, { "epoch": 0.7402119646141718, "grad_norm": 0.6519258982376873, "learning_rate": 1.4433090024330901e-06, "loss": 0.5949, "step": 25353 }, { "epoch": 0.7402411608420192, "grad_norm": 0.6993369884112586, "learning_rate": 1.443146796431468e-06, "loss": 0.6161, "step": 25354 }, { "epoch": 0.7402703570698665, "grad_norm": 0.7292450441616566, "learning_rate": 1.442984590429846e-06, "loss": 0.5852, "step": 25355 }, { "epoch": 0.7402995532977139, "grad_norm": 0.7777748524363873, "learning_rate": 1.4428223844282241e-06, "loss": 0.6742, "step": 25356 }, { "epoch": 0.7403287495255613, "grad_norm": 0.7460160320053505, "learning_rate": 1.442660178426602e-06, "loss": 0.6027, "step": 25357 }, { "epoch": 0.7403579457534086, "grad_norm": 0.7194427501019911, "learning_rate": 1.44249797242498e-06, "loss": 0.6106, "step": 25358 }, { "epoch": 0.740387141981256, "grad_norm": 0.718494440102745, "learning_rate": 1.4423357664233577e-06, "loss": 0.6548, "step": 25359 }, { "epoch": 0.7404163382091034, "grad_norm": 0.7011122479193005, "learning_rate": 1.4421735604217357e-06, "loss": 0.6004, "step": 25360 }, { "epoch": 0.7404455344369507, "grad_norm": 0.7554005799150975, "learning_rate": 1.4420113544201135e-06, "loss": 0.6744, "step": 25361 }, { "epoch": 0.7404747306647981, "grad_norm": 0.7105586126232172, "learning_rate": 1.4418491484184915e-06, "loss": 0.6336, "step": 25362 }, { "epoch": 0.7405039268926454, "grad_norm": 0.6984543472084844, "learning_rate": 1.4416869424168696e-06, "loss": 0.5424, "step": 25363 }, { "epoch": 0.7405331231204928, "grad_norm": 0.7237638292139175, "learning_rate": 1.4415247364152473e-06, "loss": 0.6265, "step": 25364 }, { "epoch": 0.7405623193483402, "grad_norm": 0.7006722446028867, "learning_rate": 1.4413625304136256e-06, "loss": 0.5771, "step": 25365 }, { "epoch": 0.7405915155761875, "grad_norm": 0.7430434341386793, "learning_rate": 1.4412003244120036e-06, "loss": 0.637, "step": 25366 }, { "epoch": 0.7406207118040349, "grad_norm": 0.7413414529226746, "learning_rate": 1.4410381184103814e-06, "loss": 0.6306, "step": 25367 }, { "epoch": 0.7406499080318822, "grad_norm": 0.705402631903352, "learning_rate": 1.4408759124087594e-06, "loss": 0.641, "step": 25368 }, { "epoch": 0.7406791042597296, "grad_norm": 0.7068380406934198, "learning_rate": 1.4407137064071372e-06, "loss": 0.5963, "step": 25369 }, { "epoch": 0.740708300487577, "grad_norm": 0.7333277725364215, "learning_rate": 1.4405515004055152e-06, "loss": 0.6633, "step": 25370 }, { "epoch": 0.7407374967154243, "grad_norm": 0.6627165376719676, "learning_rate": 1.440389294403893e-06, "loss": 0.5636, "step": 25371 }, { "epoch": 0.7407666929432717, "grad_norm": 0.7327244090847693, "learning_rate": 1.440227088402271e-06, "loss": 0.6389, "step": 25372 }, { "epoch": 0.740795889171119, "grad_norm": 0.7191011498355313, "learning_rate": 1.4400648824006488e-06, "loss": 0.6007, "step": 25373 }, { "epoch": 0.7408250853989664, "grad_norm": 0.8473092429077252, "learning_rate": 1.4399026763990268e-06, "loss": 0.7124, "step": 25374 }, { "epoch": 0.7408542816268138, "grad_norm": 0.7857318111655771, "learning_rate": 1.439740470397405e-06, "loss": 0.7404, "step": 25375 }, { "epoch": 0.7408834778546611, "grad_norm": 0.9468061795541913, "learning_rate": 1.4395782643957828e-06, "loss": 0.6794, "step": 25376 }, { "epoch": 0.7409126740825085, "grad_norm": 0.7642330497707831, "learning_rate": 1.4394160583941608e-06, "loss": 0.6754, "step": 25377 }, { "epoch": 0.7409418703103559, "grad_norm": 0.7696330159749567, "learning_rate": 1.4392538523925386e-06, "loss": 0.6879, "step": 25378 }, { "epoch": 0.7409710665382032, "grad_norm": 0.7113709352937718, "learning_rate": 1.4390916463909166e-06, "loss": 0.56, "step": 25379 }, { "epoch": 0.7410002627660506, "grad_norm": 0.6931769787500893, "learning_rate": 1.4389294403892944e-06, "loss": 0.6228, "step": 25380 }, { "epoch": 0.7410294589938979, "grad_norm": 0.7278062993611457, "learning_rate": 1.4387672343876724e-06, "loss": 0.6699, "step": 25381 }, { "epoch": 0.7410586552217453, "grad_norm": 0.6883325176639498, "learning_rate": 1.4386050283860504e-06, "loss": 0.6165, "step": 25382 }, { "epoch": 0.7410878514495927, "grad_norm": 0.734295337364543, "learning_rate": 1.4384428223844282e-06, "loss": 0.664, "step": 25383 }, { "epoch": 0.74111704767744, "grad_norm": 0.7051872200678959, "learning_rate": 1.4382806163828064e-06, "loss": 0.6509, "step": 25384 }, { "epoch": 0.7411462439052874, "grad_norm": 0.7287718430978786, "learning_rate": 1.4381184103811842e-06, "loss": 0.6293, "step": 25385 }, { "epoch": 0.7411754401331347, "grad_norm": 0.7646502038506071, "learning_rate": 1.4379562043795622e-06, "loss": 0.6648, "step": 25386 }, { "epoch": 0.7412046363609822, "grad_norm": 0.7053380415117501, "learning_rate": 1.4377939983779402e-06, "loss": 0.6364, "step": 25387 }, { "epoch": 0.7412338325888296, "grad_norm": 0.6947655782481629, "learning_rate": 1.437631792376318e-06, "loss": 0.5876, "step": 25388 }, { "epoch": 0.7412630288166769, "grad_norm": 0.7078502786407502, "learning_rate": 1.437469586374696e-06, "loss": 0.6169, "step": 25389 }, { "epoch": 0.7412922250445243, "grad_norm": 0.7597618724698775, "learning_rate": 1.4373073803730738e-06, "loss": 0.6994, "step": 25390 }, { "epoch": 0.7413214212723717, "grad_norm": 0.7396938087875444, "learning_rate": 1.4371451743714518e-06, "loss": 0.7014, "step": 25391 }, { "epoch": 0.741350617500219, "grad_norm": 0.65158909569837, "learning_rate": 1.4369829683698296e-06, "loss": 0.5404, "step": 25392 }, { "epoch": 0.7413798137280664, "grad_norm": 0.7755409636740823, "learning_rate": 1.4368207623682076e-06, "loss": 0.6847, "step": 25393 }, { "epoch": 0.7414090099559137, "grad_norm": 0.803576241223309, "learning_rate": 1.4366585563665859e-06, "loss": 0.5574, "step": 25394 }, { "epoch": 0.7414382061837611, "grad_norm": 0.745221929448164, "learning_rate": 1.4364963503649637e-06, "loss": 0.6747, "step": 25395 }, { "epoch": 0.7414674024116085, "grad_norm": 0.782385036218716, "learning_rate": 1.4363341443633417e-06, "loss": 0.7395, "step": 25396 }, { "epoch": 0.7414965986394558, "grad_norm": 0.7004676016044645, "learning_rate": 1.4361719383617195e-06, "loss": 0.5675, "step": 25397 }, { "epoch": 0.7415257948673032, "grad_norm": 0.7285726045512987, "learning_rate": 1.4360097323600975e-06, "loss": 0.6336, "step": 25398 }, { "epoch": 0.7415549910951506, "grad_norm": 0.7157415036507653, "learning_rate": 1.4358475263584753e-06, "loss": 0.6147, "step": 25399 }, { "epoch": 0.7415841873229979, "grad_norm": 0.6872579446872134, "learning_rate": 1.4356853203568533e-06, "loss": 0.5575, "step": 25400 }, { "epoch": 0.7416133835508453, "grad_norm": 0.7035279642213942, "learning_rate": 1.4355231143552313e-06, "loss": 0.6622, "step": 25401 }, { "epoch": 0.7416425797786926, "grad_norm": 0.7477106447682685, "learning_rate": 1.435360908353609e-06, "loss": 0.6237, "step": 25402 }, { "epoch": 0.74167177600654, "grad_norm": 0.7744218415453971, "learning_rate": 1.4351987023519873e-06, "loss": 0.668, "step": 25403 }, { "epoch": 0.7417009722343874, "grad_norm": 0.7507053494904505, "learning_rate": 1.435036496350365e-06, "loss": 0.7025, "step": 25404 }, { "epoch": 0.7417301684622347, "grad_norm": 0.7146849452816271, "learning_rate": 1.434874290348743e-06, "loss": 0.6324, "step": 25405 }, { "epoch": 0.7417593646900821, "grad_norm": 0.7232377292343334, "learning_rate": 1.434712084347121e-06, "loss": 0.6504, "step": 25406 }, { "epoch": 0.7417885609179294, "grad_norm": 0.7744607051287008, "learning_rate": 1.4345498783454989e-06, "loss": 0.6773, "step": 25407 }, { "epoch": 0.7418177571457768, "grad_norm": 0.7677137368423989, "learning_rate": 1.434387672343877e-06, "loss": 0.6755, "step": 25408 }, { "epoch": 0.7418469533736242, "grad_norm": 0.6818664843729337, "learning_rate": 1.4342254663422547e-06, "loss": 0.559, "step": 25409 }, { "epoch": 0.7418761496014715, "grad_norm": 0.7651450633953591, "learning_rate": 1.4340632603406327e-06, "loss": 0.6743, "step": 25410 }, { "epoch": 0.7419053458293189, "grad_norm": 0.7393748915613588, "learning_rate": 1.4339010543390105e-06, "loss": 0.651, "step": 25411 }, { "epoch": 0.7419345420571662, "grad_norm": 0.7377381285401203, "learning_rate": 1.4337388483373885e-06, "loss": 0.6851, "step": 25412 }, { "epoch": 0.7419637382850136, "grad_norm": 0.722672848001455, "learning_rate": 1.4335766423357667e-06, "loss": 0.5971, "step": 25413 }, { "epoch": 0.741992934512861, "grad_norm": 0.7378211877923998, "learning_rate": 1.4334144363341445e-06, "loss": 0.6499, "step": 25414 }, { "epoch": 0.7420221307407083, "grad_norm": 0.7681137765740329, "learning_rate": 1.4332522303325225e-06, "loss": 0.6833, "step": 25415 }, { "epoch": 0.7420513269685557, "grad_norm": 0.7008956156761406, "learning_rate": 1.4330900243309003e-06, "loss": 0.5727, "step": 25416 }, { "epoch": 0.742080523196403, "grad_norm": 0.7172784414976389, "learning_rate": 1.4329278183292783e-06, "loss": 0.6098, "step": 25417 }, { "epoch": 0.7421097194242504, "grad_norm": 0.6682126195012449, "learning_rate": 1.4327656123276561e-06, "loss": 0.567, "step": 25418 }, { "epoch": 0.7421389156520978, "grad_norm": 0.7110328505949864, "learning_rate": 1.4326034063260341e-06, "loss": 0.6033, "step": 25419 }, { "epoch": 0.7421681118799451, "grad_norm": 0.7423027581312025, "learning_rate": 1.4324412003244121e-06, "loss": 0.6342, "step": 25420 }, { "epoch": 0.7421973081077925, "grad_norm": 0.7195282704253919, "learning_rate": 1.43227899432279e-06, "loss": 0.6436, "step": 25421 }, { "epoch": 0.7422265043356399, "grad_norm": 0.7212025884329353, "learning_rate": 1.4321167883211681e-06, "loss": 0.6351, "step": 25422 }, { "epoch": 0.7422557005634872, "grad_norm": 0.7190832173134518, "learning_rate": 1.431954582319546e-06, "loss": 0.6232, "step": 25423 }, { "epoch": 0.7422848967913346, "grad_norm": 0.6289366315628303, "learning_rate": 1.431792376317924e-06, "loss": 0.5053, "step": 25424 }, { "epoch": 0.7423140930191819, "grad_norm": 0.6583681882585769, "learning_rate": 1.431630170316302e-06, "loss": 0.55, "step": 25425 }, { "epoch": 0.7423432892470293, "grad_norm": 0.8855766876199268, "learning_rate": 1.4314679643146797e-06, "loss": 0.5926, "step": 25426 }, { "epoch": 0.7423724854748767, "grad_norm": 0.7907571902281323, "learning_rate": 1.4313057583130578e-06, "loss": 0.6262, "step": 25427 }, { "epoch": 0.742401681702724, "grad_norm": 0.6818889447048166, "learning_rate": 1.4311435523114355e-06, "loss": 0.6048, "step": 25428 }, { "epoch": 0.7424308779305714, "grad_norm": 0.6933762167730294, "learning_rate": 1.4309813463098136e-06, "loss": 0.5732, "step": 25429 }, { "epoch": 0.7424600741584187, "grad_norm": 0.7393327333256473, "learning_rate": 1.4308191403081914e-06, "loss": 0.6933, "step": 25430 }, { "epoch": 0.7424892703862661, "grad_norm": 0.7153236257521173, "learning_rate": 1.4306569343065694e-06, "loss": 0.555, "step": 25431 }, { "epoch": 0.7425184666141135, "grad_norm": 0.7243364940057556, "learning_rate": 1.4304947283049476e-06, "loss": 0.6014, "step": 25432 }, { "epoch": 0.7425476628419608, "grad_norm": 0.705914827764359, "learning_rate": 1.4303325223033254e-06, "loss": 0.5933, "step": 25433 }, { "epoch": 0.7425768590698082, "grad_norm": 0.7602772469561215, "learning_rate": 1.4301703163017034e-06, "loss": 0.7045, "step": 25434 }, { "epoch": 0.7426060552976556, "grad_norm": 0.7535305652564357, "learning_rate": 1.4300081103000812e-06, "loss": 0.7138, "step": 25435 }, { "epoch": 0.7426352515255029, "grad_norm": 0.7508243747801966, "learning_rate": 1.4298459042984592e-06, "loss": 0.6821, "step": 25436 }, { "epoch": 0.7426644477533503, "grad_norm": 0.7423044614563821, "learning_rate": 1.429683698296837e-06, "loss": 0.6666, "step": 25437 }, { "epoch": 0.7426936439811976, "grad_norm": 0.7394719428295198, "learning_rate": 1.429521492295215e-06, "loss": 0.67, "step": 25438 }, { "epoch": 0.742722840209045, "grad_norm": 0.8010679270434523, "learning_rate": 1.429359286293593e-06, "loss": 0.6671, "step": 25439 }, { "epoch": 0.7427520364368924, "grad_norm": 0.7218048233596377, "learning_rate": 1.4291970802919708e-06, "loss": 0.5809, "step": 25440 }, { "epoch": 0.7427812326647397, "grad_norm": 0.8017219507638694, "learning_rate": 1.429034874290349e-06, "loss": 0.7197, "step": 25441 }, { "epoch": 0.7428104288925871, "grad_norm": 0.7508877159345475, "learning_rate": 1.4288726682887268e-06, "loss": 0.6669, "step": 25442 }, { "epoch": 0.7428396251204344, "grad_norm": 0.7222521364054637, "learning_rate": 1.4287104622871048e-06, "loss": 0.6466, "step": 25443 }, { "epoch": 0.7428688213482818, "grad_norm": 0.7458324030622463, "learning_rate": 1.4285482562854828e-06, "loss": 0.6652, "step": 25444 }, { "epoch": 0.7428980175761292, "grad_norm": 0.7451227509699844, "learning_rate": 1.4283860502838606e-06, "loss": 0.661, "step": 25445 }, { "epoch": 0.7429272138039765, "grad_norm": 0.7352308610751466, "learning_rate": 1.4282238442822386e-06, "loss": 0.6126, "step": 25446 }, { "epoch": 0.7429564100318239, "grad_norm": 0.6794680892634738, "learning_rate": 1.4280616382806164e-06, "loss": 0.5878, "step": 25447 }, { "epoch": 0.7429856062596712, "grad_norm": 0.6787288856128539, "learning_rate": 1.4278994322789944e-06, "loss": 0.5769, "step": 25448 }, { "epoch": 0.7430148024875186, "grad_norm": 0.9049910177057119, "learning_rate": 1.4277372262773722e-06, "loss": 0.654, "step": 25449 }, { "epoch": 0.743043998715366, "grad_norm": 0.7233930955919218, "learning_rate": 1.4275750202757504e-06, "loss": 0.6018, "step": 25450 }, { "epoch": 0.7430731949432133, "grad_norm": 0.7042285822001378, "learning_rate": 1.4274128142741284e-06, "loss": 0.6111, "step": 25451 }, { "epoch": 0.7431023911710607, "grad_norm": 0.7268568216889181, "learning_rate": 1.4272506082725062e-06, "loss": 0.6134, "step": 25452 }, { "epoch": 0.7431315873989081, "grad_norm": 0.7446932375257058, "learning_rate": 1.4270884022708842e-06, "loss": 0.6914, "step": 25453 }, { "epoch": 0.7431607836267554, "grad_norm": 0.6650367957545834, "learning_rate": 1.426926196269262e-06, "loss": 0.5466, "step": 25454 }, { "epoch": 0.7431899798546028, "grad_norm": 0.7627063994146409, "learning_rate": 1.42676399026764e-06, "loss": 0.7143, "step": 25455 }, { "epoch": 0.7432191760824501, "grad_norm": 0.7537733069291553, "learning_rate": 1.4266017842660178e-06, "loss": 0.6939, "step": 25456 }, { "epoch": 0.7432483723102975, "grad_norm": 0.7399031588179876, "learning_rate": 1.4264395782643958e-06, "loss": 0.6706, "step": 25457 }, { "epoch": 0.7432775685381449, "grad_norm": 0.7253538576202843, "learning_rate": 1.4262773722627736e-06, "loss": 0.6237, "step": 25458 }, { "epoch": 0.7433067647659922, "grad_norm": 0.6812312031120697, "learning_rate": 1.4261151662611516e-06, "loss": 0.549, "step": 25459 }, { "epoch": 0.7433359609938396, "grad_norm": 0.6536198979012997, "learning_rate": 1.4259529602595299e-06, "loss": 0.5245, "step": 25460 }, { "epoch": 0.743365157221687, "grad_norm": 0.706386143080315, "learning_rate": 1.4257907542579077e-06, "loss": 0.6535, "step": 25461 }, { "epoch": 0.7433943534495343, "grad_norm": 0.7657847142590941, "learning_rate": 1.4256285482562857e-06, "loss": 0.6709, "step": 25462 }, { "epoch": 0.7434235496773817, "grad_norm": 0.7156004989604339, "learning_rate": 1.4254663422546637e-06, "loss": 0.6029, "step": 25463 }, { "epoch": 0.743452745905229, "grad_norm": 0.7875834555369553, "learning_rate": 1.4253041362530415e-06, "loss": 0.7128, "step": 25464 }, { "epoch": 0.7434819421330764, "grad_norm": 0.7102279451198549, "learning_rate": 1.4251419302514195e-06, "loss": 0.6278, "step": 25465 }, { "epoch": 0.7435111383609238, "grad_norm": 0.6931791955818414, "learning_rate": 1.4249797242497973e-06, "loss": 0.599, "step": 25466 }, { "epoch": 0.7435403345887711, "grad_norm": 0.6909900947712402, "learning_rate": 1.4248175182481753e-06, "loss": 0.5898, "step": 25467 }, { "epoch": 0.7435695308166185, "grad_norm": 0.7266495415918429, "learning_rate": 1.424655312246553e-06, "loss": 0.6541, "step": 25468 }, { "epoch": 0.7435987270444658, "grad_norm": 0.7410520244612474, "learning_rate": 1.4244931062449313e-06, "loss": 0.6466, "step": 25469 }, { "epoch": 0.7436279232723132, "grad_norm": 0.7192029026434014, "learning_rate": 1.4243309002433093e-06, "loss": 0.654, "step": 25470 }, { "epoch": 0.7436571195001606, "grad_norm": 0.7574713223350464, "learning_rate": 1.424168694241687e-06, "loss": 0.6849, "step": 25471 }, { "epoch": 0.7436863157280079, "grad_norm": 0.7081896740624237, "learning_rate": 1.424006488240065e-06, "loss": 0.6516, "step": 25472 }, { "epoch": 0.7437155119558553, "grad_norm": 0.7361604161848934, "learning_rate": 1.4238442822384429e-06, "loss": 0.6537, "step": 25473 }, { "epoch": 0.7437447081837026, "grad_norm": 0.7674061546824725, "learning_rate": 1.423682076236821e-06, "loss": 0.6605, "step": 25474 }, { "epoch": 0.74377390441155, "grad_norm": 0.7105988485366739, "learning_rate": 1.4235198702351987e-06, "loss": 0.5823, "step": 25475 }, { "epoch": 0.7438031006393974, "grad_norm": 0.7005737051843633, "learning_rate": 1.4233576642335767e-06, "loss": 0.6151, "step": 25476 }, { "epoch": 0.7438322968672447, "grad_norm": 0.6976540179585698, "learning_rate": 1.4231954582319545e-06, "loss": 0.5921, "step": 25477 }, { "epoch": 0.7438614930950921, "grad_norm": 0.7142982837415618, "learning_rate": 1.4230332522303325e-06, "loss": 0.6461, "step": 25478 }, { "epoch": 0.7438906893229394, "grad_norm": 0.7181977777279112, "learning_rate": 1.4228710462287107e-06, "loss": 0.6201, "step": 25479 }, { "epoch": 0.7439198855507868, "grad_norm": 0.7809652705276994, "learning_rate": 1.4227088402270885e-06, "loss": 0.7389, "step": 25480 }, { "epoch": 0.7439490817786342, "grad_norm": 0.7350093207048936, "learning_rate": 1.4225466342254665e-06, "loss": 0.6882, "step": 25481 }, { "epoch": 0.7439782780064815, "grad_norm": 0.7387719850721755, "learning_rate": 1.4223844282238445e-06, "loss": 0.65, "step": 25482 }, { "epoch": 0.7440074742343289, "grad_norm": 0.7414001243779083, "learning_rate": 1.4222222222222223e-06, "loss": 0.6479, "step": 25483 }, { "epoch": 0.7440366704621763, "grad_norm": 0.7732253570428943, "learning_rate": 1.4220600162206003e-06, "loss": 0.6769, "step": 25484 }, { "epoch": 0.7440658666900236, "grad_norm": 0.7673129799563808, "learning_rate": 1.4218978102189781e-06, "loss": 0.632, "step": 25485 }, { "epoch": 0.744095062917871, "grad_norm": 0.7266005856098031, "learning_rate": 1.4217356042173561e-06, "loss": 0.6377, "step": 25486 }, { "epoch": 0.7441242591457183, "grad_norm": 0.7006489638439242, "learning_rate": 1.421573398215734e-06, "loss": 0.5873, "step": 25487 }, { "epoch": 0.7441534553735657, "grad_norm": 0.7464317399413558, "learning_rate": 1.4214111922141121e-06, "loss": 0.6494, "step": 25488 }, { "epoch": 0.7441826516014131, "grad_norm": 0.8503717775657988, "learning_rate": 1.4212489862124902e-06, "loss": 0.6459, "step": 25489 }, { "epoch": 0.7442118478292604, "grad_norm": 0.7347686995965486, "learning_rate": 1.421086780210868e-06, "loss": 0.6274, "step": 25490 }, { "epoch": 0.7442410440571078, "grad_norm": 0.7252497324642977, "learning_rate": 1.420924574209246e-06, "loss": 0.6667, "step": 25491 }, { "epoch": 0.7442702402849551, "grad_norm": 0.7696908331084531, "learning_rate": 1.4207623682076237e-06, "loss": 0.7006, "step": 25492 }, { "epoch": 0.7442994365128025, "grad_norm": 0.823228369756426, "learning_rate": 1.4206001622060018e-06, "loss": 0.73, "step": 25493 }, { "epoch": 0.7443286327406499, "grad_norm": 0.7468491677773641, "learning_rate": 1.4204379562043796e-06, "loss": 0.7206, "step": 25494 }, { "epoch": 0.7443578289684972, "grad_norm": 0.7770181427772632, "learning_rate": 1.4202757502027576e-06, "loss": 0.705, "step": 25495 }, { "epoch": 0.7443870251963446, "grad_norm": 0.7403481952129658, "learning_rate": 1.4201135442011354e-06, "loss": 0.6203, "step": 25496 }, { "epoch": 0.744416221424192, "grad_norm": 0.7223645244421505, "learning_rate": 1.4199513381995134e-06, "loss": 0.6471, "step": 25497 }, { "epoch": 0.7444454176520393, "grad_norm": 0.769064494457601, "learning_rate": 1.4197891321978916e-06, "loss": 0.6496, "step": 25498 }, { "epoch": 0.7444746138798867, "grad_norm": 0.7077551152640751, "learning_rate": 1.4196269261962694e-06, "loss": 0.5942, "step": 25499 }, { "epoch": 0.744503810107734, "grad_norm": 0.75867723779787, "learning_rate": 1.4194647201946474e-06, "loss": 0.6703, "step": 25500 }, { "epoch": 0.7445330063355814, "grad_norm": 0.6771229105629147, "learning_rate": 1.4193025141930254e-06, "loss": 0.5719, "step": 25501 }, { "epoch": 0.7445622025634288, "grad_norm": 0.7403769760545525, "learning_rate": 1.4191403081914032e-06, "loss": 0.6891, "step": 25502 }, { "epoch": 0.7445913987912761, "grad_norm": 0.7275099554334643, "learning_rate": 1.4189781021897812e-06, "loss": 0.6323, "step": 25503 }, { "epoch": 0.7446205950191235, "grad_norm": 0.6743617292838142, "learning_rate": 1.418815896188159e-06, "loss": 0.5393, "step": 25504 }, { "epoch": 0.7446497912469708, "grad_norm": 0.7448945656580775, "learning_rate": 1.418653690186537e-06, "loss": 0.6564, "step": 25505 }, { "epoch": 0.7446789874748182, "grad_norm": 0.7376888504841913, "learning_rate": 1.4184914841849148e-06, "loss": 0.6507, "step": 25506 }, { "epoch": 0.7447081837026656, "grad_norm": 0.7336512316938489, "learning_rate": 1.418329278183293e-06, "loss": 0.5665, "step": 25507 }, { "epoch": 0.744737379930513, "grad_norm": 0.7693586602523723, "learning_rate": 1.418167072181671e-06, "loss": 0.6649, "step": 25508 }, { "epoch": 0.7447665761583604, "grad_norm": 0.6701439924708037, "learning_rate": 1.4180048661800488e-06, "loss": 0.5404, "step": 25509 }, { "epoch": 0.7447957723862078, "grad_norm": 0.760454573276885, "learning_rate": 1.4178426601784268e-06, "loss": 0.6561, "step": 25510 }, { "epoch": 0.7448249686140551, "grad_norm": 0.7834989580187013, "learning_rate": 1.4176804541768046e-06, "loss": 0.6937, "step": 25511 }, { "epoch": 0.7448541648419025, "grad_norm": 0.6833931279112699, "learning_rate": 1.4175182481751826e-06, "loss": 0.5439, "step": 25512 }, { "epoch": 0.7448833610697498, "grad_norm": 0.8343375640700378, "learning_rate": 1.4173560421735604e-06, "loss": 0.6954, "step": 25513 }, { "epoch": 0.7449125572975972, "grad_norm": 0.7662541778149987, "learning_rate": 1.4171938361719384e-06, "loss": 0.71, "step": 25514 }, { "epoch": 0.7449417535254446, "grad_norm": 0.699935913111237, "learning_rate": 1.4170316301703162e-06, "loss": 0.5668, "step": 25515 }, { "epoch": 0.7449709497532919, "grad_norm": 0.7756211679165252, "learning_rate": 1.4168694241686944e-06, "loss": 0.6906, "step": 25516 }, { "epoch": 0.7450001459811393, "grad_norm": 0.6925652392422289, "learning_rate": 1.4167072181670724e-06, "loss": 0.5943, "step": 25517 }, { "epoch": 0.7450293422089866, "grad_norm": 0.7513366751504734, "learning_rate": 1.4165450121654502e-06, "loss": 0.5864, "step": 25518 }, { "epoch": 0.745058538436834, "grad_norm": 0.7042116715523579, "learning_rate": 1.4163828061638282e-06, "loss": 0.6028, "step": 25519 }, { "epoch": 0.7450877346646814, "grad_norm": 0.7458718104240537, "learning_rate": 1.416220600162206e-06, "loss": 0.7282, "step": 25520 }, { "epoch": 0.7451169308925287, "grad_norm": 0.7505269585529781, "learning_rate": 1.416058394160584e-06, "loss": 0.6248, "step": 25521 }, { "epoch": 0.7451461271203761, "grad_norm": 0.6958445374702422, "learning_rate": 1.415896188158962e-06, "loss": 0.645, "step": 25522 }, { "epoch": 0.7451753233482235, "grad_norm": 0.6510034092280217, "learning_rate": 1.4157339821573398e-06, "loss": 0.5635, "step": 25523 }, { "epoch": 0.7452045195760708, "grad_norm": 0.7317056928130531, "learning_rate": 1.4155717761557178e-06, "loss": 0.5761, "step": 25524 }, { "epoch": 0.7452337158039182, "grad_norm": 0.7631563622286667, "learning_rate": 1.4154095701540956e-06, "loss": 0.5821, "step": 25525 }, { "epoch": 0.7452629120317655, "grad_norm": 0.7212708019740319, "learning_rate": 1.4152473641524739e-06, "loss": 0.642, "step": 25526 }, { "epoch": 0.7452921082596129, "grad_norm": 0.7526222888791846, "learning_rate": 1.4150851581508519e-06, "loss": 0.6873, "step": 25527 }, { "epoch": 0.7453213044874603, "grad_norm": 0.7261427449474359, "learning_rate": 1.4149229521492297e-06, "loss": 0.6483, "step": 25528 }, { "epoch": 0.7453505007153076, "grad_norm": 0.6681867343479966, "learning_rate": 1.4147607461476077e-06, "loss": 0.5787, "step": 25529 }, { "epoch": 0.745379696943155, "grad_norm": 0.6713394729496496, "learning_rate": 1.4145985401459855e-06, "loss": 0.5939, "step": 25530 }, { "epoch": 0.7454088931710023, "grad_norm": 0.6736863021416686, "learning_rate": 1.4144363341443635e-06, "loss": 0.5723, "step": 25531 }, { "epoch": 0.7454380893988497, "grad_norm": 0.7312650651287294, "learning_rate": 1.4142741281427413e-06, "loss": 0.6913, "step": 25532 }, { "epoch": 0.7454672856266971, "grad_norm": 0.7554961477412577, "learning_rate": 1.4141119221411193e-06, "loss": 0.68, "step": 25533 }, { "epoch": 0.7454964818545444, "grad_norm": 0.7152104614786706, "learning_rate": 1.413949716139497e-06, "loss": 0.6836, "step": 25534 }, { "epoch": 0.7455256780823918, "grad_norm": 0.7044243028746092, "learning_rate": 1.4137875101378753e-06, "loss": 0.6599, "step": 25535 }, { "epoch": 0.7455548743102391, "grad_norm": 0.7033480156460248, "learning_rate": 1.4136253041362533e-06, "loss": 0.5848, "step": 25536 }, { "epoch": 0.7455840705380865, "grad_norm": 0.7650710557550826, "learning_rate": 1.413463098134631e-06, "loss": 0.679, "step": 25537 }, { "epoch": 0.7456132667659339, "grad_norm": 0.6971016650042017, "learning_rate": 1.413300892133009e-06, "loss": 0.5782, "step": 25538 }, { "epoch": 0.7456424629937812, "grad_norm": 0.7421863747942562, "learning_rate": 1.4131386861313869e-06, "loss": 0.6876, "step": 25539 }, { "epoch": 0.7456716592216286, "grad_norm": 0.8107498016039435, "learning_rate": 1.412976480129765e-06, "loss": 0.6561, "step": 25540 }, { "epoch": 0.745700855449476, "grad_norm": 0.7350568066147374, "learning_rate": 1.412814274128143e-06, "loss": 0.6419, "step": 25541 }, { "epoch": 0.7457300516773233, "grad_norm": 0.7406364938675402, "learning_rate": 1.4126520681265207e-06, "loss": 0.6444, "step": 25542 }, { "epoch": 0.7457592479051707, "grad_norm": 0.7075825582139053, "learning_rate": 1.4124898621248987e-06, "loss": 0.6065, "step": 25543 }, { "epoch": 0.745788444133018, "grad_norm": 0.7542243651076823, "learning_rate": 1.4123276561232765e-06, "loss": 0.682, "step": 25544 }, { "epoch": 0.7458176403608654, "grad_norm": 0.7582229887152497, "learning_rate": 1.4121654501216547e-06, "loss": 0.6606, "step": 25545 }, { "epoch": 0.7458468365887128, "grad_norm": 0.7606533975644837, "learning_rate": 1.4120032441200327e-06, "loss": 0.6896, "step": 25546 }, { "epoch": 0.7458760328165601, "grad_norm": 0.7728746182355387, "learning_rate": 1.4118410381184105e-06, "loss": 0.6618, "step": 25547 }, { "epoch": 0.7459052290444075, "grad_norm": 0.7794308678458413, "learning_rate": 1.4116788321167885e-06, "loss": 0.7747, "step": 25548 }, { "epoch": 0.7459344252722548, "grad_norm": 0.7653375457316082, "learning_rate": 1.4115166261151663e-06, "loss": 0.7063, "step": 25549 }, { "epoch": 0.7459636215001022, "grad_norm": 0.6914072336817466, "learning_rate": 1.4113544201135443e-06, "loss": 0.617, "step": 25550 }, { "epoch": 0.7459928177279496, "grad_norm": 0.7174055472531652, "learning_rate": 1.4111922141119221e-06, "loss": 0.6235, "step": 25551 }, { "epoch": 0.7460220139557969, "grad_norm": 0.7148098488222433, "learning_rate": 1.4110300081103001e-06, "loss": 0.6226, "step": 25552 }, { "epoch": 0.7460512101836443, "grad_norm": 0.7705809957935673, "learning_rate": 1.410867802108678e-06, "loss": 0.6997, "step": 25553 }, { "epoch": 0.7460804064114916, "grad_norm": 0.7799939261543244, "learning_rate": 1.4107055961070561e-06, "loss": 0.5682, "step": 25554 }, { "epoch": 0.746109602639339, "grad_norm": 0.7224103246640429, "learning_rate": 1.4105433901054342e-06, "loss": 0.6009, "step": 25555 }, { "epoch": 0.7461387988671864, "grad_norm": 0.706292498562052, "learning_rate": 1.410381184103812e-06, "loss": 0.6213, "step": 25556 }, { "epoch": 0.7461679950950337, "grad_norm": 0.8053822312525215, "learning_rate": 1.41021897810219e-06, "loss": 0.6679, "step": 25557 }, { "epoch": 0.7461971913228811, "grad_norm": 0.7343778501409349, "learning_rate": 1.4100567721005678e-06, "loss": 0.6332, "step": 25558 }, { "epoch": 0.7462263875507285, "grad_norm": 0.7799756948783014, "learning_rate": 1.4098945660989458e-06, "loss": 0.7405, "step": 25559 }, { "epoch": 0.7462555837785758, "grad_norm": 0.7186191913246924, "learning_rate": 1.4097323600973238e-06, "loss": 0.628, "step": 25560 }, { "epoch": 0.7462847800064232, "grad_norm": 0.7009601142248089, "learning_rate": 1.4095701540957016e-06, "loss": 0.6044, "step": 25561 }, { "epoch": 0.7463139762342705, "grad_norm": 0.7023722700272177, "learning_rate": 1.4094079480940796e-06, "loss": 0.6504, "step": 25562 }, { "epoch": 0.7463431724621179, "grad_norm": 0.7316865519160831, "learning_rate": 1.4092457420924574e-06, "loss": 0.6622, "step": 25563 }, { "epoch": 0.7463723686899653, "grad_norm": 0.7323859388171394, "learning_rate": 1.4090835360908356e-06, "loss": 0.6885, "step": 25564 }, { "epoch": 0.7464015649178126, "grad_norm": 0.7586703345348434, "learning_rate": 1.4089213300892136e-06, "loss": 0.6206, "step": 25565 }, { "epoch": 0.74643076114566, "grad_norm": 0.7957957760655383, "learning_rate": 1.4087591240875914e-06, "loss": 0.6746, "step": 25566 }, { "epoch": 0.7464599573735073, "grad_norm": 0.7188411431240316, "learning_rate": 1.4085969180859694e-06, "loss": 0.6659, "step": 25567 }, { "epoch": 0.7464891536013547, "grad_norm": 0.7019581471580225, "learning_rate": 1.4084347120843472e-06, "loss": 0.5779, "step": 25568 }, { "epoch": 0.7465183498292021, "grad_norm": 0.7017600453956074, "learning_rate": 1.4082725060827252e-06, "loss": 0.6006, "step": 25569 }, { "epoch": 0.7465475460570494, "grad_norm": 0.708614764559601, "learning_rate": 1.408110300081103e-06, "loss": 0.618, "step": 25570 }, { "epoch": 0.7465767422848968, "grad_norm": 0.7194632347545962, "learning_rate": 1.407948094079481e-06, "loss": 0.5918, "step": 25571 }, { "epoch": 0.7466059385127441, "grad_norm": 0.690450369663726, "learning_rate": 1.4077858880778588e-06, "loss": 0.5694, "step": 25572 }, { "epoch": 0.7466351347405915, "grad_norm": 0.7505578144457201, "learning_rate": 1.407623682076237e-06, "loss": 0.6594, "step": 25573 }, { "epoch": 0.7466643309684389, "grad_norm": 0.7417762151499513, "learning_rate": 1.407461476074615e-06, "loss": 0.6747, "step": 25574 }, { "epoch": 0.7466935271962862, "grad_norm": 0.720270097254291, "learning_rate": 1.4072992700729928e-06, "loss": 0.6171, "step": 25575 }, { "epoch": 0.7467227234241336, "grad_norm": 0.6951223765632966, "learning_rate": 1.4071370640713708e-06, "loss": 0.5717, "step": 25576 }, { "epoch": 0.746751919651981, "grad_norm": 0.7597755321220521, "learning_rate": 1.4069748580697486e-06, "loss": 0.619, "step": 25577 }, { "epoch": 0.7467811158798283, "grad_norm": 0.6825617233736587, "learning_rate": 1.4068126520681266e-06, "loss": 0.6107, "step": 25578 }, { "epoch": 0.7468103121076757, "grad_norm": 0.6889122125994079, "learning_rate": 1.4066504460665046e-06, "loss": 0.6179, "step": 25579 }, { "epoch": 0.746839508335523, "grad_norm": 0.7246448416830013, "learning_rate": 1.4064882400648824e-06, "loss": 0.6633, "step": 25580 }, { "epoch": 0.7468687045633704, "grad_norm": 0.7296180060016507, "learning_rate": 1.4063260340632604e-06, "loss": 0.6249, "step": 25581 }, { "epoch": 0.7468979007912178, "grad_norm": 0.7456226597688805, "learning_rate": 1.4061638280616382e-06, "loss": 0.6514, "step": 25582 }, { "epoch": 0.7469270970190651, "grad_norm": 0.6931635005353697, "learning_rate": 1.4060016220600164e-06, "loss": 0.6178, "step": 25583 }, { "epoch": 0.7469562932469125, "grad_norm": 0.763851461231688, "learning_rate": 1.4058394160583944e-06, "loss": 0.6952, "step": 25584 }, { "epoch": 0.7469854894747598, "grad_norm": 0.6427129662170032, "learning_rate": 1.4056772100567722e-06, "loss": 0.5427, "step": 25585 }, { "epoch": 0.7470146857026072, "grad_norm": 0.7046914466486707, "learning_rate": 1.4055150040551502e-06, "loss": 0.6086, "step": 25586 }, { "epoch": 0.7470438819304546, "grad_norm": 0.704793191632489, "learning_rate": 1.405352798053528e-06, "loss": 0.6149, "step": 25587 }, { "epoch": 0.7470730781583019, "grad_norm": 0.7589491105082071, "learning_rate": 1.405190592051906e-06, "loss": 0.6512, "step": 25588 }, { "epoch": 0.7471022743861493, "grad_norm": 0.6991241090792807, "learning_rate": 1.4050283860502838e-06, "loss": 0.5974, "step": 25589 }, { "epoch": 0.7471314706139967, "grad_norm": 0.7069569376087578, "learning_rate": 1.4048661800486619e-06, "loss": 0.5838, "step": 25590 }, { "epoch": 0.747160666841844, "grad_norm": 0.8012989431020895, "learning_rate": 1.4047039740470396e-06, "loss": 0.6454, "step": 25591 }, { "epoch": 0.7471898630696914, "grad_norm": 0.6854375046988365, "learning_rate": 1.4045417680454179e-06, "loss": 0.5894, "step": 25592 }, { "epoch": 0.7472190592975387, "grad_norm": 0.6924982616647724, "learning_rate": 1.4043795620437959e-06, "loss": 0.6045, "step": 25593 }, { "epoch": 0.7472482555253861, "grad_norm": 0.726486535867694, "learning_rate": 1.4042173560421737e-06, "loss": 0.6806, "step": 25594 }, { "epoch": 0.7472774517532335, "grad_norm": 0.6951824919112914, "learning_rate": 1.4040551500405517e-06, "loss": 0.5477, "step": 25595 }, { "epoch": 0.7473066479810808, "grad_norm": 0.7321166081135653, "learning_rate": 1.4038929440389295e-06, "loss": 0.6119, "step": 25596 }, { "epoch": 0.7473358442089282, "grad_norm": 0.7144314369785241, "learning_rate": 1.4037307380373075e-06, "loss": 0.6541, "step": 25597 }, { "epoch": 0.7473650404367755, "grad_norm": 0.7582458541953657, "learning_rate": 1.4035685320356855e-06, "loss": 0.697, "step": 25598 }, { "epoch": 0.7473942366646229, "grad_norm": 0.8051724520999559, "learning_rate": 1.4034063260340633e-06, "loss": 0.737, "step": 25599 }, { "epoch": 0.7474234328924703, "grad_norm": 0.6849606268369636, "learning_rate": 1.4032441200324413e-06, "loss": 0.5864, "step": 25600 }, { "epoch": 0.7474526291203176, "grad_norm": 0.721529175807427, "learning_rate": 1.4030819140308193e-06, "loss": 0.6542, "step": 25601 }, { "epoch": 0.747481825348165, "grad_norm": 0.7724288931214568, "learning_rate": 1.4029197080291973e-06, "loss": 0.6763, "step": 25602 }, { "epoch": 0.7475110215760123, "grad_norm": 0.7320336161083403, "learning_rate": 1.4027575020275753e-06, "loss": 0.7079, "step": 25603 }, { "epoch": 0.7475402178038597, "grad_norm": 0.6996136894263411, "learning_rate": 1.402595296025953e-06, "loss": 0.6056, "step": 25604 }, { "epoch": 0.7475694140317071, "grad_norm": 0.773497345511221, "learning_rate": 1.402433090024331e-06, "loss": 0.6588, "step": 25605 }, { "epoch": 0.7475986102595544, "grad_norm": 0.7052329280601519, "learning_rate": 1.402270884022709e-06, "loss": 0.6403, "step": 25606 }, { "epoch": 0.7476278064874018, "grad_norm": 0.7207014693890049, "learning_rate": 1.402108678021087e-06, "loss": 0.6336, "step": 25607 }, { "epoch": 0.7476570027152492, "grad_norm": 0.6938792062414147, "learning_rate": 1.4019464720194647e-06, "loss": 0.5991, "step": 25608 }, { "epoch": 0.7476861989430965, "grad_norm": 0.7479837752729357, "learning_rate": 1.4017842660178427e-06, "loss": 0.6355, "step": 25609 }, { "epoch": 0.7477153951709439, "grad_norm": 0.7319510270781829, "learning_rate": 1.4016220600162205e-06, "loss": 0.6109, "step": 25610 }, { "epoch": 0.7477445913987912, "grad_norm": 0.69038833193372, "learning_rate": 1.4014598540145987e-06, "loss": 0.6162, "step": 25611 }, { "epoch": 0.7477737876266386, "grad_norm": 0.7492093403794202, "learning_rate": 1.4012976480129767e-06, "loss": 0.7066, "step": 25612 }, { "epoch": 0.747802983854486, "grad_norm": 0.752170803879825, "learning_rate": 1.4011354420113545e-06, "loss": 0.6896, "step": 25613 }, { "epoch": 0.7478321800823333, "grad_norm": 0.7609111680446696, "learning_rate": 1.4009732360097325e-06, "loss": 0.6976, "step": 25614 }, { "epoch": 0.7478613763101807, "grad_norm": 0.6896686622320676, "learning_rate": 1.4008110300081103e-06, "loss": 0.6066, "step": 25615 }, { "epoch": 0.747890572538028, "grad_norm": 0.7124480688720549, "learning_rate": 1.4006488240064883e-06, "loss": 0.6102, "step": 25616 }, { "epoch": 0.7479197687658754, "grad_norm": 0.7158785985688499, "learning_rate": 1.4004866180048663e-06, "loss": 0.6512, "step": 25617 }, { "epoch": 0.7479489649937228, "grad_norm": 0.7198148748809101, "learning_rate": 1.4003244120032441e-06, "loss": 0.6104, "step": 25618 }, { "epoch": 0.7479781612215701, "grad_norm": 0.7294057494098776, "learning_rate": 1.4001622060016221e-06, "loss": 0.6545, "step": 25619 }, { "epoch": 0.7480073574494175, "grad_norm": 0.7015885688273433, "learning_rate": 1.4000000000000001e-06, "loss": 0.6166, "step": 25620 }, { "epoch": 0.7480365536772648, "grad_norm": 0.712189580843764, "learning_rate": 1.3998377939983782e-06, "loss": 0.6101, "step": 25621 }, { "epoch": 0.7480657499051122, "grad_norm": 0.7160262277098646, "learning_rate": 1.3996755879967562e-06, "loss": 0.607, "step": 25622 }, { "epoch": 0.7480949461329596, "grad_norm": 0.7389859747385158, "learning_rate": 1.399513381995134e-06, "loss": 0.6706, "step": 25623 }, { "epoch": 0.7481241423608069, "grad_norm": 0.7457894564449632, "learning_rate": 1.399351175993512e-06, "loss": 0.6433, "step": 25624 }, { "epoch": 0.7481533385886543, "grad_norm": 0.7630248392844986, "learning_rate": 1.3991889699918898e-06, "loss": 0.6805, "step": 25625 }, { "epoch": 0.7481825348165017, "grad_norm": 0.7217983613824784, "learning_rate": 1.3990267639902678e-06, "loss": 0.6676, "step": 25626 }, { "epoch": 0.748211731044349, "grad_norm": 0.7357778028705546, "learning_rate": 1.3988645579886456e-06, "loss": 0.5677, "step": 25627 }, { "epoch": 0.7482409272721965, "grad_norm": 0.6994383107718622, "learning_rate": 1.3987023519870236e-06, "loss": 0.5848, "step": 25628 }, { "epoch": 0.7482701235000438, "grad_norm": 0.7062547182677272, "learning_rate": 1.3985401459854014e-06, "loss": 0.6289, "step": 25629 }, { "epoch": 0.7482993197278912, "grad_norm": 0.77330986705219, "learning_rate": 1.3983779399837796e-06, "loss": 0.6443, "step": 25630 }, { "epoch": 0.7483285159557386, "grad_norm": 0.7539821766117739, "learning_rate": 1.3982157339821576e-06, "loss": 0.6645, "step": 25631 }, { "epoch": 0.7483577121835859, "grad_norm": 0.7312940331857046, "learning_rate": 1.3980535279805354e-06, "loss": 0.6406, "step": 25632 }, { "epoch": 0.7483869084114333, "grad_norm": 0.7105901091411158, "learning_rate": 1.3978913219789134e-06, "loss": 0.6154, "step": 25633 }, { "epoch": 0.7484161046392807, "grad_norm": 0.7867258200408185, "learning_rate": 1.3977291159772912e-06, "loss": 0.7463, "step": 25634 }, { "epoch": 0.748445300867128, "grad_norm": 0.7495074794555715, "learning_rate": 1.3975669099756692e-06, "loss": 0.6086, "step": 25635 }, { "epoch": 0.7484744970949754, "grad_norm": 0.7366456439170782, "learning_rate": 1.3974047039740472e-06, "loss": 0.6741, "step": 25636 }, { "epoch": 0.7485036933228227, "grad_norm": 0.6794728866860597, "learning_rate": 1.397242497972425e-06, "loss": 0.5866, "step": 25637 }, { "epoch": 0.7485328895506701, "grad_norm": 0.7883404222118404, "learning_rate": 1.397080291970803e-06, "loss": 0.7048, "step": 25638 }, { "epoch": 0.7485620857785175, "grad_norm": 0.6777769155457581, "learning_rate": 1.396918085969181e-06, "loss": 0.5758, "step": 25639 }, { "epoch": 0.7485912820063648, "grad_norm": 0.7442325954261552, "learning_rate": 1.396755879967559e-06, "loss": 0.6671, "step": 25640 }, { "epoch": 0.7486204782342122, "grad_norm": 0.7232708685931307, "learning_rate": 1.396593673965937e-06, "loss": 0.6695, "step": 25641 }, { "epoch": 0.7486496744620595, "grad_norm": 0.7208135362319775, "learning_rate": 1.3964314679643148e-06, "loss": 0.6052, "step": 25642 }, { "epoch": 0.7486788706899069, "grad_norm": 0.740112088494973, "learning_rate": 1.3962692619626928e-06, "loss": 0.6966, "step": 25643 }, { "epoch": 0.7487080669177543, "grad_norm": 0.691873150634257, "learning_rate": 1.3961070559610706e-06, "loss": 0.6135, "step": 25644 }, { "epoch": 0.7487372631456016, "grad_norm": 0.7646303528617934, "learning_rate": 1.3959448499594486e-06, "loss": 0.6504, "step": 25645 }, { "epoch": 0.748766459373449, "grad_norm": 0.7501147980498339, "learning_rate": 1.3957826439578264e-06, "loss": 0.6431, "step": 25646 }, { "epoch": 0.7487956556012964, "grad_norm": 0.7707641576954554, "learning_rate": 1.3956204379562044e-06, "loss": 0.7277, "step": 25647 }, { "epoch": 0.7488248518291437, "grad_norm": 0.6425916707010678, "learning_rate": 1.3954582319545822e-06, "loss": 0.5037, "step": 25648 }, { "epoch": 0.7488540480569911, "grad_norm": 0.78006848302197, "learning_rate": 1.3952960259529604e-06, "loss": 0.6648, "step": 25649 }, { "epoch": 0.7488832442848384, "grad_norm": 0.7053994313519429, "learning_rate": 1.3951338199513384e-06, "loss": 0.5927, "step": 25650 }, { "epoch": 0.7489124405126858, "grad_norm": 0.7060102289091837, "learning_rate": 1.3949716139497162e-06, "loss": 0.6118, "step": 25651 }, { "epoch": 0.7489416367405332, "grad_norm": 0.7531043876788492, "learning_rate": 1.3948094079480942e-06, "loss": 0.6778, "step": 25652 }, { "epoch": 0.7489708329683805, "grad_norm": 0.718840170652428, "learning_rate": 1.394647201946472e-06, "loss": 0.6243, "step": 25653 }, { "epoch": 0.7490000291962279, "grad_norm": 0.7487228756891363, "learning_rate": 1.39448499594485e-06, "loss": 0.6384, "step": 25654 }, { "epoch": 0.7490292254240752, "grad_norm": 0.6757476273353832, "learning_rate": 1.3943227899432278e-06, "loss": 0.62, "step": 25655 }, { "epoch": 0.7490584216519226, "grad_norm": 0.7355744632702022, "learning_rate": 1.3941605839416059e-06, "loss": 0.6704, "step": 25656 }, { "epoch": 0.74908761787977, "grad_norm": 0.7795562594454108, "learning_rate": 1.3939983779399839e-06, "loss": 0.7041, "step": 25657 }, { "epoch": 0.7491168141076173, "grad_norm": 0.7074187616321868, "learning_rate": 1.3938361719383619e-06, "loss": 0.5891, "step": 25658 }, { "epoch": 0.7491460103354647, "grad_norm": 0.7462365777520265, "learning_rate": 1.3936739659367399e-06, "loss": 0.6077, "step": 25659 }, { "epoch": 0.749175206563312, "grad_norm": 0.739522103630103, "learning_rate": 1.3935117599351179e-06, "loss": 0.6507, "step": 25660 }, { "epoch": 0.7492044027911594, "grad_norm": 0.7429961666191867, "learning_rate": 1.3933495539334957e-06, "loss": 0.7166, "step": 25661 }, { "epoch": 0.7492335990190068, "grad_norm": 0.7238817352668945, "learning_rate": 1.3931873479318737e-06, "loss": 0.6383, "step": 25662 }, { "epoch": 0.7492627952468541, "grad_norm": 0.7070714291239496, "learning_rate": 1.3930251419302515e-06, "loss": 0.6642, "step": 25663 }, { "epoch": 0.7492919914747015, "grad_norm": 0.7395147163704129, "learning_rate": 1.3928629359286295e-06, "loss": 0.6188, "step": 25664 }, { "epoch": 0.7493211877025489, "grad_norm": 0.714654503683649, "learning_rate": 1.3927007299270073e-06, "loss": 0.6139, "step": 25665 }, { "epoch": 0.7493503839303962, "grad_norm": 0.699086924556549, "learning_rate": 1.3925385239253853e-06, "loss": 0.574, "step": 25666 }, { "epoch": 0.7493795801582436, "grad_norm": 0.6889970764995796, "learning_rate": 1.392376317923763e-06, "loss": 0.6003, "step": 25667 }, { "epoch": 0.7494087763860909, "grad_norm": 0.7108867375625256, "learning_rate": 1.3922141119221413e-06, "loss": 0.6655, "step": 25668 }, { "epoch": 0.7494379726139383, "grad_norm": 0.8617749275925793, "learning_rate": 1.3920519059205193e-06, "loss": 0.8124, "step": 25669 }, { "epoch": 0.7494671688417857, "grad_norm": 0.7324351455542292, "learning_rate": 1.391889699918897e-06, "loss": 0.6331, "step": 25670 }, { "epoch": 0.749496365069633, "grad_norm": 0.7114381271890171, "learning_rate": 1.3917274939172751e-06, "loss": 0.6245, "step": 25671 }, { "epoch": 0.7495255612974804, "grad_norm": 0.6759235055841426, "learning_rate": 1.391565287915653e-06, "loss": 0.5779, "step": 25672 }, { "epoch": 0.7495547575253277, "grad_norm": 0.6990495321473931, "learning_rate": 1.391403081914031e-06, "loss": 0.5669, "step": 25673 }, { "epoch": 0.7495839537531751, "grad_norm": 0.6850460833312634, "learning_rate": 1.3912408759124087e-06, "loss": 0.5964, "step": 25674 }, { "epoch": 0.7496131499810225, "grad_norm": 0.8102385944599284, "learning_rate": 1.3910786699107867e-06, "loss": 0.701, "step": 25675 }, { "epoch": 0.7496423462088698, "grad_norm": 0.7146262883612957, "learning_rate": 1.3909164639091647e-06, "loss": 0.5934, "step": 25676 }, { "epoch": 0.7496715424367172, "grad_norm": 0.7187976488518983, "learning_rate": 1.3907542579075427e-06, "loss": 0.6206, "step": 25677 }, { "epoch": 0.7497007386645645, "grad_norm": 0.6505863511915744, "learning_rate": 1.3905920519059207e-06, "loss": 0.5223, "step": 25678 }, { "epoch": 0.7497299348924119, "grad_norm": 0.720047902239668, "learning_rate": 1.3904298459042987e-06, "loss": 0.6329, "step": 25679 }, { "epoch": 0.7497591311202593, "grad_norm": 0.7309039926285418, "learning_rate": 1.3902676399026765e-06, "loss": 0.65, "step": 25680 }, { "epoch": 0.7497883273481066, "grad_norm": 0.7282119498093536, "learning_rate": 1.3901054339010545e-06, "loss": 0.5925, "step": 25681 }, { "epoch": 0.749817523575954, "grad_norm": 0.7221436054335736, "learning_rate": 1.3899432278994323e-06, "loss": 0.6021, "step": 25682 }, { "epoch": 0.7498467198038014, "grad_norm": 0.7266984569397014, "learning_rate": 1.3897810218978103e-06, "loss": 0.6211, "step": 25683 }, { "epoch": 0.7498759160316487, "grad_norm": 0.738140193316802, "learning_rate": 1.3896188158961881e-06, "loss": 0.6279, "step": 25684 }, { "epoch": 0.7499051122594961, "grad_norm": 0.7164143299552964, "learning_rate": 1.3894566098945661e-06, "loss": 0.6582, "step": 25685 }, { "epoch": 0.7499343084873434, "grad_norm": 0.7414341218026269, "learning_rate": 1.3892944038929444e-06, "loss": 0.6983, "step": 25686 }, { "epoch": 0.7499635047151908, "grad_norm": 0.7396967033662557, "learning_rate": 1.3891321978913222e-06, "loss": 0.5515, "step": 25687 }, { "epoch": 0.7499927009430382, "grad_norm": 0.7319984337351462, "learning_rate": 1.3889699918897002e-06, "loss": 0.6485, "step": 25688 }, { "epoch": 0.7500218971708855, "grad_norm": 0.7099566316514203, "learning_rate": 1.388807785888078e-06, "loss": 0.6113, "step": 25689 }, { "epoch": 0.7500510933987329, "grad_norm": 0.7007105091628487, "learning_rate": 1.388645579886456e-06, "loss": 0.6364, "step": 25690 }, { "epoch": 0.7500802896265802, "grad_norm": 0.7601378283795963, "learning_rate": 1.3884833738848338e-06, "loss": 0.6861, "step": 25691 }, { "epoch": 0.7501094858544276, "grad_norm": 0.8002438176216914, "learning_rate": 1.3883211678832118e-06, "loss": 0.709, "step": 25692 }, { "epoch": 0.750138682082275, "grad_norm": 0.7736236312771241, "learning_rate": 1.3881589618815896e-06, "loss": 0.7103, "step": 25693 }, { "epoch": 0.7501678783101223, "grad_norm": 0.7141985958465183, "learning_rate": 1.3879967558799676e-06, "loss": 0.6411, "step": 25694 }, { "epoch": 0.7501970745379697, "grad_norm": 0.7051609351978751, "learning_rate": 1.3878345498783456e-06, "loss": 0.6535, "step": 25695 }, { "epoch": 0.750226270765817, "grad_norm": 0.7567368429453099, "learning_rate": 1.3876723438767236e-06, "loss": 0.6399, "step": 25696 }, { "epoch": 0.7502554669936644, "grad_norm": 0.7687681324391032, "learning_rate": 1.3875101378751016e-06, "loss": 0.6631, "step": 25697 }, { "epoch": 0.7502846632215118, "grad_norm": 0.7429624063588957, "learning_rate": 1.3873479318734796e-06, "loss": 0.7031, "step": 25698 }, { "epoch": 0.7503138594493591, "grad_norm": 0.705253650257408, "learning_rate": 1.3871857258718574e-06, "loss": 0.6212, "step": 25699 }, { "epoch": 0.7503430556772065, "grad_norm": 0.706385139152981, "learning_rate": 1.3870235198702354e-06, "loss": 0.6368, "step": 25700 }, { "epoch": 0.7503722519050539, "grad_norm": 0.7282735266644768, "learning_rate": 1.3868613138686132e-06, "loss": 0.6376, "step": 25701 }, { "epoch": 0.7504014481329012, "grad_norm": 0.7617978578206049, "learning_rate": 1.3866991078669912e-06, "loss": 0.6812, "step": 25702 }, { "epoch": 0.7504306443607486, "grad_norm": 0.7156669135680455, "learning_rate": 1.386536901865369e-06, "loss": 0.5742, "step": 25703 }, { "epoch": 0.7504598405885959, "grad_norm": 0.7255694014479657, "learning_rate": 1.386374695863747e-06, "loss": 0.611, "step": 25704 }, { "epoch": 0.7504890368164433, "grad_norm": 0.7126697386472194, "learning_rate": 1.3862124898621252e-06, "loss": 0.6424, "step": 25705 }, { "epoch": 0.7505182330442907, "grad_norm": 0.7707625200348761, "learning_rate": 1.386050283860503e-06, "loss": 0.7238, "step": 25706 }, { "epoch": 0.750547429272138, "grad_norm": 0.7034773932528146, "learning_rate": 1.385888077858881e-06, "loss": 0.6032, "step": 25707 }, { "epoch": 0.7505766254999854, "grad_norm": 0.7285542461864134, "learning_rate": 1.3857258718572588e-06, "loss": 0.6242, "step": 25708 }, { "epoch": 0.7506058217278327, "grad_norm": 0.7092698242608704, "learning_rate": 1.3855636658556368e-06, "loss": 0.6567, "step": 25709 }, { "epoch": 0.7506350179556801, "grad_norm": 0.685735922570298, "learning_rate": 1.3854014598540146e-06, "loss": 0.5819, "step": 25710 }, { "epoch": 0.7506642141835275, "grad_norm": 0.6778964893943091, "learning_rate": 1.3852392538523926e-06, "loss": 0.5696, "step": 25711 }, { "epoch": 0.7506934104113748, "grad_norm": 0.7183252018179089, "learning_rate": 1.3850770478507704e-06, "loss": 0.6374, "step": 25712 }, { "epoch": 0.7507226066392222, "grad_norm": 0.679193328654645, "learning_rate": 1.3849148418491484e-06, "loss": 0.5955, "step": 25713 }, { "epoch": 0.7507518028670696, "grad_norm": 0.8295392895817586, "learning_rate": 1.3847526358475264e-06, "loss": 0.8104, "step": 25714 }, { "epoch": 0.7507809990949169, "grad_norm": 0.7294331075555532, "learning_rate": 1.3845904298459044e-06, "loss": 0.6263, "step": 25715 }, { "epoch": 0.7508101953227643, "grad_norm": 0.9340744910549456, "learning_rate": 1.3844282238442824e-06, "loss": 0.6251, "step": 25716 }, { "epoch": 0.7508393915506116, "grad_norm": 0.652749141121287, "learning_rate": 1.3842660178426602e-06, "loss": 0.5673, "step": 25717 }, { "epoch": 0.750868587778459, "grad_norm": 0.69411057378857, "learning_rate": 1.3841038118410383e-06, "loss": 0.5948, "step": 25718 }, { "epoch": 0.7508977840063064, "grad_norm": 0.7463525031565397, "learning_rate": 1.3839416058394163e-06, "loss": 0.643, "step": 25719 }, { "epoch": 0.7509269802341537, "grad_norm": 0.7426949756138922, "learning_rate": 1.383779399837794e-06, "loss": 0.6596, "step": 25720 }, { "epoch": 0.7509561764620011, "grad_norm": 0.728447274403226, "learning_rate": 1.383617193836172e-06, "loss": 0.6333, "step": 25721 }, { "epoch": 0.7509853726898484, "grad_norm": 0.7666903125384471, "learning_rate": 1.3834549878345499e-06, "loss": 0.7524, "step": 25722 }, { "epoch": 0.7510145689176958, "grad_norm": 0.6719968580467334, "learning_rate": 1.3832927818329279e-06, "loss": 0.5392, "step": 25723 }, { "epoch": 0.7510437651455432, "grad_norm": 0.7387979208250014, "learning_rate": 1.383130575831306e-06, "loss": 0.6343, "step": 25724 }, { "epoch": 0.7510729613733905, "grad_norm": 0.8303356470212162, "learning_rate": 1.3829683698296839e-06, "loss": 0.7228, "step": 25725 }, { "epoch": 0.7511021576012379, "grad_norm": 0.7470779375751676, "learning_rate": 1.3828061638280619e-06, "loss": 0.6776, "step": 25726 }, { "epoch": 0.7511313538290852, "grad_norm": 0.7163965783480216, "learning_rate": 1.3826439578264397e-06, "loss": 0.6403, "step": 25727 }, { "epoch": 0.7511605500569326, "grad_norm": 0.7642370897787576, "learning_rate": 1.3824817518248177e-06, "loss": 0.7448, "step": 25728 }, { "epoch": 0.75118974628478, "grad_norm": 0.7276281014651595, "learning_rate": 1.3823195458231955e-06, "loss": 0.6235, "step": 25729 }, { "epoch": 0.7512189425126273, "grad_norm": 0.7183256316440852, "learning_rate": 1.3821573398215735e-06, "loss": 0.6427, "step": 25730 }, { "epoch": 0.7512481387404747, "grad_norm": 0.7750718455102348, "learning_rate": 1.3819951338199513e-06, "loss": 0.7239, "step": 25731 }, { "epoch": 0.751277334968322, "grad_norm": 0.7532944013957006, "learning_rate": 1.3818329278183293e-06, "loss": 0.6865, "step": 25732 }, { "epoch": 0.7513065311961694, "grad_norm": 0.7000068228556517, "learning_rate": 1.3816707218167073e-06, "loss": 0.5945, "step": 25733 }, { "epoch": 0.7513357274240168, "grad_norm": 0.7456996583176546, "learning_rate": 1.3815085158150853e-06, "loss": 0.6291, "step": 25734 }, { "epoch": 0.7513649236518641, "grad_norm": 0.6870290728915694, "learning_rate": 1.3813463098134633e-06, "loss": 0.608, "step": 25735 }, { "epoch": 0.7513941198797115, "grad_norm": 0.7443242555308238, "learning_rate": 1.381184103811841e-06, "loss": 0.6387, "step": 25736 }, { "epoch": 0.7514233161075589, "grad_norm": 0.7178577626978464, "learning_rate": 1.3810218978102191e-06, "loss": 0.6496, "step": 25737 }, { "epoch": 0.7514525123354062, "grad_norm": 0.7779062588978863, "learning_rate": 1.3808596918085971e-06, "loss": 0.7248, "step": 25738 }, { "epoch": 0.7514817085632536, "grad_norm": 0.6959123280163148, "learning_rate": 1.380697485806975e-06, "loss": 0.6056, "step": 25739 }, { "epoch": 0.7515109047911009, "grad_norm": 0.7670278079988322, "learning_rate": 1.380535279805353e-06, "loss": 0.6825, "step": 25740 }, { "epoch": 0.7515401010189483, "grad_norm": 0.6936464273422944, "learning_rate": 1.3803730738037307e-06, "loss": 0.6231, "step": 25741 }, { "epoch": 0.7515692972467957, "grad_norm": 0.6797213500243728, "learning_rate": 1.3802108678021087e-06, "loss": 0.6227, "step": 25742 }, { "epoch": 0.751598493474643, "grad_norm": 0.6971934492254027, "learning_rate": 1.380048661800487e-06, "loss": 0.6006, "step": 25743 }, { "epoch": 0.7516276897024904, "grad_norm": 0.807492377319189, "learning_rate": 1.3798864557988647e-06, "loss": 0.7473, "step": 25744 }, { "epoch": 0.7516568859303377, "grad_norm": 0.7407543624286083, "learning_rate": 1.3797242497972427e-06, "loss": 0.6278, "step": 25745 }, { "epoch": 0.7516860821581851, "grad_norm": 0.6925071992322704, "learning_rate": 1.3795620437956205e-06, "loss": 0.6131, "step": 25746 }, { "epoch": 0.7517152783860325, "grad_norm": 0.6403134300257141, "learning_rate": 1.3793998377939985e-06, "loss": 0.5042, "step": 25747 }, { "epoch": 0.7517444746138798, "grad_norm": 0.7727683314522924, "learning_rate": 1.3792376317923763e-06, "loss": 0.7125, "step": 25748 }, { "epoch": 0.7517736708417273, "grad_norm": 0.7620006820757798, "learning_rate": 1.3790754257907543e-06, "loss": 0.7004, "step": 25749 }, { "epoch": 0.7518028670695747, "grad_norm": 0.7363015968642804, "learning_rate": 1.3789132197891321e-06, "loss": 0.624, "step": 25750 }, { "epoch": 0.751832063297422, "grad_norm": 0.7063902481113894, "learning_rate": 1.3787510137875101e-06, "loss": 0.5927, "step": 25751 }, { "epoch": 0.7518612595252694, "grad_norm": 0.782996448849716, "learning_rate": 1.3785888077858884e-06, "loss": 0.6858, "step": 25752 }, { "epoch": 0.7518904557531167, "grad_norm": 0.7714592991662471, "learning_rate": 1.3784266017842662e-06, "loss": 0.6712, "step": 25753 }, { "epoch": 0.7519196519809641, "grad_norm": 0.7565059125244811, "learning_rate": 1.3782643957826442e-06, "loss": 0.6467, "step": 25754 }, { "epoch": 0.7519488482088115, "grad_norm": 0.6677103388640937, "learning_rate": 1.378102189781022e-06, "loss": 0.5437, "step": 25755 }, { "epoch": 0.7519780444366588, "grad_norm": 0.759052727759154, "learning_rate": 1.3779399837794e-06, "loss": 0.6736, "step": 25756 }, { "epoch": 0.7520072406645062, "grad_norm": 0.7141396445180965, "learning_rate": 1.377777777777778e-06, "loss": 0.6121, "step": 25757 }, { "epoch": 0.7520364368923536, "grad_norm": 0.7204833525161359, "learning_rate": 1.3776155717761558e-06, "loss": 0.6831, "step": 25758 }, { "epoch": 0.7520656331202009, "grad_norm": 0.7475792205488514, "learning_rate": 1.3774533657745338e-06, "loss": 0.6303, "step": 25759 }, { "epoch": 0.7520948293480483, "grad_norm": 0.9715458807250512, "learning_rate": 1.3772911597729116e-06, "loss": 0.7513, "step": 25760 }, { "epoch": 0.7521240255758956, "grad_norm": 0.8081918444319721, "learning_rate": 1.3771289537712896e-06, "loss": 0.6898, "step": 25761 }, { "epoch": 0.752153221803743, "grad_norm": 0.6837507396229117, "learning_rate": 1.3769667477696678e-06, "loss": 0.5615, "step": 25762 }, { "epoch": 0.7521824180315904, "grad_norm": 0.7208074913460875, "learning_rate": 1.3768045417680456e-06, "loss": 0.6264, "step": 25763 }, { "epoch": 0.7522116142594377, "grad_norm": 0.7376637969544775, "learning_rate": 1.3766423357664236e-06, "loss": 0.6912, "step": 25764 }, { "epoch": 0.7522408104872851, "grad_norm": 0.7418030145769895, "learning_rate": 1.3764801297648014e-06, "loss": 0.6436, "step": 25765 }, { "epoch": 0.7522700067151324, "grad_norm": 0.7479768541069478, "learning_rate": 1.3763179237631794e-06, "loss": 0.6581, "step": 25766 }, { "epoch": 0.7522992029429798, "grad_norm": 0.7213082033798628, "learning_rate": 1.3761557177615572e-06, "loss": 0.6519, "step": 25767 }, { "epoch": 0.7523283991708272, "grad_norm": 0.7270548922154083, "learning_rate": 1.3759935117599352e-06, "loss": 0.6619, "step": 25768 }, { "epoch": 0.7523575953986745, "grad_norm": 0.7497281991056943, "learning_rate": 1.375831305758313e-06, "loss": 0.6424, "step": 25769 }, { "epoch": 0.7523867916265219, "grad_norm": 0.7587716078312867, "learning_rate": 1.375669099756691e-06, "loss": 0.6974, "step": 25770 }, { "epoch": 0.7524159878543693, "grad_norm": 0.6936146693776163, "learning_rate": 1.3755068937550692e-06, "loss": 0.5995, "step": 25771 }, { "epoch": 0.7524451840822166, "grad_norm": 0.7962036056300889, "learning_rate": 1.375344687753447e-06, "loss": 0.7099, "step": 25772 }, { "epoch": 0.752474380310064, "grad_norm": 0.7013229958561272, "learning_rate": 1.375182481751825e-06, "loss": 0.6695, "step": 25773 }, { "epoch": 0.7525035765379113, "grad_norm": 0.8062351939602445, "learning_rate": 1.3750202757502028e-06, "loss": 0.7402, "step": 25774 }, { "epoch": 0.7525327727657587, "grad_norm": 0.7854302709817542, "learning_rate": 1.3748580697485808e-06, "loss": 0.6953, "step": 25775 }, { "epoch": 0.7525619689936061, "grad_norm": 0.6948051735794569, "learning_rate": 1.3746958637469588e-06, "loss": 0.5447, "step": 25776 }, { "epoch": 0.7525911652214534, "grad_norm": 0.736291192285709, "learning_rate": 1.3745336577453366e-06, "loss": 0.5981, "step": 25777 }, { "epoch": 0.7526203614493008, "grad_norm": 0.7485415110185949, "learning_rate": 1.3743714517437146e-06, "loss": 0.6654, "step": 25778 }, { "epoch": 0.7526495576771481, "grad_norm": 0.7400673572244579, "learning_rate": 1.3742092457420924e-06, "loss": 0.6602, "step": 25779 }, { "epoch": 0.7526787539049955, "grad_norm": 0.7359158336284918, "learning_rate": 1.3740470397404704e-06, "loss": 0.6354, "step": 25780 }, { "epoch": 0.7527079501328429, "grad_norm": 0.7466128484989598, "learning_rate": 1.3738848337388487e-06, "loss": 0.614, "step": 25781 }, { "epoch": 0.7527371463606902, "grad_norm": 0.8178153391411932, "learning_rate": 1.3737226277372265e-06, "loss": 0.624, "step": 25782 }, { "epoch": 0.7527663425885376, "grad_norm": 0.8226999142694124, "learning_rate": 1.3735604217356045e-06, "loss": 0.6658, "step": 25783 }, { "epoch": 0.752795538816385, "grad_norm": 0.6921399784980184, "learning_rate": 1.3733982157339823e-06, "loss": 0.5962, "step": 25784 }, { "epoch": 0.7528247350442323, "grad_norm": 0.7279286303128708, "learning_rate": 1.3732360097323603e-06, "loss": 0.6312, "step": 25785 }, { "epoch": 0.7528539312720797, "grad_norm": 0.7680977367832783, "learning_rate": 1.373073803730738e-06, "loss": 0.6535, "step": 25786 }, { "epoch": 0.752883127499927, "grad_norm": 0.7075726776024633, "learning_rate": 1.372911597729116e-06, "loss": 0.6477, "step": 25787 }, { "epoch": 0.7529123237277744, "grad_norm": 0.6793801951498978, "learning_rate": 1.3727493917274939e-06, "loss": 0.574, "step": 25788 }, { "epoch": 0.7529415199556218, "grad_norm": 0.8122989446717671, "learning_rate": 1.3725871857258719e-06, "loss": 0.6682, "step": 25789 }, { "epoch": 0.7529707161834691, "grad_norm": 0.778520766746399, "learning_rate": 1.37242497972425e-06, "loss": 0.7063, "step": 25790 }, { "epoch": 0.7529999124113165, "grad_norm": 0.6892476575000371, "learning_rate": 1.3722627737226279e-06, "loss": 0.5771, "step": 25791 }, { "epoch": 0.7530291086391638, "grad_norm": 0.7699378429190976, "learning_rate": 1.3721005677210059e-06, "loss": 0.6071, "step": 25792 }, { "epoch": 0.7530583048670112, "grad_norm": 0.7249102561602815, "learning_rate": 1.3719383617193837e-06, "loss": 0.5915, "step": 25793 }, { "epoch": 0.7530875010948586, "grad_norm": 0.6945038854085781, "learning_rate": 1.3717761557177617e-06, "loss": 0.5911, "step": 25794 }, { "epoch": 0.7531166973227059, "grad_norm": 0.7030168817117934, "learning_rate": 1.3716139497161397e-06, "loss": 0.6179, "step": 25795 }, { "epoch": 0.7531458935505533, "grad_norm": 0.7024993696093179, "learning_rate": 1.3714517437145175e-06, "loss": 0.6163, "step": 25796 }, { "epoch": 0.7531750897784006, "grad_norm": 0.7233288370663179, "learning_rate": 1.3712895377128955e-06, "loss": 0.6662, "step": 25797 }, { "epoch": 0.753204286006248, "grad_norm": 0.7355592381181456, "learning_rate": 1.3711273317112733e-06, "loss": 0.6481, "step": 25798 }, { "epoch": 0.7532334822340954, "grad_norm": 0.679041482067449, "learning_rate": 1.3709651257096513e-06, "loss": 0.5802, "step": 25799 }, { "epoch": 0.7532626784619427, "grad_norm": 0.7438197883237689, "learning_rate": 1.3708029197080295e-06, "loss": 0.6567, "step": 25800 }, { "epoch": 0.7532918746897901, "grad_norm": 0.6741759234151675, "learning_rate": 1.3706407137064073e-06, "loss": 0.5934, "step": 25801 }, { "epoch": 0.7533210709176374, "grad_norm": 0.7385954559467729, "learning_rate": 1.3704785077047853e-06, "loss": 0.6896, "step": 25802 }, { "epoch": 0.7533502671454848, "grad_norm": 0.7217233435109308, "learning_rate": 1.3703163017031631e-06, "loss": 0.5916, "step": 25803 }, { "epoch": 0.7533794633733322, "grad_norm": 0.7950590140569429, "learning_rate": 1.3701540957015411e-06, "loss": 0.7161, "step": 25804 }, { "epoch": 0.7534086596011795, "grad_norm": 0.7954250983043653, "learning_rate": 1.369991889699919e-06, "loss": 0.7002, "step": 25805 }, { "epoch": 0.7534378558290269, "grad_norm": 0.762446812385985, "learning_rate": 1.369829683698297e-06, "loss": 0.713, "step": 25806 }, { "epoch": 0.7534670520568743, "grad_norm": 0.7305039570706576, "learning_rate": 1.3696674776966747e-06, "loss": 0.65, "step": 25807 }, { "epoch": 0.7534962482847216, "grad_norm": 0.7089175714704449, "learning_rate": 1.3695052716950527e-06, "loss": 0.6156, "step": 25808 }, { "epoch": 0.753525444512569, "grad_norm": 0.6996119942825436, "learning_rate": 1.369343065693431e-06, "loss": 0.6164, "step": 25809 }, { "epoch": 0.7535546407404163, "grad_norm": 0.7588882756188695, "learning_rate": 1.3691808596918087e-06, "loss": 0.6894, "step": 25810 }, { "epoch": 0.7535838369682637, "grad_norm": 0.884702927287606, "learning_rate": 1.3690186536901867e-06, "loss": 0.6745, "step": 25811 }, { "epoch": 0.7536130331961111, "grad_norm": 0.698323160196583, "learning_rate": 1.3688564476885645e-06, "loss": 0.6172, "step": 25812 }, { "epoch": 0.7536422294239584, "grad_norm": 0.7021025031101333, "learning_rate": 1.3686942416869425e-06, "loss": 0.6296, "step": 25813 }, { "epoch": 0.7536714256518058, "grad_norm": 0.731376640131788, "learning_rate": 1.3685320356853206e-06, "loss": 0.6552, "step": 25814 }, { "epoch": 0.7537006218796531, "grad_norm": 0.6660250482382778, "learning_rate": 1.3683698296836983e-06, "loss": 0.559, "step": 25815 }, { "epoch": 0.7537298181075005, "grad_norm": 0.6805341469328091, "learning_rate": 1.3682076236820764e-06, "loss": 0.5893, "step": 25816 }, { "epoch": 0.7537590143353479, "grad_norm": 0.6978916132322722, "learning_rate": 1.3680454176804541e-06, "loss": 0.5833, "step": 25817 }, { "epoch": 0.7537882105631952, "grad_norm": 0.6601127709553928, "learning_rate": 1.3678832116788322e-06, "loss": 0.5229, "step": 25818 }, { "epoch": 0.7538174067910426, "grad_norm": 0.7936544792155601, "learning_rate": 1.3677210056772104e-06, "loss": 0.7122, "step": 25819 }, { "epoch": 0.75384660301889, "grad_norm": 0.730408299873922, "learning_rate": 1.3675587996755882e-06, "loss": 0.6359, "step": 25820 }, { "epoch": 0.7538757992467373, "grad_norm": 0.6702275919261146, "learning_rate": 1.3673965936739662e-06, "loss": 0.5537, "step": 25821 }, { "epoch": 0.7539049954745847, "grad_norm": 0.6955626234991582, "learning_rate": 1.367234387672344e-06, "loss": 0.6004, "step": 25822 }, { "epoch": 0.753934191702432, "grad_norm": 0.733132590830051, "learning_rate": 1.367072181670722e-06, "loss": 0.6028, "step": 25823 }, { "epoch": 0.7539633879302794, "grad_norm": 0.6854718405289199, "learning_rate": 1.3669099756690998e-06, "loss": 0.5916, "step": 25824 }, { "epoch": 0.7539925841581268, "grad_norm": 0.7127856597308821, "learning_rate": 1.3667477696674778e-06, "loss": 0.6224, "step": 25825 }, { "epoch": 0.7540217803859741, "grad_norm": 0.6986352500669271, "learning_rate": 1.3665855636658556e-06, "loss": 0.6034, "step": 25826 }, { "epoch": 0.7540509766138215, "grad_norm": 0.7481308849829721, "learning_rate": 1.3664233576642336e-06, "loss": 0.6424, "step": 25827 }, { "epoch": 0.7540801728416688, "grad_norm": 0.7097127248678828, "learning_rate": 1.3662611516626118e-06, "loss": 0.6187, "step": 25828 }, { "epoch": 0.7541093690695162, "grad_norm": 0.7124379920022921, "learning_rate": 1.3660989456609896e-06, "loss": 0.6239, "step": 25829 }, { "epoch": 0.7541385652973636, "grad_norm": 0.7993069615054375, "learning_rate": 1.3659367396593676e-06, "loss": 0.6822, "step": 25830 }, { "epoch": 0.7541677615252109, "grad_norm": 0.7133960284117531, "learning_rate": 1.3657745336577454e-06, "loss": 0.6276, "step": 25831 }, { "epoch": 0.7541969577530583, "grad_norm": 0.7165155389468434, "learning_rate": 1.3656123276561234e-06, "loss": 0.6043, "step": 25832 }, { "epoch": 0.7542261539809056, "grad_norm": 0.7313993485289345, "learning_rate": 1.3654501216545014e-06, "loss": 0.6329, "step": 25833 }, { "epoch": 0.754255350208753, "grad_norm": 0.7497045451467931, "learning_rate": 1.3652879156528792e-06, "loss": 0.6916, "step": 25834 }, { "epoch": 0.7542845464366004, "grad_norm": 0.7521875291149841, "learning_rate": 1.3651257096512572e-06, "loss": 0.728, "step": 25835 }, { "epoch": 0.7543137426644477, "grad_norm": 0.7508561297078262, "learning_rate": 1.364963503649635e-06, "loss": 0.6908, "step": 25836 }, { "epoch": 0.7543429388922951, "grad_norm": 0.6502705923058484, "learning_rate": 1.3648012976480132e-06, "loss": 0.5149, "step": 25837 }, { "epoch": 0.7543721351201425, "grad_norm": 0.7007736199766819, "learning_rate": 1.3646390916463912e-06, "loss": 0.6314, "step": 25838 }, { "epoch": 0.7544013313479898, "grad_norm": 0.7315737800311367, "learning_rate": 1.364476885644769e-06, "loss": 0.641, "step": 25839 }, { "epoch": 0.7544305275758372, "grad_norm": 0.7475827483118894, "learning_rate": 1.364314679643147e-06, "loss": 0.6302, "step": 25840 }, { "epoch": 0.7544597238036845, "grad_norm": 0.7335485710288548, "learning_rate": 1.3641524736415248e-06, "loss": 0.6678, "step": 25841 }, { "epoch": 0.7544889200315319, "grad_norm": 0.7466318272014144, "learning_rate": 1.3639902676399028e-06, "loss": 0.6643, "step": 25842 }, { "epoch": 0.7545181162593793, "grad_norm": 0.7465293645380885, "learning_rate": 1.3638280616382806e-06, "loss": 0.6831, "step": 25843 }, { "epoch": 0.7545473124872266, "grad_norm": 0.73331414461591, "learning_rate": 1.3636658556366586e-06, "loss": 0.6642, "step": 25844 }, { "epoch": 0.754576508715074, "grad_norm": 0.7289472265961375, "learning_rate": 1.3635036496350364e-06, "loss": 0.5249, "step": 25845 }, { "epoch": 0.7546057049429213, "grad_norm": 0.7287902977778907, "learning_rate": 1.3633414436334144e-06, "loss": 0.6523, "step": 25846 }, { "epoch": 0.7546349011707687, "grad_norm": 0.7043221864738463, "learning_rate": 1.3631792376317927e-06, "loss": 0.5889, "step": 25847 }, { "epoch": 0.7546640973986161, "grad_norm": 0.6941132431014208, "learning_rate": 1.3630170316301705e-06, "loss": 0.5662, "step": 25848 }, { "epoch": 0.7546932936264634, "grad_norm": 0.7249434675227607, "learning_rate": 1.3628548256285485e-06, "loss": 0.6051, "step": 25849 }, { "epoch": 0.7547224898543108, "grad_norm": 0.6744863514325251, "learning_rate": 1.3626926196269263e-06, "loss": 0.5709, "step": 25850 }, { "epoch": 0.7547516860821581, "grad_norm": 0.7851585858829239, "learning_rate": 1.3625304136253043e-06, "loss": 0.6896, "step": 25851 }, { "epoch": 0.7547808823100055, "grad_norm": 0.7550411830980559, "learning_rate": 1.362368207623682e-06, "loss": 0.5768, "step": 25852 }, { "epoch": 0.7548100785378529, "grad_norm": 0.7278284073341045, "learning_rate": 1.36220600162206e-06, "loss": 0.6444, "step": 25853 }, { "epoch": 0.7548392747657002, "grad_norm": 0.6685160315442252, "learning_rate": 1.362043795620438e-06, "loss": 0.6231, "step": 25854 }, { "epoch": 0.7548684709935476, "grad_norm": 0.7284394102924646, "learning_rate": 1.3618815896188159e-06, "loss": 0.69, "step": 25855 }, { "epoch": 0.754897667221395, "grad_norm": 0.7330718597981881, "learning_rate": 1.361719383617194e-06, "loss": 0.6267, "step": 25856 }, { "epoch": 0.7549268634492423, "grad_norm": 0.8037692299488358, "learning_rate": 1.361557177615572e-06, "loss": 0.7725, "step": 25857 }, { "epoch": 0.7549560596770897, "grad_norm": 0.8938845841086922, "learning_rate": 1.3613949716139499e-06, "loss": 0.5762, "step": 25858 }, { "epoch": 0.754985255904937, "grad_norm": 0.7641648631788794, "learning_rate": 1.3612327656123279e-06, "loss": 0.7021, "step": 25859 }, { "epoch": 0.7550144521327844, "grad_norm": 0.7520060390469835, "learning_rate": 1.3610705596107057e-06, "loss": 0.6637, "step": 25860 }, { "epoch": 0.7550436483606318, "grad_norm": 0.78807986618264, "learning_rate": 1.3609083536090837e-06, "loss": 0.7116, "step": 25861 }, { "epoch": 0.7550728445884791, "grad_norm": 0.7385256015510485, "learning_rate": 1.3607461476074615e-06, "loss": 0.692, "step": 25862 }, { "epoch": 0.7551020408163265, "grad_norm": 0.6659929038686693, "learning_rate": 1.3605839416058395e-06, "loss": 0.5495, "step": 25863 }, { "epoch": 0.7551312370441738, "grad_norm": 0.6904024043832773, "learning_rate": 1.3604217356042173e-06, "loss": 0.5918, "step": 25864 }, { "epoch": 0.7551604332720212, "grad_norm": 0.732813337302866, "learning_rate": 1.3602595296025953e-06, "loss": 0.6617, "step": 25865 }, { "epoch": 0.7551896294998686, "grad_norm": 0.7281033062587269, "learning_rate": 1.3600973236009735e-06, "loss": 0.6177, "step": 25866 }, { "epoch": 0.7552188257277159, "grad_norm": 0.7615586272202157, "learning_rate": 1.3599351175993513e-06, "loss": 0.7009, "step": 25867 }, { "epoch": 0.7552480219555633, "grad_norm": 0.7239883046658788, "learning_rate": 1.3597729115977293e-06, "loss": 0.6223, "step": 25868 }, { "epoch": 0.7552772181834106, "grad_norm": 0.7225933292550429, "learning_rate": 1.3596107055961071e-06, "loss": 0.613, "step": 25869 }, { "epoch": 0.7553064144112581, "grad_norm": 0.6591347611890668, "learning_rate": 1.3594484995944851e-06, "loss": 0.5445, "step": 25870 }, { "epoch": 0.7553356106391055, "grad_norm": 0.7349996939801945, "learning_rate": 1.359286293592863e-06, "loss": 0.6653, "step": 25871 }, { "epoch": 0.7553648068669528, "grad_norm": 0.7094341524337967, "learning_rate": 1.359124087591241e-06, "loss": 0.6407, "step": 25872 }, { "epoch": 0.7553940030948002, "grad_norm": 0.6894466766352413, "learning_rate": 1.358961881589619e-06, "loss": 0.5599, "step": 25873 }, { "epoch": 0.7554231993226476, "grad_norm": 0.6911594406600652, "learning_rate": 1.3587996755879967e-06, "loss": 0.549, "step": 25874 }, { "epoch": 0.7554523955504949, "grad_norm": 0.7108638772766863, "learning_rate": 1.358637469586375e-06, "loss": 0.6526, "step": 25875 }, { "epoch": 0.7554815917783423, "grad_norm": 0.7246771674490955, "learning_rate": 1.358475263584753e-06, "loss": 0.6399, "step": 25876 }, { "epoch": 0.7555107880061896, "grad_norm": 0.6886059181991188, "learning_rate": 1.3583130575831307e-06, "loss": 0.6049, "step": 25877 }, { "epoch": 0.755539984234037, "grad_norm": 0.752377459659788, "learning_rate": 1.3581508515815088e-06, "loss": 0.6273, "step": 25878 }, { "epoch": 0.7555691804618844, "grad_norm": 1.167178869150513, "learning_rate": 1.3579886455798865e-06, "loss": 0.6667, "step": 25879 }, { "epoch": 0.7555983766897317, "grad_norm": 0.6995466212148689, "learning_rate": 1.3578264395782646e-06, "loss": 0.6074, "step": 25880 }, { "epoch": 0.7556275729175791, "grad_norm": 0.7734707487734794, "learning_rate": 1.3576642335766423e-06, "loss": 0.6258, "step": 25881 }, { "epoch": 0.7556567691454265, "grad_norm": 0.6330304923393297, "learning_rate": 1.3575020275750204e-06, "loss": 0.5473, "step": 25882 }, { "epoch": 0.7556859653732738, "grad_norm": 0.8664980574760354, "learning_rate": 1.3573398215733981e-06, "loss": 0.6163, "step": 25883 }, { "epoch": 0.7557151616011212, "grad_norm": 0.6982190119286682, "learning_rate": 1.3571776155717762e-06, "loss": 0.6229, "step": 25884 }, { "epoch": 0.7557443578289685, "grad_norm": 0.7761424534526039, "learning_rate": 1.3570154095701544e-06, "loss": 0.668, "step": 25885 }, { "epoch": 0.7557735540568159, "grad_norm": 0.7000281251055256, "learning_rate": 1.3568532035685322e-06, "loss": 0.5983, "step": 25886 }, { "epoch": 0.7558027502846633, "grad_norm": 0.6807690832894119, "learning_rate": 1.3566909975669102e-06, "loss": 0.608, "step": 25887 }, { "epoch": 0.7558319465125106, "grad_norm": 0.7510428393238929, "learning_rate": 1.356528791565288e-06, "loss": 0.6833, "step": 25888 }, { "epoch": 0.755861142740358, "grad_norm": 0.7279670985723491, "learning_rate": 1.356366585563666e-06, "loss": 0.6722, "step": 25889 }, { "epoch": 0.7558903389682053, "grad_norm": 0.6923154650404415, "learning_rate": 1.3562043795620438e-06, "loss": 0.5752, "step": 25890 }, { "epoch": 0.7559195351960527, "grad_norm": 0.7190824737138931, "learning_rate": 1.3560421735604218e-06, "loss": 0.6264, "step": 25891 }, { "epoch": 0.7559487314239001, "grad_norm": 0.6904704641062915, "learning_rate": 1.3558799675587998e-06, "loss": 0.5795, "step": 25892 }, { "epoch": 0.7559779276517474, "grad_norm": 0.6732653214819647, "learning_rate": 1.3557177615571776e-06, "loss": 0.5668, "step": 25893 }, { "epoch": 0.7560071238795948, "grad_norm": 0.7225102265193207, "learning_rate": 1.3555555555555558e-06, "loss": 0.6449, "step": 25894 }, { "epoch": 0.7560363201074422, "grad_norm": 0.9581826222442399, "learning_rate": 1.3553933495539338e-06, "loss": 0.6741, "step": 25895 }, { "epoch": 0.7560655163352895, "grad_norm": 0.7298966767757368, "learning_rate": 1.3552311435523116e-06, "loss": 0.6757, "step": 25896 }, { "epoch": 0.7560947125631369, "grad_norm": 0.6894346449234635, "learning_rate": 1.3550689375506896e-06, "loss": 0.607, "step": 25897 }, { "epoch": 0.7561239087909842, "grad_norm": 0.7611316686761159, "learning_rate": 1.3549067315490674e-06, "loss": 0.7062, "step": 25898 }, { "epoch": 0.7561531050188316, "grad_norm": 0.7057389410223002, "learning_rate": 1.3547445255474454e-06, "loss": 0.5393, "step": 25899 }, { "epoch": 0.756182301246679, "grad_norm": 0.7168143982142797, "learning_rate": 1.3545823195458232e-06, "loss": 0.6538, "step": 25900 }, { "epoch": 0.7562114974745263, "grad_norm": 0.710302887550636, "learning_rate": 1.3544201135442012e-06, "loss": 0.6111, "step": 25901 }, { "epoch": 0.7562406937023737, "grad_norm": 0.7892506388684698, "learning_rate": 1.354257907542579e-06, "loss": 0.6175, "step": 25902 }, { "epoch": 0.756269889930221, "grad_norm": 0.6620965401611425, "learning_rate": 1.3540957015409572e-06, "loss": 0.554, "step": 25903 }, { "epoch": 0.7562990861580684, "grad_norm": 0.7192240997342302, "learning_rate": 1.3539334955393352e-06, "loss": 0.6354, "step": 25904 }, { "epoch": 0.7563282823859158, "grad_norm": 0.7422148463449356, "learning_rate": 1.353771289537713e-06, "loss": 0.7062, "step": 25905 }, { "epoch": 0.7563574786137631, "grad_norm": 0.7346880635401816, "learning_rate": 1.353609083536091e-06, "loss": 0.6357, "step": 25906 }, { "epoch": 0.7563866748416105, "grad_norm": 0.7375241590451231, "learning_rate": 1.3534468775344688e-06, "loss": 0.6633, "step": 25907 }, { "epoch": 0.7564158710694578, "grad_norm": 0.745927899039656, "learning_rate": 1.3532846715328468e-06, "loss": 0.668, "step": 25908 }, { "epoch": 0.7564450672973052, "grad_norm": 0.6691110646344753, "learning_rate": 1.3531224655312246e-06, "loss": 0.5803, "step": 25909 }, { "epoch": 0.7564742635251526, "grad_norm": 0.6999445288526195, "learning_rate": 1.3529602595296026e-06, "loss": 0.6209, "step": 25910 }, { "epoch": 0.7565034597529999, "grad_norm": 0.7239642075422048, "learning_rate": 1.3527980535279806e-06, "loss": 0.6598, "step": 25911 }, { "epoch": 0.7565326559808473, "grad_norm": 0.7172920419805969, "learning_rate": 1.3526358475263584e-06, "loss": 0.6096, "step": 25912 }, { "epoch": 0.7565618522086947, "grad_norm": 0.7576367602459506, "learning_rate": 1.3524736415247367e-06, "loss": 0.6636, "step": 25913 }, { "epoch": 0.756591048436542, "grad_norm": 0.7466419844836251, "learning_rate": 1.3523114355231145e-06, "loss": 0.6677, "step": 25914 }, { "epoch": 0.7566202446643894, "grad_norm": 0.7742531528108536, "learning_rate": 1.3521492295214925e-06, "loss": 0.7221, "step": 25915 }, { "epoch": 0.7566494408922367, "grad_norm": 0.7271789557757766, "learning_rate": 1.3519870235198705e-06, "loss": 0.5849, "step": 25916 }, { "epoch": 0.7566786371200841, "grad_norm": 0.7294240248672667, "learning_rate": 1.3518248175182483e-06, "loss": 0.648, "step": 25917 }, { "epoch": 0.7567078333479315, "grad_norm": 0.6866789592922143, "learning_rate": 1.3516626115166263e-06, "loss": 0.5819, "step": 25918 }, { "epoch": 0.7567370295757788, "grad_norm": 0.7582605081699801, "learning_rate": 1.351500405515004e-06, "loss": 0.727, "step": 25919 }, { "epoch": 0.7567662258036262, "grad_norm": 0.7055625702043787, "learning_rate": 1.351338199513382e-06, "loss": 0.6063, "step": 25920 }, { "epoch": 0.7567954220314735, "grad_norm": 0.716825234861274, "learning_rate": 1.3511759935117599e-06, "loss": 0.6079, "step": 25921 }, { "epoch": 0.7568246182593209, "grad_norm": 0.6707179468322478, "learning_rate": 1.351013787510138e-06, "loss": 0.5155, "step": 25922 }, { "epoch": 0.7568538144871683, "grad_norm": 0.6860925246416075, "learning_rate": 1.350851581508516e-06, "loss": 0.6441, "step": 25923 }, { "epoch": 0.7568830107150156, "grad_norm": 0.8407067031492081, "learning_rate": 1.3506893755068939e-06, "loss": 0.7348, "step": 25924 }, { "epoch": 0.756912206942863, "grad_norm": 0.6982979933658657, "learning_rate": 1.3505271695052719e-06, "loss": 0.5918, "step": 25925 }, { "epoch": 0.7569414031707103, "grad_norm": 0.6795449116877358, "learning_rate": 1.3503649635036497e-06, "loss": 0.5746, "step": 25926 }, { "epoch": 0.7569705993985577, "grad_norm": 0.6844058488667225, "learning_rate": 1.3502027575020277e-06, "loss": 0.5512, "step": 25927 }, { "epoch": 0.7569997956264051, "grad_norm": 0.698884914551073, "learning_rate": 1.3500405515004055e-06, "loss": 0.6398, "step": 25928 }, { "epoch": 0.7570289918542524, "grad_norm": 0.7193856581290852, "learning_rate": 1.3498783454987835e-06, "loss": 0.655, "step": 25929 }, { "epoch": 0.7570581880820998, "grad_norm": 0.7705384787421231, "learning_rate": 1.3497161394971615e-06, "loss": 0.6818, "step": 25930 }, { "epoch": 0.7570873843099472, "grad_norm": 0.7297834588121662, "learning_rate": 1.3495539334955393e-06, "loss": 0.695, "step": 25931 }, { "epoch": 0.7571165805377945, "grad_norm": 0.676498741967282, "learning_rate": 1.3493917274939175e-06, "loss": 0.5313, "step": 25932 }, { "epoch": 0.7571457767656419, "grad_norm": 0.6729171238092441, "learning_rate": 1.3492295214922953e-06, "loss": 0.5433, "step": 25933 }, { "epoch": 0.7571749729934892, "grad_norm": 0.7222092886920717, "learning_rate": 1.3490673154906733e-06, "loss": 0.6819, "step": 25934 }, { "epoch": 0.7572041692213366, "grad_norm": 0.6939642110281669, "learning_rate": 1.3489051094890513e-06, "loss": 0.5965, "step": 25935 }, { "epoch": 0.757233365449184, "grad_norm": 0.7023887708870062, "learning_rate": 1.3487429034874291e-06, "loss": 0.6305, "step": 25936 }, { "epoch": 0.7572625616770313, "grad_norm": 0.7737060816475474, "learning_rate": 1.3485806974858071e-06, "loss": 0.6944, "step": 25937 }, { "epoch": 0.7572917579048787, "grad_norm": 0.7065484503104706, "learning_rate": 1.348418491484185e-06, "loss": 0.5606, "step": 25938 }, { "epoch": 0.757320954132726, "grad_norm": 0.7345231131576015, "learning_rate": 1.348256285482563e-06, "loss": 0.6529, "step": 25939 }, { "epoch": 0.7573501503605734, "grad_norm": 0.7101810900394634, "learning_rate": 1.3480940794809407e-06, "loss": 0.5725, "step": 25940 }, { "epoch": 0.7573793465884208, "grad_norm": 0.7183065667795301, "learning_rate": 1.347931873479319e-06, "loss": 0.6582, "step": 25941 }, { "epoch": 0.7574085428162681, "grad_norm": 0.6573791049470551, "learning_rate": 1.347769667477697e-06, "loss": 0.5402, "step": 25942 }, { "epoch": 0.7574377390441155, "grad_norm": 0.7157657402206782, "learning_rate": 1.3476074614760747e-06, "loss": 0.6228, "step": 25943 }, { "epoch": 0.7574669352719628, "grad_norm": 0.7953653897907054, "learning_rate": 1.3474452554744528e-06, "loss": 0.6831, "step": 25944 }, { "epoch": 0.7574961314998102, "grad_norm": 0.7377854195992778, "learning_rate": 1.3472830494728305e-06, "loss": 0.65, "step": 25945 }, { "epoch": 0.7575253277276576, "grad_norm": 0.7135016175268045, "learning_rate": 1.3471208434712086e-06, "loss": 0.5891, "step": 25946 }, { "epoch": 0.7575545239555049, "grad_norm": 0.7702211738160178, "learning_rate": 1.3469586374695863e-06, "loss": 0.6926, "step": 25947 }, { "epoch": 0.7575837201833523, "grad_norm": 0.7595883598302499, "learning_rate": 1.3467964314679644e-06, "loss": 0.6731, "step": 25948 }, { "epoch": 0.7576129164111997, "grad_norm": 0.7067484493388154, "learning_rate": 1.3466342254663424e-06, "loss": 0.6266, "step": 25949 }, { "epoch": 0.757642112639047, "grad_norm": 0.7358181194678645, "learning_rate": 1.3464720194647202e-06, "loss": 0.605, "step": 25950 }, { "epoch": 0.7576713088668944, "grad_norm": 0.7010943500532382, "learning_rate": 1.3463098134630984e-06, "loss": 0.6215, "step": 25951 }, { "epoch": 0.7577005050947417, "grad_norm": 0.6490313704017598, "learning_rate": 1.3461476074614762e-06, "loss": 0.5702, "step": 25952 }, { "epoch": 0.7577297013225891, "grad_norm": 0.7464364007215255, "learning_rate": 1.3459854014598542e-06, "loss": 0.5892, "step": 25953 }, { "epoch": 0.7577588975504365, "grad_norm": 0.7192111693868554, "learning_rate": 1.3458231954582322e-06, "loss": 0.639, "step": 25954 }, { "epoch": 0.7577880937782838, "grad_norm": 0.7441439809955858, "learning_rate": 1.34566098945661e-06, "loss": 0.7007, "step": 25955 }, { "epoch": 0.7578172900061312, "grad_norm": 1.0008738181977022, "learning_rate": 1.345498783454988e-06, "loss": 0.6647, "step": 25956 }, { "epoch": 0.7578464862339785, "grad_norm": 0.7296889209150831, "learning_rate": 1.3453365774533658e-06, "loss": 0.663, "step": 25957 }, { "epoch": 0.7578756824618259, "grad_norm": 0.7334184575628125, "learning_rate": 1.3451743714517438e-06, "loss": 0.6373, "step": 25958 }, { "epoch": 0.7579048786896733, "grad_norm": 0.7111341124723679, "learning_rate": 1.3450121654501216e-06, "loss": 0.6549, "step": 25959 }, { "epoch": 0.7579340749175206, "grad_norm": 0.7802910180102873, "learning_rate": 1.3448499594484998e-06, "loss": 0.7517, "step": 25960 }, { "epoch": 0.757963271145368, "grad_norm": 0.7211455558977854, "learning_rate": 1.3446877534468778e-06, "loss": 0.635, "step": 25961 }, { "epoch": 0.7579924673732154, "grad_norm": 0.7149092575366254, "learning_rate": 1.3445255474452556e-06, "loss": 0.6442, "step": 25962 }, { "epoch": 0.7580216636010627, "grad_norm": 0.751130997988443, "learning_rate": 1.3443633414436336e-06, "loss": 0.6946, "step": 25963 }, { "epoch": 0.7580508598289101, "grad_norm": 0.6670248400814326, "learning_rate": 1.3442011354420114e-06, "loss": 0.556, "step": 25964 }, { "epoch": 0.7580800560567574, "grad_norm": 0.7117157812622661, "learning_rate": 1.3440389294403894e-06, "loss": 0.5683, "step": 25965 }, { "epoch": 0.7581092522846048, "grad_norm": 0.8238001184786509, "learning_rate": 1.3438767234387672e-06, "loss": 0.6498, "step": 25966 }, { "epoch": 0.7581384485124522, "grad_norm": 0.7460164505680285, "learning_rate": 1.3437145174371452e-06, "loss": 0.6102, "step": 25967 }, { "epoch": 0.7581676447402995, "grad_norm": 0.9525119998299255, "learning_rate": 1.3435523114355232e-06, "loss": 0.6673, "step": 25968 }, { "epoch": 0.7581968409681469, "grad_norm": 0.7659287516555054, "learning_rate": 1.343390105433901e-06, "loss": 0.6743, "step": 25969 }, { "epoch": 0.7582260371959942, "grad_norm": 0.7087955230570655, "learning_rate": 1.3432278994322792e-06, "loss": 0.5662, "step": 25970 }, { "epoch": 0.7582552334238416, "grad_norm": 0.7759521458890519, "learning_rate": 1.343065693430657e-06, "loss": 0.6403, "step": 25971 }, { "epoch": 0.758284429651689, "grad_norm": 0.7301350184616057, "learning_rate": 1.342903487429035e-06, "loss": 0.6649, "step": 25972 }, { "epoch": 0.7583136258795363, "grad_norm": 0.6886953510352471, "learning_rate": 1.342741281427413e-06, "loss": 0.5569, "step": 25973 }, { "epoch": 0.7583428221073837, "grad_norm": 0.745277629300467, "learning_rate": 1.3425790754257908e-06, "loss": 0.698, "step": 25974 }, { "epoch": 0.758372018335231, "grad_norm": 0.7889925379825907, "learning_rate": 1.3424168694241688e-06, "loss": 0.742, "step": 25975 }, { "epoch": 0.7584012145630784, "grad_norm": 0.7178972946817403, "learning_rate": 1.3422546634225466e-06, "loss": 0.5909, "step": 25976 }, { "epoch": 0.7584304107909258, "grad_norm": 0.7555796165313872, "learning_rate": 1.3420924574209246e-06, "loss": 0.6527, "step": 25977 }, { "epoch": 0.7584596070187731, "grad_norm": 0.7659229799982709, "learning_rate": 1.3419302514193024e-06, "loss": 0.662, "step": 25978 }, { "epoch": 0.7584888032466205, "grad_norm": 0.7149210238515264, "learning_rate": 1.3417680454176807e-06, "loss": 0.6793, "step": 25979 }, { "epoch": 0.7585179994744679, "grad_norm": 0.6969927173391739, "learning_rate": 1.3416058394160587e-06, "loss": 0.6266, "step": 25980 }, { "epoch": 0.7585471957023152, "grad_norm": 0.8318141425327905, "learning_rate": 1.3414436334144365e-06, "loss": 0.6002, "step": 25981 }, { "epoch": 0.7585763919301626, "grad_norm": 0.6937834087736221, "learning_rate": 1.3412814274128145e-06, "loss": 0.5962, "step": 25982 }, { "epoch": 0.7586055881580099, "grad_norm": 0.682057856131138, "learning_rate": 1.3411192214111923e-06, "loss": 0.6099, "step": 25983 }, { "epoch": 0.7586347843858573, "grad_norm": 0.7639298988807772, "learning_rate": 1.3409570154095703e-06, "loss": 0.6116, "step": 25984 }, { "epoch": 0.7586639806137047, "grad_norm": 0.729661402445535, "learning_rate": 1.340794809407948e-06, "loss": 0.612, "step": 25985 }, { "epoch": 0.758693176841552, "grad_norm": 0.7372812537543204, "learning_rate": 1.340632603406326e-06, "loss": 0.6785, "step": 25986 }, { "epoch": 0.7587223730693994, "grad_norm": 0.6737400601526529, "learning_rate": 1.3404703974047039e-06, "loss": 0.5549, "step": 25987 }, { "epoch": 0.7587515692972467, "grad_norm": 0.7217006245928455, "learning_rate": 1.340308191403082e-06, "loss": 0.6542, "step": 25988 }, { "epoch": 0.7587807655250941, "grad_norm": 0.7461801756479883, "learning_rate": 1.34014598540146e-06, "loss": 0.606, "step": 25989 }, { "epoch": 0.7588099617529416, "grad_norm": 0.7441306365564857, "learning_rate": 1.3399837793998379e-06, "loss": 0.6606, "step": 25990 }, { "epoch": 0.7588391579807889, "grad_norm": 0.7322382838553002, "learning_rate": 1.339821573398216e-06, "loss": 0.6364, "step": 25991 }, { "epoch": 0.7588683542086363, "grad_norm": 0.706388589017139, "learning_rate": 1.339659367396594e-06, "loss": 0.6327, "step": 25992 }, { "epoch": 0.7588975504364837, "grad_norm": 0.7360415867189503, "learning_rate": 1.3394971613949717e-06, "loss": 0.669, "step": 25993 }, { "epoch": 0.758926746664331, "grad_norm": 0.7775183082087694, "learning_rate": 1.3393349553933497e-06, "loss": 0.7721, "step": 25994 }, { "epoch": 0.7589559428921784, "grad_norm": 0.7060975422519167, "learning_rate": 1.3391727493917275e-06, "loss": 0.6265, "step": 25995 }, { "epoch": 0.7589851391200257, "grad_norm": 0.7509592773664782, "learning_rate": 1.3390105433901055e-06, "loss": 0.7361, "step": 25996 }, { "epoch": 0.7590143353478731, "grad_norm": 0.6981886363318816, "learning_rate": 1.3388483373884833e-06, "loss": 0.602, "step": 25997 }, { "epoch": 0.7590435315757205, "grad_norm": 0.7172246774038891, "learning_rate": 1.3386861313868615e-06, "loss": 0.6175, "step": 25998 }, { "epoch": 0.7590727278035678, "grad_norm": 0.6880869882307058, "learning_rate": 1.3385239253852395e-06, "loss": 0.5874, "step": 25999 }, { "epoch": 0.7591019240314152, "grad_norm": 0.7110208997201086, "learning_rate": 1.3383617193836173e-06, "loss": 0.5833, "step": 26000 }, { "epoch": 0.7591311202592625, "grad_norm": 0.7219265057121403, "learning_rate": 1.3381995133819953e-06, "loss": 0.6808, "step": 26001 }, { "epoch": 0.7591603164871099, "grad_norm": 0.7132239000583065, "learning_rate": 1.3380373073803731e-06, "loss": 0.6229, "step": 26002 }, { "epoch": 0.7591895127149573, "grad_norm": 0.7807208278623192, "learning_rate": 1.3378751013787511e-06, "loss": 0.707, "step": 26003 }, { "epoch": 0.7592187089428046, "grad_norm": 0.698361988543936, "learning_rate": 1.337712895377129e-06, "loss": 0.5471, "step": 26004 }, { "epoch": 0.759247905170652, "grad_norm": 0.7011003266075189, "learning_rate": 1.337550689375507e-06, "loss": 0.6172, "step": 26005 }, { "epoch": 0.7592771013984994, "grad_norm": 0.8214987145581159, "learning_rate": 1.3373884833738847e-06, "loss": 0.7649, "step": 26006 }, { "epoch": 0.7593062976263467, "grad_norm": 0.7239584070858011, "learning_rate": 1.337226277372263e-06, "loss": 0.6163, "step": 26007 }, { "epoch": 0.7593354938541941, "grad_norm": 0.6992060040099248, "learning_rate": 1.337064071370641e-06, "loss": 0.6194, "step": 26008 }, { "epoch": 0.7593646900820414, "grad_norm": 0.6650537634448299, "learning_rate": 1.3369018653690187e-06, "loss": 0.5271, "step": 26009 }, { "epoch": 0.7593938863098888, "grad_norm": 0.7180274624977532, "learning_rate": 1.3367396593673968e-06, "loss": 0.6625, "step": 26010 }, { "epoch": 0.7594230825377362, "grad_norm": 0.7426357576562826, "learning_rate": 1.3365774533657748e-06, "loss": 0.6325, "step": 26011 }, { "epoch": 0.7594522787655835, "grad_norm": 0.7181084957857257, "learning_rate": 1.3364152473641526e-06, "loss": 0.643, "step": 26012 }, { "epoch": 0.7594814749934309, "grad_norm": 0.7683511972301145, "learning_rate": 1.3362530413625306e-06, "loss": 0.7279, "step": 26013 }, { "epoch": 0.7595106712212782, "grad_norm": 0.6420864543291367, "learning_rate": 1.3360908353609084e-06, "loss": 0.5332, "step": 26014 }, { "epoch": 0.7595398674491256, "grad_norm": 0.7151191153345331, "learning_rate": 1.3359286293592864e-06, "loss": 0.6029, "step": 26015 }, { "epoch": 0.759569063676973, "grad_norm": 0.7422857488648984, "learning_rate": 1.3357664233576642e-06, "loss": 0.6492, "step": 26016 }, { "epoch": 0.7595982599048203, "grad_norm": 0.6736240399801875, "learning_rate": 1.3356042173560424e-06, "loss": 0.5869, "step": 26017 }, { "epoch": 0.7596274561326677, "grad_norm": 0.720789623205312, "learning_rate": 1.3354420113544204e-06, "loss": 0.6678, "step": 26018 }, { "epoch": 0.759656652360515, "grad_norm": 0.7326348064666137, "learning_rate": 1.3352798053527982e-06, "loss": 0.6509, "step": 26019 }, { "epoch": 0.7596858485883624, "grad_norm": 0.7020198280448766, "learning_rate": 1.3351175993511762e-06, "loss": 0.6129, "step": 26020 }, { "epoch": 0.7597150448162098, "grad_norm": 0.7203461139686443, "learning_rate": 1.334955393349554e-06, "loss": 0.6613, "step": 26021 }, { "epoch": 0.7597442410440571, "grad_norm": 0.7056202624348139, "learning_rate": 1.334793187347932e-06, "loss": 0.6061, "step": 26022 }, { "epoch": 0.7597734372719045, "grad_norm": 0.8339239273904129, "learning_rate": 1.3346309813463098e-06, "loss": 0.7086, "step": 26023 }, { "epoch": 0.7598026334997519, "grad_norm": 0.6680326626788454, "learning_rate": 1.3344687753446878e-06, "loss": 0.592, "step": 26024 }, { "epoch": 0.7598318297275992, "grad_norm": 0.7732677637650975, "learning_rate": 1.3343065693430656e-06, "loss": 0.7311, "step": 26025 }, { "epoch": 0.7598610259554466, "grad_norm": 0.6904600927620664, "learning_rate": 1.3341443633414438e-06, "loss": 0.611, "step": 26026 }, { "epoch": 0.7598902221832939, "grad_norm": 0.734849613270546, "learning_rate": 1.3339821573398218e-06, "loss": 0.6579, "step": 26027 }, { "epoch": 0.7599194184111413, "grad_norm": 0.6555484556887177, "learning_rate": 1.3338199513381996e-06, "loss": 0.5659, "step": 26028 }, { "epoch": 0.7599486146389887, "grad_norm": 0.6967105372778637, "learning_rate": 1.3336577453365776e-06, "loss": 0.6076, "step": 26029 }, { "epoch": 0.759977810866836, "grad_norm": 0.7285940340697998, "learning_rate": 1.3334955393349556e-06, "loss": 0.6398, "step": 26030 }, { "epoch": 0.7600070070946834, "grad_norm": 0.725983790242411, "learning_rate": 1.3333333333333334e-06, "loss": 0.6483, "step": 26031 }, { "epoch": 0.7600362033225307, "grad_norm": 0.685011862081092, "learning_rate": 1.3331711273317114e-06, "loss": 0.5923, "step": 26032 }, { "epoch": 0.7600653995503781, "grad_norm": 0.7343628665451547, "learning_rate": 1.3330089213300892e-06, "loss": 0.6388, "step": 26033 }, { "epoch": 0.7600945957782255, "grad_norm": 0.7732768436389272, "learning_rate": 1.3328467153284672e-06, "loss": 0.7036, "step": 26034 }, { "epoch": 0.7601237920060728, "grad_norm": 0.7667626388665831, "learning_rate": 1.332684509326845e-06, "loss": 0.6803, "step": 26035 }, { "epoch": 0.7601529882339202, "grad_norm": 0.7389110391191918, "learning_rate": 1.3325223033252232e-06, "loss": 0.693, "step": 26036 }, { "epoch": 0.7601821844617676, "grad_norm": 0.6798847014022068, "learning_rate": 1.3323600973236012e-06, "loss": 0.543, "step": 26037 }, { "epoch": 0.7602113806896149, "grad_norm": 0.6833160558544427, "learning_rate": 1.332197891321979e-06, "loss": 0.618, "step": 26038 }, { "epoch": 0.7602405769174623, "grad_norm": 0.7349253085889567, "learning_rate": 1.332035685320357e-06, "loss": 0.6385, "step": 26039 }, { "epoch": 0.7602697731453096, "grad_norm": 0.6309261920765031, "learning_rate": 1.3318734793187348e-06, "loss": 0.5192, "step": 26040 }, { "epoch": 0.760298969373157, "grad_norm": 0.7293536948304008, "learning_rate": 1.3317112733171128e-06, "loss": 0.6515, "step": 26041 }, { "epoch": 0.7603281656010044, "grad_norm": 0.7092344910666497, "learning_rate": 1.3315490673154906e-06, "loss": 0.5903, "step": 26042 }, { "epoch": 0.7603573618288517, "grad_norm": 0.7389874173790637, "learning_rate": 1.3313868613138686e-06, "loss": 0.6135, "step": 26043 }, { "epoch": 0.7603865580566991, "grad_norm": 0.7265115158631524, "learning_rate": 1.3312246553122464e-06, "loss": 0.609, "step": 26044 }, { "epoch": 0.7604157542845464, "grad_norm": 0.7900982272641943, "learning_rate": 1.3310624493106247e-06, "loss": 0.7254, "step": 26045 }, { "epoch": 0.7604449505123938, "grad_norm": 0.7066706733947511, "learning_rate": 1.3309002433090027e-06, "loss": 0.595, "step": 26046 }, { "epoch": 0.7604741467402412, "grad_norm": 0.7282878201420173, "learning_rate": 1.3307380373073805e-06, "loss": 0.6202, "step": 26047 }, { "epoch": 0.7605033429680885, "grad_norm": 0.6874858913658816, "learning_rate": 1.3305758313057585e-06, "loss": 0.5873, "step": 26048 }, { "epoch": 0.7605325391959359, "grad_norm": 0.7243559869192894, "learning_rate": 1.3304136253041363e-06, "loss": 0.6688, "step": 26049 }, { "epoch": 0.7605617354237832, "grad_norm": 0.7080510909037959, "learning_rate": 1.3302514193025143e-06, "loss": 0.6026, "step": 26050 }, { "epoch": 0.7605909316516306, "grad_norm": 0.7493209278869655, "learning_rate": 1.3300892133008923e-06, "loss": 0.6389, "step": 26051 }, { "epoch": 0.760620127879478, "grad_norm": 0.6750110948893917, "learning_rate": 1.32992700729927e-06, "loss": 0.5845, "step": 26052 }, { "epoch": 0.7606493241073253, "grad_norm": 0.7776016995087965, "learning_rate": 1.329764801297648e-06, "loss": 0.676, "step": 26053 }, { "epoch": 0.7606785203351727, "grad_norm": 0.6710480490941916, "learning_rate": 1.3296025952960263e-06, "loss": 0.5856, "step": 26054 }, { "epoch": 0.76070771656302, "grad_norm": 0.6926222151517049, "learning_rate": 1.329440389294404e-06, "loss": 0.6143, "step": 26055 }, { "epoch": 0.7607369127908674, "grad_norm": 0.7156687425029649, "learning_rate": 1.329278183292782e-06, "loss": 0.6751, "step": 26056 }, { "epoch": 0.7607661090187148, "grad_norm": 0.7287248554230842, "learning_rate": 1.32911597729116e-06, "loss": 0.6596, "step": 26057 }, { "epoch": 0.7607953052465621, "grad_norm": 0.7388462424436765, "learning_rate": 1.328953771289538e-06, "loss": 0.5847, "step": 26058 }, { "epoch": 0.7608245014744095, "grad_norm": 0.6823572046715131, "learning_rate": 1.3287915652879157e-06, "loss": 0.5772, "step": 26059 }, { "epoch": 0.7608536977022569, "grad_norm": 0.7005307494219026, "learning_rate": 1.3286293592862937e-06, "loss": 0.6288, "step": 26060 }, { "epoch": 0.7608828939301042, "grad_norm": 0.6833248722259496, "learning_rate": 1.3284671532846715e-06, "loss": 0.5589, "step": 26061 }, { "epoch": 0.7609120901579516, "grad_norm": 0.6921627385421684, "learning_rate": 1.3283049472830495e-06, "loss": 0.6192, "step": 26062 }, { "epoch": 0.7609412863857989, "grad_norm": 0.7642229457339581, "learning_rate": 1.3281427412814273e-06, "loss": 0.7288, "step": 26063 }, { "epoch": 0.7609704826136463, "grad_norm": 0.794256077551317, "learning_rate": 1.3279805352798055e-06, "loss": 0.6785, "step": 26064 }, { "epoch": 0.7609996788414937, "grad_norm": 0.7696455668687379, "learning_rate": 1.3278183292781835e-06, "loss": 0.6356, "step": 26065 }, { "epoch": 0.761028875069341, "grad_norm": 0.7713511943246967, "learning_rate": 1.3276561232765613e-06, "loss": 0.6581, "step": 26066 }, { "epoch": 0.7610580712971884, "grad_norm": 0.6746403280498477, "learning_rate": 1.3274939172749393e-06, "loss": 0.5761, "step": 26067 }, { "epoch": 0.7610872675250357, "grad_norm": 0.7038689413106946, "learning_rate": 1.3273317112733171e-06, "loss": 0.6292, "step": 26068 }, { "epoch": 0.7611164637528831, "grad_norm": 0.7108300363996606, "learning_rate": 1.3271695052716951e-06, "loss": 0.6461, "step": 26069 }, { "epoch": 0.7611456599807305, "grad_norm": 0.7610486578433003, "learning_rate": 1.3270072992700731e-06, "loss": 0.6763, "step": 26070 }, { "epoch": 0.7611748562085778, "grad_norm": 0.7171185766348208, "learning_rate": 1.326845093268451e-06, "loss": 0.629, "step": 26071 }, { "epoch": 0.7612040524364252, "grad_norm": 0.7371501187567993, "learning_rate": 1.326682887266829e-06, "loss": 0.5958, "step": 26072 }, { "epoch": 0.7612332486642726, "grad_norm": 0.6981813575114842, "learning_rate": 1.3265206812652072e-06, "loss": 0.5763, "step": 26073 }, { "epoch": 0.7612624448921199, "grad_norm": 0.7303282002943142, "learning_rate": 1.326358475263585e-06, "loss": 0.7153, "step": 26074 }, { "epoch": 0.7612916411199673, "grad_norm": 0.7314111762439992, "learning_rate": 1.326196269261963e-06, "loss": 0.6349, "step": 26075 }, { "epoch": 0.7613208373478146, "grad_norm": 0.7363558169173704, "learning_rate": 1.3260340632603408e-06, "loss": 0.6734, "step": 26076 }, { "epoch": 0.761350033575662, "grad_norm": 0.7583450299506213, "learning_rate": 1.3258718572587188e-06, "loss": 0.6761, "step": 26077 }, { "epoch": 0.7613792298035094, "grad_norm": 0.642372333414763, "learning_rate": 1.3257096512570966e-06, "loss": 0.5391, "step": 26078 }, { "epoch": 0.7614084260313567, "grad_norm": 0.7625771058227233, "learning_rate": 1.3255474452554746e-06, "loss": 0.6582, "step": 26079 }, { "epoch": 0.7614376222592041, "grad_norm": 0.9088985609794972, "learning_rate": 1.3253852392538524e-06, "loss": 0.6443, "step": 26080 }, { "epoch": 0.7614668184870514, "grad_norm": 0.7236065713374796, "learning_rate": 1.3252230332522304e-06, "loss": 0.5992, "step": 26081 }, { "epoch": 0.7614960147148988, "grad_norm": 0.6912365886829183, "learning_rate": 1.3250608272506082e-06, "loss": 0.5873, "step": 26082 }, { "epoch": 0.7615252109427462, "grad_norm": 0.7615606528652712, "learning_rate": 1.3248986212489864e-06, "loss": 0.6765, "step": 26083 }, { "epoch": 0.7615544071705935, "grad_norm": 0.7272686820081661, "learning_rate": 1.3247364152473644e-06, "loss": 0.5831, "step": 26084 }, { "epoch": 0.7615836033984409, "grad_norm": 0.7350578662714112, "learning_rate": 1.3245742092457422e-06, "loss": 0.6974, "step": 26085 }, { "epoch": 0.7616127996262883, "grad_norm": 0.757425097676663, "learning_rate": 1.3244120032441202e-06, "loss": 0.6555, "step": 26086 }, { "epoch": 0.7616419958541356, "grad_norm": 0.6885131275013244, "learning_rate": 1.324249797242498e-06, "loss": 0.6218, "step": 26087 }, { "epoch": 0.761671192081983, "grad_norm": 0.7443352863556291, "learning_rate": 1.324087591240876e-06, "loss": 0.6853, "step": 26088 }, { "epoch": 0.7617003883098303, "grad_norm": 0.6899849668406661, "learning_rate": 1.323925385239254e-06, "loss": 0.6107, "step": 26089 }, { "epoch": 0.7617295845376777, "grad_norm": 0.7640400227502305, "learning_rate": 1.3237631792376318e-06, "loss": 0.6859, "step": 26090 }, { "epoch": 0.7617587807655251, "grad_norm": 0.7353368148015135, "learning_rate": 1.3236009732360098e-06, "loss": 0.6436, "step": 26091 }, { "epoch": 0.7617879769933724, "grad_norm": 0.7418584572484535, "learning_rate": 1.3234387672343878e-06, "loss": 0.5808, "step": 26092 }, { "epoch": 0.7618171732212198, "grad_norm": 0.7265341882147184, "learning_rate": 1.3232765612327658e-06, "loss": 0.5845, "step": 26093 }, { "epoch": 0.7618463694490671, "grad_norm": 0.6978577957094405, "learning_rate": 1.3231143552311438e-06, "loss": 0.6338, "step": 26094 }, { "epoch": 0.7618755656769145, "grad_norm": 0.6737798459233296, "learning_rate": 1.3229521492295216e-06, "loss": 0.6003, "step": 26095 }, { "epoch": 0.7619047619047619, "grad_norm": 0.6620732274920598, "learning_rate": 1.3227899432278996e-06, "loss": 0.5169, "step": 26096 }, { "epoch": 0.7619339581326092, "grad_norm": 0.7455951743889461, "learning_rate": 1.3226277372262774e-06, "loss": 0.6638, "step": 26097 }, { "epoch": 0.7619631543604566, "grad_norm": 0.8222306758942575, "learning_rate": 1.3224655312246554e-06, "loss": 0.6906, "step": 26098 }, { "epoch": 0.761992350588304, "grad_norm": 0.6955640061234821, "learning_rate": 1.3223033252230332e-06, "loss": 0.5883, "step": 26099 }, { "epoch": 0.7620215468161513, "grad_norm": 0.7646042573679954, "learning_rate": 1.3221411192214112e-06, "loss": 0.7392, "step": 26100 }, { "epoch": 0.7620507430439987, "grad_norm": 0.6406537869555682, "learning_rate": 1.321978913219789e-06, "loss": 0.526, "step": 26101 }, { "epoch": 0.762079939271846, "grad_norm": 0.7038428687450767, "learning_rate": 1.3218167072181672e-06, "loss": 0.6126, "step": 26102 }, { "epoch": 0.7621091354996934, "grad_norm": 0.6697164697857098, "learning_rate": 1.3216545012165452e-06, "loss": 0.571, "step": 26103 }, { "epoch": 0.7621383317275408, "grad_norm": 0.6738496152535906, "learning_rate": 1.321492295214923e-06, "loss": 0.5749, "step": 26104 }, { "epoch": 0.7621675279553881, "grad_norm": 0.6708059818434506, "learning_rate": 1.321330089213301e-06, "loss": 0.5412, "step": 26105 }, { "epoch": 0.7621967241832355, "grad_norm": 0.7328914229216994, "learning_rate": 1.3211678832116788e-06, "loss": 0.644, "step": 26106 }, { "epoch": 0.7622259204110828, "grad_norm": 0.7308608433732796, "learning_rate": 1.3210056772100568e-06, "loss": 0.653, "step": 26107 }, { "epoch": 0.7622551166389302, "grad_norm": 0.7547021056868729, "learning_rate": 1.3208434712084349e-06, "loss": 0.665, "step": 26108 }, { "epoch": 0.7622843128667776, "grad_norm": 0.7111816267806284, "learning_rate": 1.3206812652068127e-06, "loss": 0.5979, "step": 26109 }, { "epoch": 0.7623135090946249, "grad_norm": 0.7014463744574495, "learning_rate": 1.3205190592051907e-06, "loss": 0.638, "step": 26110 }, { "epoch": 0.7623427053224724, "grad_norm": 0.7288145064589856, "learning_rate": 1.3203568532035687e-06, "loss": 0.6379, "step": 26111 }, { "epoch": 0.7623719015503198, "grad_norm": 0.7558027259715644, "learning_rate": 1.3201946472019467e-06, "loss": 0.6785, "step": 26112 }, { "epoch": 0.7624010977781671, "grad_norm": 0.6484094011474183, "learning_rate": 1.3200324412003247e-06, "loss": 0.5562, "step": 26113 }, { "epoch": 0.7624302940060145, "grad_norm": 0.7538165570764983, "learning_rate": 1.3198702351987025e-06, "loss": 0.6725, "step": 26114 }, { "epoch": 0.7624594902338618, "grad_norm": 0.6357851112792081, "learning_rate": 1.3197080291970805e-06, "loss": 0.5383, "step": 26115 }, { "epoch": 0.7624886864617092, "grad_norm": 0.7272166111427608, "learning_rate": 1.3195458231954583e-06, "loss": 0.6161, "step": 26116 }, { "epoch": 0.7625178826895566, "grad_norm": 0.7233844302523789, "learning_rate": 1.3193836171938363e-06, "loss": 0.6002, "step": 26117 }, { "epoch": 0.7625470789174039, "grad_norm": 0.7193584877802582, "learning_rate": 1.319221411192214e-06, "loss": 0.6473, "step": 26118 }, { "epoch": 0.7625762751452513, "grad_norm": 0.6829199573198386, "learning_rate": 1.319059205190592e-06, "loss": 0.5796, "step": 26119 }, { "epoch": 0.7626054713730986, "grad_norm": 0.6710805262877393, "learning_rate": 1.3188969991889699e-06, "loss": 0.5356, "step": 26120 }, { "epoch": 0.762634667600946, "grad_norm": 0.793118476289512, "learning_rate": 1.318734793187348e-06, "loss": 0.6838, "step": 26121 }, { "epoch": 0.7626638638287934, "grad_norm": 0.7583547098446771, "learning_rate": 1.318572587185726e-06, "loss": 0.6331, "step": 26122 }, { "epoch": 0.7626930600566407, "grad_norm": 0.7168587139494258, "learning_rate": 1.318410381184104e-06, "loss": 0.614, "step": 26123 }, { "epoch": 0.7627222562844881, "grad_norm": 0.7214859387322075, "learning_rate": 1.318248175182482e-06, "loss": 0.6707, "step": 26124 }, { "epoch": 0.7627514525123354, "grad_norm": 0.7506254127368963, "learning_rate": 1.3180859691808597e-06, "loss": 0.6449, "step": 26125 }, { "epoch": 0.7627806487401828, "grad_norm": 0.7098386490370165, "learning_rate": 1.3179237631792377e-06, "loss": 0.6143, "step": 26126 }, { "epoch": 0.7628098449680302, "grad_norm": 0.7117214702626447, "learning_rate": 1.3177615571776157e-06, "loss": 0.614, "step": 26127 }, { "epoch": 0.7628390411958775, "grad_norm": 0.741172686124671, "learning_rate": 1.3175993511759935e-06, "loss": 0.6518, "step": 26128 }, { "epoch": 0.7628682374237249, "grad_norm": 0.694166817087543, "learning_rate": 1.3174371451743715e-06, "loss": 0.5824, "step": 26129 }, { "epoch": 0.7628974336515723, "grad_norm": 0.7047191684138616, "learning_rate": 1.3172749391727495e-06, "loss": 0.6292, "step": 26130 }, { "epoch": 0.7629266298794196, "grad_norm": 0.7048302476287424, "learning_rate": 1.3171127331711275e-06, "loss": 0.5831, "step": 26131 }, { "epoch": 0.762955826107267, "grad_norm": 0.7535164572387762, "learning_rate": 1.3169505271695055e-06, "loss": 0.685, "step": 26132 }, { "epoch": 0.7629850223351143, "grad_norm": 0.7018137916730145, "learning_rate": 1.3167883211678833e-06, "loss": 0.6507, "step": 26133 }, { "epoch": 0.7630142185629617, "grad_norm": 0.7720177717667992, "learning_rate": 1.3166261151662613e-06, "loss": 0.6205, "step": 26134 }, { "epoch": 0.7630434147908091, "grad_norm": 0.6998187334049676, "learning_rate": 1.3164639091646391e-06, "loss": 0.6413, "step": 26135 }, { "epoch": 0.7630726110186564, "grad_norm": 0.7444748187957494, "learning_rate": 1.3163017031630171e-06, "loss": 0.6055, "step": 26136 }, { "epoch": 0.7631018072465038, "grad_norm": 0.7406490116938476, "learning_rate": 1.316139497161395e-06, "loss": 0.6285, "step": 26137 }, { "epoch": 0.7631310034743511, "grad_norm": 0.6952869903965709, "learning_rate": 1.315977291159773e-06, "loss": 0.6026, "step": 26138 }, { "epoch": 0.7631601997021985, "grad_norm": 0.7002170438770117, "learning_rate": 1.3158150851581512e-06, "loss": 0.5667, "step": 26139 }, { "epoch": 0.7631893959300459, "grad_norm": 0.7771343255406765, "learning_rate": 1.315652879156529e-06, "loss": 0.755, "step": 26140 }, { "epoch": 0.7632185921578932, "grad_norm": 0.7216183515266255, "learning_rate": 1.315490673154907e-06, "loss": 0.6187, "step": 26141 }, { "epoch": 0.7632477883857406, "grad_norm": 0.670013110864174, "learning_rate": 1.3153284671532848e-06, "loss": 0.5544, "step": 26142 }, { "epoch": 0.763276984613588, "grad_norm": 0.639493726928809, "learning_rate": 1.3151662611516628e-06, "loss": 0.5537, "step": 26143 }, { "epoch": 0.7633061808414353, "grad_norm": 0.7121018179376711, "learning_rate": 1.3150040551500406e-06, "loss": 0.6293, "step": 26144 }, { "epoch": 0.7633353770692827, "grad_norm": 0.7370109250045151, "learning_rate": 1.3148418491484186e-06, "loss": 0.6776, "step": 26145 }, { "epoch": 0.76336457329713, "grad_norm": 0.6874821180513936, "learning_rate": 1.3146796431467966e-06, "loss": 0.6152, "step": 26146 }, { "epoch": 0.7633937695249774, "grad_norm": 0.690794867549801, "learning_rate": 1.3145174371451744e-06, "loss": 0.5865, "step": 26147 }, { "epoch": 0.7634229657528248, "grad_norm": 0.7363541295692807, "learning_rate": 1.3143552311435524e-06, "loss": 0.6983, "step": 26148 }, { "epoch": 0.7634521619806721, "grad_norm": 0.7420376308103935, "learning_rate": 1.3141930251419304e-06, "loss": 0.6064, "step": 26149 }, { "epoch": 0.7634813582085195, "grad_norm": 0.7332979293752796, "learning_rate": 1.3140308191403084e-06, "loss": 0.6924, "step": 26150 }, { "epoch": 0.7635105544363668, "grad_norm": 0.7063928115866993, "learning_rate": 1.3138686131386864e-06, "loss": 0.6028, "step": 26151 }, { "epoch": 0.7635397506642142, "grad_norm": 0.6914070112431621, "learning_rate": 1.3137064071370642e-06, "loss": 0.546, "step": 26152 }, { "epoch": 0.7635689468920616, "grad_norm": 0.7093818777957215, "learning_rate": 1.3135442011354422e-06, "loss": 0.6788, "step": 26153 }, { "epoch": 0.7635981431199089, "grad_norm": 0.7430231958734087, "learning_rate": 1.31338199513382e-06, "loss": 0.6496, "step": 26154 }, { "epoch": 0.7636273393477563, "grad_norm": 0.7094150870083719, "learning_rate": 1.313219789132198e-06, "loss": 0.5945, "step": 26155 }, { "epoch": 0.7636565355756036, "grad_norm": 0.7314094015557909, "learning_rate": 1.3130575831305758e-06, "loss": 0.6417, "step": 26156 }, { "epoch": 0.763685731803451, "grad_norm": 0.7111575217349847, "learning_rate": 1.3128953771289538e-06, "loss": 0.5762, "step": 26157 }, { "epoch": 0.7637149280312984, "grad_norm": 0.7487285353716807, "learning_rate": 1.312733171127332e-06, "loss": 0.6134, "step": 26158 }, { "epoch": 0.7637441242591457, "grad_norm": 0.7142773290629693, "learning_rate": 1.3125709651257098e-06, "loss": 0.5783, "step": 26159 }, { "epoch": 0.7637733204869931, "grad_norm": 0.7239850248698675, "learning_rate": 1.3124087591240878e-06, "loss": 0.6276, "step": 26160 }, { "epoch": 0.7638025167148405, "grad_norm": 0.6910772062274184, "learning_rate": 1.3122465531224656e-06, "loss": 0.5884, "step": 26161 }, { "epoch": 0.7638317129426878, "grad_norm": 0.7960413047973417, "learning_rate": 1.3120843471208436e-06, "loss": 0.5955, "step": 26162 }, { "epoch": 0.7638609091705352, "grad_norm": 0.7271839638124679, "learning_rate": 1.3119221411192214e-06, "loss": 0.6322, "step": 26163 }, { "epoch": 0.7638901053983825, "grad_norm": 0.7261788994512465, "learning_rate": 1.3117599351175994e-06, "loss": 0.6409, "step": 26164 }, { "epoch": 0.7639193016262299, "grad_norm": 0.7130926946549487, "learning_rate": 1.3115977291159772e-06, "loss": 0.5735, "step": 26165 }, { "epoch": 0.7639484978540773, "grad_norm": 0.7422340281844361, "learning_rate": 1.3114355231143552e-06, "loss": 0.678, "step": 26166 }, { "epoch": 0.7639776940819246, "grad_norm": 0.7546800846728186, "learning_rate": 1.3112733171127332e-06, "loss": 0.6735, "step": 26167 }, { "epoch": 0.764006890309772, "grad_norm": 0.7024245341177391, "learning_rate": 1.3111111111111112e-06, "loss": 0.6, "step": 26168 }, { "epoch": 0.7640360865376193, "grad_norm": 0.7258677373043628, "learning_rate": 1.3109489051094892e-06, "loss": 0.6294, "step": 26169 }, { "epoch": 0.7640652827654667, "grad_norm": 0.8086074738712653, "learning_rate": 1.3107866991078673e-06, "loss": 0.7142, "step": 26170 }, { "epoch": 0.7640944789933141, "grad_norm": 0.716631117748543, "learning_rate": 1.310624493106245e-06, "loss": 0.6543, "step": 26171 }, { "epoch": 0.7641236752211614, "grad_norm": 0.7169942511572173, "learning_rate": 1.310462287104623e-06, "loss": 0.6008, "step": 26172 }, { "epoch": 0.7641528714490088, "grad_norm": 0.7765194872363371, "learning_rate": 1.3103000811030009e-06, "loss": 0.7269, "step": 26173 }, { "epoch": 0.7641820676768561, "grad_norm": 0.7703583599855343, "learning_rate": 1.3101378751013789e-06, "loss": 0.6179, "step": 26174 }, { "epoch": 0.7642112639047035, "grad_norm": 0.7456649828363158, "learning_rate": 1.3099756690997567e-06, "loss": 0.7025, "step": 26175 }, { "epoch": 0.7642404601325509, "grad_norm": 0.69084671759917, "learning_rate": 1.3098134630981347e-06, "loss": 0.5703, "step": 26176 }, { "epoch": 0.7642696563603982, "grad_norm": 0.6578675233153244, "learning_rate": 1.3096512570965129e-06, "loss": 0.5446, "step": 26177 }, { "epoch": 0.7642988525882456, "grad_norm": 0.7285677962231223, "learning_rate": 1.3094890510948907e-06, "loss": 0.6665, "step": 26178 }, { "epoch": 0.764328048816093, "grad_norm": 0.7201468811404798, "learning_rate": 1.3093268450932687e-06, "loss": 0.5768, "step": 26179 }, { "epoch": 0.7643572450439403, "grad_norm": 0.6976520807157032, "learning_rate": 1.3091646390916465e-06, "loss": 0.6055, "step": 26180 }, { "epoch": 0.7643864412717877, "grad_norm": 0.6939087443858506, "learning_rate": 1.3090024330900245e-06, "loss": 0.6217, "step": 26181 }, { "epoch": 0.764415637499635, "grad_norm": 0.7886472881656723, "learning_rate": 1.3088402270884023e-06, "loss": 0.7164, "step": 26182 }, { "epoch": 0.7644448337274824, "grad_norm": 0.7250878135349126, "learning_rate": 1.3086780210867803e-06, "loss": 0.6343, "step": 26183 }, { "epoch": 0.7644740299553298, "grad_norm": 0.6939680170931464, "learning_rate": 1.308515815085158e-06, "loss": 0.5618, "step": 26184 }, { "epoch": 0.7645032261831771, "grad_norm": 0.7507480326246182, "learning_rate": 1.308353609083536e-06, "loss": 0.6662, "step": 26185 }, { "epoch": 0.7645324224110245, "grad_norm": 0.7741936087932098, "learning_rate": 1.308191403081914e-06, "loss": 0.7513, "step": 26186 }, { "epoch": 0.7645616186388718, "grad_norm": 0.6977846680955888, "learning_rate": 1.308029197080292e-06, "loss": 0.6581, "step": 26187 }, { "epoch": 0.7645908148667192, "grad_norm": 0.7845185822106033, "learning_rate": 1.30786699107867e-06, "loss": 0.766, "step": 26188 }, { "epoch": 0.7646200110945666, "grad_norm": 0.6970665663902887, "learning_rate": 1.3077047850770481e-06, "loss": 0.5979, "step": 26189 }, { "epoch": 0.7646492073224139, "grad_norm": 0.8487807952987689, "learning_rate": 1.307542579075426e-06, "loss": 0.6777, "step": 26190 }, { "epoch": 0.7646784035502613, "grad_norm": 0.7443505902343809, "learning_rate": 1.307380373073804e-06, "loss": 0.6977, "step": 26191 }, { "epoch": 0.7647075997781086, "grad_norm": 0.7133681063276421, "learning_rate": 1.3072181670721817e-06, "loss": 0.6324, "step": 26192 }, { "epoch": 0.764736796005956, "grad_norm": 0.6974275554321495, "learning_rate": 1.3070559610705597e-06, "loss": 0.6219, "step": 26193 }, { "epoch": 0.7647659922338034, "grad_norm": 0.7544121179206073, "learning_rate": 1.3068937550689375e-06, "loss": 0.7037, "step": 26194 }, { "epoch": 0.7647951884616507, "grad_norm": 0.7306296134919591, "learning_rate": 1.3067315490673155e-06, "loss": 0.682, "step": 26195 }, { "epoch": 0.7648243846894981, "grad_norm": 0.7027958882564489, "learning_rate": 1.3065693430656937e-06, "loss": 0.5962, "step": 26196 }, { "epoch": 0.7648535809173455, "grad_norm": 0.7859960442710867, "learning_rate": 1.3064071370640715e-06, "loss": 0.7135, "step": 26197 }, { "epoch": 0.7648827771451928, "grad_norm": 0.7715163151294172, "learning_rate": 1.3062449310624495e-06, "loss": 0.6237, "step": 26198 }, { "epoch": 0.7649119733730402, "grad_norm": 0.7523631974282706, "learning_rate": 1.3060827250608273e-06, "loss": 0.6674, "step": 26199 }, { "epoch": 0.7649411696008875, "grad_norm": 0.7482691449293294, "learning_rate": 1.3059205190592053e-06, "loss": 0.6118, "step": 26200 }, { "epoch": 0.7649703658287349, "grad_norm": 0.685429580870801, "learning_rate": 1.3057583130575831e-06, "loss": 0.5665, "step": 26201 }, { "epoch": 0.7649995620565823, "grad_norm": 0.7549571829378535, "learning_rate": 1.3055961070559611e-06, "loss": 0.6321, "step": 26202 }, { "epoch": 0.7650287582844296, "grad_norm": 0.7299622384371055, "learning_rate": 1.305433901054339e-06, "loss": 0.6905, "step": 26203 }, { "epoch": 0.765057954512277, "grad_norm": 0.7706620544517981, "learning_rate": 1.305271695052717e-06, "loss": 0.6622, "step": 26204 }, { "epoch": 0.7650871507401243, "grad_norm": 0.768344033420375, "learning_rate": 1.3051094890510952e-06, "loss": 0.7303, "step": 26205 }, { "epoch": 0.7651163469679717, "grad_norm": 0.7571937251476508, "learning_rate": 1.304947283049473e-06, "loss": 0.6178, "step": 26206 }, { "epoch": 0.7651455431958191, "grad_norm": 0.6742403699189176, "learning_rate": 1.304785077047851e-06, "loss": 0.5775, "step": 26207 }, { "epoch": 0.7651747394236664, "grad_norm": 0.8555011438953325, "learning_rate": 1.304622871046229e-06, "loss": 0.75, "step": 26208 }, { "epoch": 0.7652039356515138, "grad_norm": 0.8400454884829458, "learning_rate": 1.3044606650446068e-06, "loss": 0.6654, "step": 26209 }, { "epoch": 0.7652331318793612, "grad_norm": 0.7829355008024008, "learning_rate": 1.3042984590429848e-06, "loss": 0.7005, "step": 26210 }, { "epoch": 0.7652623281072085, "grad_norm": 0.6926403375905474, "learning_rate": 1.3041362530413626e-06, "loss": 0.5887, "step": 26211 }, { "epoch": 0.7652915243350559, "grad_norm": 0.7532337434111663, "learning_rate": 1.3039740470397406e-06, "loss": 0.6772, "step": 26212 }, { "epoch": 0.7653207205629032, "grad_norm": 0.8061813789107236, "learning_rate": 1.3038118410381184e-06, "loss": 0.7578, "step": 26213 }, { "epoch": 0.7653499167907506, "grad_norm": 0.6608403355635064, "learning_rate": 1.3036496350364964e-06, "loss": 0.5258, "step": 26214 }, { "epoch": 0.765379113018598, "grad_norm": 0.7169967801667524, "learning_rate": 1.3034874290348746e-06, "loss": 0.6336, "step": 26215 }, { "epoch": 0.7654083092464453, "grad_norm": 0.6872415601214952, "learning_rate": 1.3033252230332524e-06, "loss": 0.5441, "step": 26216 }, { "epoch": 0.7654375054742927, "grad_norm": 0.7337975953883015, "learning_rate": 1.3031630170316304e-06, "loss": 0.6978, "step": 26217 }, { "epoch": 0.76546670170214, "grad_norm": 0.7572909814022979, "learning_rate": 1.3030008110300082e-06, "loss": 0.6508, "step": 26218 }, { "epoch": 0.7654958979299874, "grad_norm": 0.7050373751442781, "learning_rate": 1.3028386050283862e-06, "loss": 0.6318, "step": 26219 }, { "epoch": 0.7655250941578348, "grad_norm": 0.7131999114156862, "learning_rate": 1.302676399026764e-06, "loss": 0.6407, "step": 26220 }, { "epoch": 0.7655542903856821, "grad_norm": 0.6496520449914152, "learning_rate": 1.302514193025142e-06, "loss": 0.5263, "step": 26221 }, { "epoch": 0.7655834866135295, "grad_norm": 0.7292581739067824, "learning_rate": 1.3023519870235198e-06, "loss": 0.6915, "step": 26222 }, { "epoch": 0.7656126828413768, "grad_norm": 0.7153436779008877, "learning_rate": 1.3021897810218978e-06, "loss": 0.6756, "step": 26223 }, { "epoch": 0.7656418790692242, "grad_norm": 0.7503085090458226, "learning_rate": 1.302027575020276e-06, "loss": 0.676, "step": 26224 }, { "epoch": 0.7656710752970716, "grad_norm": 0.6721707624588558, "learning_rate": 1.3018653690186538e-06, "loss": 0.5388, "step": 26225 }, { "epoch": 0.7657002715249189, "grad_norm": 0.733115019075865, "learning_rate": 1.3017031630170318e-06, "loss": 0.6445, "step": 26226 }, { "epoch": 0.7657294677527663, "grad_norm": 0.7374083394512373, "learning_rate": 1.3015409570154096e-06, "loss": 0.6983, "step": 26227 }, { "epoch": 0.7657586639806137, "grad_norm": 0.7615270499136474, "learning_rate": 1.3013787510137876e-06, "loss": 0.7574, "step": 26228 }, { "epoch": 0.765787860208461, "grad_norm": 0.6910554851212606, "learning_rate": 1.3012165450121656e-06, "loss": 0.5815, "step": 26229 }, { "epoch": 0.7658170564363084, "grad_norm": 0.7288543433878512, "learning_rate": 1.3010543390105434e-06, "loss": 0.6985, "step": 26230 }, { "epoch": 0.7658462526641558, "grad_norm": 0.7724760518795408, "learning_rate": 1.3008921330089214e-06, "loss": 0.6878, "step": 26231 }, { "epoch": 0.7658754488920032, "grad_norm": 0.7105831813032188, "learning_rate": 1.3007299270072992e-06, "loss": 0.6127, "step": 26232 }, { "epoch": 0.7659046451198506, "grad_norm": 0.7354171450220331, "learning_rate": 1.3005677210056772e-06, "loss": 0.7022, "step": 26233 }, { "epoch": 0.7659338413476979, "grad_norm": 0.7110834391580724, "learning_rate": 1.3004055150040555e-06, "loss": 0.6391, "step": 26234 }, { "epoch": 0.7659630375755453, "grad_norm": 0.736511521410857, "learning_rate": 1.3002433090024332e-06, "loss": 0.6846, "step": 26235 }, { "epoch": 0.7659922338033927, "grad_norm": 0.7009823302909426, "learning_rate": 1.3000811030008113e-06, "loss": 0.6104, "step": 26236 }, { "epoch": 0.76602143003124, "grad_norm": 0.8002307065522278, "learning_rate": 1.299918896999189e-06, "loss": 0.6667, "step": 26237 }, { "epoch": 0.7660506262590874, "grad_norm": 0.6902789825580243, "learning_rate": 1.299756690997567e-06, "loss": 0.6204, "step": 26238 }, { "epoch": 0.7660798224869347, "grad_norm": 0.73234690498476, "learning_rate": 1.2995944849959449e-06, "loss": 0.6699, "step": 26239 }, { "epoch": 0.7661090187147821, "grad_norm": 0.6847062985347757, "learning_rate": 1.2994322789943229e-06, "loss": 0.5604, "step": 26240 }, { "epoch": 0.7661382149426295, "grad_norm": 0.7233355425285799, "learning_rate": 1.2992700729927007e-06, "loss": 0.6379, "step": 26241 }, { "epoch": 0.7661674111704768, "grad_norm": 0.7218405019054798, "learning_rate": 1.2991078669910787e-06, "loss": 0.6201, "step": 26242 }, { "epoch": 0.7661966073983242, "grad_norm": 0.708401700074107, "learning_rate": 1.2989456609894569e-06, "loss": 0.6618, "step": 26243 }, { "epoch": 0.7662258036261715, "grad_norm": 0.6917419458748684, "learning_rate": 1.2987834549878347e-06, "loss": 0.5668, "step": 26244 }, { "epoch": 0.7662549998540189, "grad_norm": 0.6792247905851344, "learning_rate": 1.2986212489862127e-06, "loss": 0.5691, "step": 26245 }, { "epoch": 0.7662841960818663, "grad_norm": 0.6975412450412776, "learning_rate": 1.2984590429845905e-06, "loss": 0.5823, "step": 26246 }, { "epoch": 0.7663133923097136, "grad_norm": 0.7290067403867814, "learning_rate": 1.2982968369829685e-06, "loss": 0.6371, "step": 26247 }, { "epoch": 0.766342588537561, "grad_norm": 0.7134433313827164, "learning_rate": 1.2981346309813465e-06, "loss": 0.6618, "step": 26248 }, { "epoch": 0.7663717847654083, "grad_norm": 0.746993451851534, "learning_rate": 1.2979724249797243e-06, "loss": 0.6845, "step": 26249 }, { "epoch": 0.7664009809932557, "grad_norm": 0.6698860917156595, "learning_rate": 1.2978102189781023e-06, "loss": 0.5285, "step": 26250 }, { "epoch": 0.7664301772211031, "grad_norm": 0.735207494832632, "learning_rate": 1.29764801297648e-06, "loss": 0.6673, "step": 26251 }, { "epoch": 0.7664593734489504, "grad_norm": 0.749164025561458, "learning_rate": 1.297485806974858e-06, "loss": 0.6544, "step": 26252 }, { "epoch": 0.7664885696767978, "grad_norm": 0.7272970849508521, "learning_rate": 1.2973236009732363e-06, "loss": 0.6218, "step": 26253 }, { "epoch": 0.7665177659046452, "grad_norm": 0.7615157177402009, "learning_rate": 1.2971613949716141e-06, "loss": 0.679, "step": 26254 }, { "epoch": 0.7665469621324925, "grad_norm": 0.700800502250539, "learning_rate": 1.2969991889699921e-06, "loss": 0.6058, "step": 26255 }, { "epoch": 0.7665761583603399, "grad_norm": 0.7180142727241666, "learning_rate": 1.29683698296837e-06, "loss": 0.6048, "step": 26256 }, { "epoch": 0.7666053545881872, "grad_norm": 0.7167540594293278, "learning_rate": 1.296674776966748e-06, "loss": 0.6523, "step": 26257 }, { "epoch": 0.7666345508160346, "grad_norm": 0.7383143535117339, "learning_rate": 1.2965125709651257e-06, "loss": 0.6616, "step": 26258 }, { "epoch": 0.766663747043882, "grad_norm": 0.7381661572172085, "learning_rate": 1.2963503649635037e-06, "loss": 0.6645, "step": 26259 }, { "epoch": 0.7666929432717293, "grad_norm": 0.7514017435308777, "learning_rate": 1.2961881589618815e-06, "loss": 0.6664, "step": 26260 }, { "epoch": 0.7667221394995767, "grad_norm": 0.7382915414122826, "learning_rate": 1.2960259529602595e-06, "loss": 0.6745, "step": 26261 }, { "epoch": 0.766751335727424, "grad_norm": 0.7178699006916843, "learning_rate": 1.2958637469586377e-06, "loss": 0.6317, "step": 26262 }, { "epoch": 0.7667805319552714, "grad_norm": 0.7779417303669783, "learning_rate": 1.2957015409570155e-06, "loss": 0.6801, "step": 26263 }, { "epoch": 0.7668097281831188, "grad_norm": 0.800158600712382, "learning_rate": 1.2955393349553935e-06, "loss": 0.6968, "step": 26264 }, { "epoch": 0.7668389244109661, "grad_norm": 0.7149740907694822, "learning_rate": 1.2953771289537713e-06, "loss": 0.6507, "step": 26265 }, { "epoch": 0.7668681206388135, "grad_norm": 0.7360782811063384, "learning_rate": 1.2952149229521493e-06, "loss": 0.6539, "step": 26266 }, { "epoch": 0.7668973168666608, "grad_norm": 0.7197191479265301, "learning_rate": 1.2950527169505273e-06, "loss": 0.6623, "step": 26267 }, { "epoch": 0.7669265130945082, "grad_norm": 0.6774784321399463, "learning_rate": 1.2948905109489051e-06, "loss": 0.587, "step": 26268 }, { "epoch": 0.7669557093223556, "grad_norm": 0.6973833024498869, "learning_rate": 1.2947283049472832e-06, "loss": 0.6171, "step": 26269 }, { "epoch": 0.7669849055502029, "grad_norm": 0.732315068553105, "learning_rate": 1.294566098945661e-06, "loss": 0.6471, "step": 26270 }, { "epoch": 0.7670141017780503, "grad_norm": 0.7943438255806715, "learning_rate": 1.294403892944039e-06, "loss": 0.5751, "step": 26271 }, { "epoch": 0.7670432980058977, "grad_norm": 0.6682719582617098, "learning_rate": 1.2942416869424172e-06, "loss": 0.5635, "step": 26272 }, { "epoch": 0.767072494233745, "grad_norm": 0.7174188833871741, "learning_rate": 1.294079480940795e-06, "loss": 0.6449, "step": 26273 }, { "epoch": 0.7671016904615924, "grad_norm": 0.6705540995998579, "learning_rate": 1.293917274939173e-06, "loss": 0.5396, "step": 26274 }, { "epoch": 0.7671308866894397, "grad_norm": 0.7488241895930208, "learning_rate": 1.2937550689375508e-06, "loss": 0.6551, "step": 26275 }, { "epoch": 0.7671600829172871, "grad_norm": 0.7231560787098787, "learning_rate": 1.2935928629359288e-06, "loss": 0.6056, "step": 26276 }, { "epoch": 0.7671892791451345, "grad_norm": 0.7827413420583997, "learning_rate": 1.2934306569343066e-06, "loss": 0.8063, "step": 26277 }, { "epoch": 0.7672184753729818, "grad_norm": 0.7122719208354517, "learning_rate": 1.2932684509326846e-06, "loss": 0.6344, "step": 26278 }, { "epoch": 0.7672476716008292, "grad_norm": 0.6713254892945458, "learning_rate": 1.2931062449310624e-06, "loss": 0.5956, "step": 26279 }, { "epoch": 0.7672768678286765, "grad_norm": 0.6720188919894697, "learning_rate": 1.2929440389294404e-06, "loss": 0.5732, "step": 26280 }, { "epoch": 0.7673060640565239, "grad_norm": 0.7378975824163033, "learning_rate": 1.2927818329278186e-06, "loss": 0.6321, "step": 26281 }, { "epoch": 0.7673352602843713, "grad_norm": 0.7228377174817315, "learning_rate": 1.2926196269261964e-06, "loss": 0.6527, "step": 26282 }, { "epoch": 0.7673644565122186, "grad_norm": 0.6935197679819114, "learning_rate": 1.2924574209245744e-06, "loss": 0.5927, "step": 26283 }, { "epoch": 0.767393652740066, "grad_norm": 0.6956165204982877, "learning_rate": 1.2922952149229522e-06, "loss": 0.5804, "step": 26284 }, { "epoch": 0.7674228489679134, "grad_norm": 0.6967136551882606, "learning_rate": 1.2921330089213302e-06, "loss": 0.6216, "step": 26285 }, { "epoch": 0.7674520451957607, "grad_norm": 0.7306766576149746, "learning_rate": 1.2919708029197082e-06, "loss": 0.6097, "step": 26286 }, { "epoch": 0.7674812414236081, "grad_norm": 0.7562320341555082, "learning_rate": 1.291808596918086e-06, "loss": 0.6633, "step": 26287 }, { "epoch": 0.7675104376514554, "grad_norm": 0.7643111039731483, "learning_rate": 1.291646390916464e-06, "loss": 0.6966, "step": 26288 }, { "epoch": 0.7675396338793028, "grad_norm": 0.7084589272973939, "learning_rate": 1.2914841849148418e-06, "loss": 0.6323, "step": 26289 }, { "epoch": 0.7675688301071502, "grad_norm": 0.6841350734508655, "learning_rate": 1.29132197891322e-06, "loss": 0.5864, "step": 26290 }, { "epoch": 0.7675980263349975, "grad_norm": 0.689923861403786, "learning_rate": 1.291159772911598e-06, "loss": 0.5835, "step": 26291 }, { "epoch": 0.7676272225628449, "grad_norm": 0.8165191231818633, "learning_rate": 1.2909975669099758e-06, "loss": 0.7432, "step": 26292 }, { "epoch": 0.7676564187906922, "grad_norm": 0.727361670457766, "learning_rate": 1.2908353609083538e-06, "loss": 0.6385, "step": 26293 }, { "epoch": 0.7676856150185396, "grad_norm": 0.7510624978350046, "learning_rate": 1.2906731549067316e-06, "loss": 0.7175, "step": 26294 }, { "epoch": 0.767714811246387, "grad_norm": 0.7299872720958922, "learning_rate": 1.2905109489051096e-06, "loss": 0.6946, "step": 26295 }, { "epoch": 0.7677440074742343, "grad_norm": 0.7438722482991394, "learning_rate": 1.2903487429034874e-06, "loss": 0.6698, "step": 26296 }, { "epoch": 0.7677732037020817, "grad_norm": 0.6856432680518677, "learning_rate": 1.2901865369018654e-06, "loss": 0.5844, "step": 26297 }, { "epoch": 0.767802399929929, "grad_norm": 0.714537171000209, "learning_rate": 1.2900243309002432e-06, "loss": 0.6448, "step": 26298 }, { "epoch": 0.7678315961577764, "grad_norm": 0.8827077280194042, "learning_rate": 1.2898621248986212e-06, "loss": 0.7077, "step": 26299 }, { "epoch": 0.7678607923856238, "grad_norm": 0.7003571471021939, "learning_rate": 1.2896999188969995e-06, "loss": 0.6142, "step": 26300 }, { "epoch": 0.7678899886134711, "grad_norm": 0.8224932467601392, "learning_rate": 1.2895377128953773e-06, "loss": 0.7503, "step": 26301 }, { "epoch": 0.7679191848413185, "grad_norm": 0.797193662664574, "learning_rate": 1.2893755068937553e-06, "loss": 0.6207, "step": 26302 }, { "epoch": 0.7679483810691659, "grad_norm": 0.7622838916674026, "learning_rate": 1.289213300892133e-06, "loss": 0.7176, "step": 26303 }, { "epoch": 0.7679775772970132, "grad_norm": 0.7701932732649668, "learning_rate": 1.289051094890511e-06, "loss": 0.7427, "step": 26304 }, { "epoch": 0.7680067735248606, "grad_norm": 0.701704860312832, "learning_rate": 1.288888888888889e-06, "loss": 0.6131, "step": 26305 }, { "epoch": 0.7680359697527079, "grad_norm": 0.73913908760291, "learning_rate": 1.2887266828872669e-06, "loss": 0.7166, "step": 26306 }, { "epoch": 0.7680651659805553, "grad_norm": 0.6827207469228302, "learning_rate": 1.2885644768856449e-06, "loss": 0.6247, "step": 26307 }, { "epoch": 0.7680943622084027, "grad_norm": 0.8184813221306708, "learning_rate": 1.2884022708840227e-06, "loss": 0.7872, "step": 26308 }, { "epoch": 0.76812355843625, "grad_norm": 0.7484711276619995, "learning_rate": 1.2882400648824009e-06, "loss": 0.6038, "step": 26309 }, { "epoch": 0.7681527546640974, "grad_norm": 0.7238403677139842, "learning_rate": 1.2880778588807789e-06, "loss": 0.6412, "step": 26310 }, { "epoch": 0.7681819508919447, "grad_norm": 0.720594671750397, "learning_rate": 1.2879156528791567e-06, "loss": 0.5841, "step": 26311 }, { "epoch": 0.7682111471197921, "grad_norm": 0.6989751942379026, "learning_rate": 1.2877534468775347e-06, "loss": 0.611, "step": 26312 }, { "epoch": 0.7682403433476395, "grad_norm": 0.7696806586175591, "learning_rate": 1.2875912408759125e-06, "loss": 0.7197, "step": 26313 }, { "epoch": 0.7682695395754868, "grad_norm": 0.7093176870749157, "learning_rate": 1.2874290348742905e-06, "loss": 0.5897, "step": 26314 }, { "epoch": 0.7682987358033342, "grad_norm": 0.7415425643385638, "learning_rate": 1.2872668288726683e-06, "loss": 0.608, "step": 26315 }, { "epoch": 0.7683279320311815, "grad_norm": 0.6772990117903795, "learning_rate": 1.2871046228710463e-06, "loss": 0.5678, "step": 26316 }, { "epoch": 0.7683571282590289, "grad_norm": 0.7241458730703298, "learning_rate": 1.286942416869424e-06, "loss": 0.6412, "step": 26317 }, { "epoch": 0.7683863244868763, "grad_norm": 0.7324038009175423, "learning_rate": 1.286780210867802e-06, "loss": 0.6796, "step": 26318 }, { "epoch": 0.7684155207147236, "grad_norm": 0.6685783624130796, "learning_rate": 1.2866180048661803e-06, "loss": 0.5419, "step": 26319 }, { "epoch": 0.768444716942571, "grad_norm": 0.7107374412493875, "learning_rate": 1.2864557988645581e-06, "loss": 0.6406, "step": 26320 }, { "epoch": 0.7684739131704184, "grad_norm": 0.6644957963667185, "learning_rate": 1.2862935928629361e-06, "loss": 0.549, "step": 26321 }, { "epoch": 0.7685031093982657, "grad_norm": 0.6976617918010828, "learning_rate": 1.286131386861314e-06, "loss": 0.5851, "step": 26322 }, { "epoch": 0.7685323056261131, "grad_norm": 0.7926114310018714, "learning_rate": 1.285969180859692e-06, "loss": 0.6554, "step": 26323 }, { "epoch": 0.7685615018539604, "grad_norm": 0.7207275741156658, "learning_rate": 1.28580697485807e-06, "loss": 0.6051, "step": 26324 }, { "epoch": 0.7685906980818078, "grad_norm": 0.7151778298210244, "learning_rate": 1.2856447688564477e-06, "loss": 0.643, "step": 26325 }, { "epoch": 0.7686198943096552, "grad_norm": 0.71390381795101, "learning_rate": 1.2854825628548257e-06, "loss": 0.6598, "step": 26326 }, { "epoch": 0.7686490905375025, "grad_norm": 0.7087155185827566, "learning_rate": 1.2853203568532035e-06, "loss": 0.5982, "step": 26327 }, { "epoch": 0.7686782867653499, "grad_norm": 0.6768420121302521, "learning_rate": 1.2851581508515817e-06, "loss": 0.5475, "step": 26328 }, { "epoch": 0.7687074829931972, "grad_norm": 0.7246095694838107, "learning_rate": 1.2849959448499597e-06, "loss": 0.65, "step": 26329 }, { "epoch": 0.7687366792210446, "grad_norm": 0.7444666984935556, "learning_rate": 1.2848337388483375e-06, "loss": 0.6627, "step": 26330 }, { "epoch": 0.768765875448892, "grad_norm": 0.6952011359728418, "learning_rate": 1.2846715328467155e-06, "loss": 0.6084, "step": 26331 }, { "epoch": 0.7687950716767393, "grad_norm": 0.7401186263316131, "learning_rate": 1.2845093268450933e-06, "loss": 0.6743, "step": 26332 }, { "epoch": 0.7688242679045867, "grad_norm": 0.7343465185497051, "learning_rate": 1.2843471208434714e-06, "loss": 0.634, "step": 26333 }, { "epoch": 0.768853464132434, "grad_norm": 0.6986947668749431, "learning_rate": 1.2841849148418491e-06, "loss": 0.5875, "step": 26334 }, { "epoch": 0.7688826603602814, "grad_norm": 0.7131955996593728, "learning_rate": 1.2840227088402272e-06, "loss": 0.6145, "step": 26335 }, { "epoch": 0.7689118565881288, "grad_norm": 0.7283791612192082, "learning_rate": 1.283860502838605e-06, "loss": 0.6242, "step": 26336 }, { "epoch": 0.7689410528159761, "grad_norm": 0.7095299246148645, "learning_rate": 1.283698296836983e-06, "loss": 0.6022, "step": 26337 }, { "epoch": 0.7689702490438235, "grad_norm": 0.7246900910326564, "learning_rate": 1.2835360908353612e-06, "loss": 0.6362, "step": 26338 }, { "epoch": 0.7689994452716709, "grad_norm": 0.668345607068524, "learning_rate": 1.283373884833739e-06, "loss": 0.5735, "step": 26339 }, { "epoch": 0.7690286414995182, "grad_norm": 0.7676541529408867, "learning_rate": 1.283211678832117e-06, "loss": 0.7541, "step": 26340 }, { "epoch": 0.7690578377273656, "grad_norm": 0.6930156191784416, "learning_rate": 1.2830494728304948e-06, "loss": 0.6154, "step": 26341 }, { "epoch": 0.7690870339552129, "grad_norm": 0.7313238003128039, "learning_rate": 1.2828872668288728e-06, "loss": 0.6755, "step": 26342 }, { "epoch": 0.7691162301830603, "grad_norm": 0.7006138963659899, "learning_rate": 1.2827250608272508e-06, "loss": 0.6093, "step": 26343 }, { "epoch": 0.7691454264109077, "grad_norm": 0.7102532059133358, "learning_rate": 1.2825628548256286e-06, "loss": 0.6114, "step": 26344 }, { "epoch": 0.769174622638755, "grad_norm": 0.7442787870597289, "learning_rate": 1.2824006488240066e-06, "loss": 0.6424, "step": 26345 }, { "epoch": 0.7692038188666024, "grad_norm": 0.7461977588016647, "learning_rate": 1.2822384428223844e-06, "loss": 0.6548, "step": 26346 }, { "epoch": 0.7692330150944497, "grad_norm": 0.7254783271458838, "learning_rate": 1.2820762368207626e-06, "loss": 0.6565, "step": 26347 }, { "epoch": 0.7692622113222971, "grad_norm": 0.6954147641813001, "learning_rate": 1.2819140308191406e-06, "loss": 0.5857, "step": 26348 }, { "epoch": 0.7692914075501445, "grad_norm": 0.7379236895336269, "learning_rate": 1.2817518248175184e-06, "loss": 0.6921, "step": 26349 }, { "epoch": 0.7693206037779918, "grad_norm": 0.738854970162183, "learning_rate": 1.2815896188158964e-06, "loss": 0.6827, "step": 26350 }, { "epoch": 0.7693498000058392, "grad_norm": 0.7055009973104274, "learning_rate": 1.2814274128142742e-06, "loss": 0.6749, "step": 26351 }, { "epoch": 0.7693789962336867, "grad_norm": 0.7129289638322112, "learning_rate": 1.2812652068126522e-06, "loss": 0.6345, "step": 26352 }, { "epoch": 0.769408192461534, "grad_norm": 0.7536548428839435, "learning_rate": 1.28110300081103e-06, "loss": 0.6821, "step": 26353 }, { "epoch": 0.7694373886893814, "grad_norm": 0.70258977408863, "learning_rate": 1.280940794809408e-06, "loss": 0.6304, "step": 26354 }, { "epoch": 0.7694665849172287, "grad_norm": 0.7994413276420105, "learning_rate": 1.2807785888077858e-06, "loss": 0.7464, "step": 26355 }, { "epoch": 0.7694957811450761, "grad_norm": 0.7262043793275105, "learning_rate": 1.280616382806164e-06, "loss": 0.6533, "step": 26356 }, { "epoch": 0.7695249773729235, "grad_norm": 0.7142438029359428, "learning_rate": 1.280454176804542e-06, "loss": 0.6455, "step": 26357 }, { "epoch": 0.7695541736007708, "grad_norm": 0.7760725866029085, "learning_rate": 1.2802919708029198e-06, "loss": 0.7484, "step": 26358 }, { "epoch": 0.7695833698286182, "grad_norm": 0.7908394709129958, "learning_rate": 1.2801297648012978e-06, "loss": 0.7311, "step": 26359 }, { "epoch": 0.7696125660564656, "grad_norm": 0.7763804496417447, "learning_rate": 1.2799675587996756e-06, "loss": 0.7383, "step": 26360 }, { "epoch": 0.7696417622843129, "grad_norm": 0.7217063835384842, "learning_rate": 1.2798053527980536e-06, "loss": 0.645, "step": 26361 }, { "epoch": 0.7696709585121603, "grad_norm": 0.7193391379851147, "learning_rate": 1.2796431467964314e-06, "loss": 0.6544, "step": 26362 }, { "epoch": 0.7697001547400076, "grad_norm": 0.7273034457222958, "learning_rate": 1.2794809407948094e-06, "loss": 0.6784, "step": 26363 }, { "epoch": 0.769729350967855, "grad_norm": 0.708035938168414, "learning_rate": 1.2793187347931874e-06, "loss": 0.5662, "step": 26364 }, { "epoch": 0.7697585471957024, "grad_norm": 0.7361298277199569, "learning_rate": 1.2791565287915652e-06, "loss": 0.6827, "step": 26365 }, { "epoch": 0.7697877434235497, "grad_norm": 0.7682716161943283, "learning_rate": 1.2789943227899435e-06, "loss": 0.7202, "step": 26366 }, { "epoch": 0.7698169396513971, "grad_norm": 0.8057912585672977, "learning_rate": 1.2788321167883215e-06, "loss": 0.6918, "step": 26367 }, { "epoch": 0.7698461358792444, "grad_norm": 0.7003666694140972, "learning_rate": 1.2786699107866993e-06, "loss": 0.635, "step": 26368 }, { "epoch": 0.7698753321070918, "grad_norm": 0.700555187712265, "learning_rate": 1.2785077047850773e-06, "loss": 0.5928, "step": 26369 }, { "epoch": 0.7699045283349392, "grad_norm": 0.7462100194475618, "learning_rate": 1.278345498783455e-06, "loss": 0.6373, "step": 26370 }, { "epoch": 0.7699337245627865, "grad_norm": 0.7207835950808046, "learning_rate": 1.278183292781833e-06, "loss": 0.6138, "step": 26371 }, { "epoch": 0.7699629207906339, "grad_norm": 0.7273468310230194, "learning_rate": 1.2780210867802109e-06, "loss": 0.6274, "step": 26372 }, { "epoch": 0.7699921170184812, "grad_norm": 0.7624898798097988, "learning_rate": 1.2778588807785889e-06, "loss": 0.7065, "step": 26373 }, { "epoch": 0.7700213132463286, "grad_norm": 0.6768039459708228, "learning_rate": 1.2776966747769667e-06, "loss": 0.5627, "step": 26374 }, { "epoch": 0.770050509474176, "grad_norm": 0.667503919210988, "learning_rate": 1.2775344687753449e-06, "loss": 0.5701, "step": 26375 }, { "epoch": 0.7700797057020233, "grad_norm": 0.7682445691601579, "learning_rate": 1.2773722627737229e-06, "loss": 0.7488, "step": 26376 }, { "epoch": 0.7701089019298707, "grad_norm": 0.7433153066511302, "learning_rate": 1.2772100567721007e-06, "loss": 0.7077, "step": 26377 }, { "epoch": 0.770138098157718, "grad_norm": 0.7278182543313511, "learning_rate": 1.2770478507704787e-06, "loss": 0.6682, "step": 26378 }, { "epoch": 0.7701672943855654, "grad_norm": 0.8300986494410388, "learning_rate": 1.2768856447688565e-06, "loss": 0.6486, "step": 26379 }, { "epoch": 0.7701964906134128, "grad_norm": 0.6933194923217835, "learning_rate": 1.2767234387672345e-06, "loss": 0.5546, "step": 26380 }, { "epoch": 0.7702256868412601, "grad_norm": 0.7512297369829175, "learning_rate": 1.2765612327656123e-06, "loss": 0.6829, "step": 26381 }, { "epoch": 0.7702548830691075, "grad_norm": 0.7309697105376002, "learning_rate": 1.2763990267639903e-06, "loss": 0.6901, "step": 26382 }, { "epoch": 0.7702840792969549, "grad_norm": 0.7272887060065647, "learning_rate": 1.2762368207623683e-06, "loss": 0.6503, "step": 26383 }, { "epoch": 0.7703132755248022, "grad_norm": 0.7057903135129017, "learning_rate": 1.276074614760746e-06, "loss": 0.6237, "step": 26384 }, { "epoch": 0.7703424717526496, "grad_norm": 0.7305359734767821, "learning_rate": 1.2759124087591243e-06, "loss": 0.662, "step": 26385 }, { "epoch": 0.7703716679804969, "grad_norm": 1.050366162666302, "learning_rate": 1.2757502027575023e-06, "loss": 0.7375, "step": 26386 }, { "epoch": 0.7704008642083443, "grad_norm": 0.7379478844041222, "learning_rate": 1.2755879967558801e-06, "loss": 0.6541, "step": 26387 }, { "epoch": 0.7704300604361917, "grad_norm": 0.629331083958859, "learning_rate": 1.2754257907542581e-06, "loss": 0.4967, "step": 26388 }, { "epoch": 0.770459256664039, "grad_norm": 0.717606201518791, "learning_rate": 1.275263584752636e-06, "loss": 0.5856, "step": 26389 }, { "epoch": 0.7704884528918864, "grad_norm": 0.6951356105503734, "learning_rate": 1.275101378751014e-06, "loss": 0.6064, "step": 26390 }, { "epoch": 0.7705176491197337, "grad_norm": 0.7400389313565372, "learning_rate": 1.2749391727493917e-06, "loss": 0.6436, "step": 26391 }, { "epoch": 0.7705468453475811, "grad_norm": 0.7389592239377352, "learning_rate": 1.2747769667477697e-06, "loss": 0.6588, "step": 26392 }, { "epoch": 0.7705760415754285, "grad_norm": 0.685370730069833, "learning_rate": 1.2746147607461475e-06, "loss": 0.5703, "step": 26393 }, { "epoch": 0.7706052378032758, "grad_norm": 0.7395496322532481, "learning_rate": 1.2744525547445257e-06, "loss": 0.6333, "step": 26394 }, { "epoch": 0.7706344340311232, "grad_norm": 0.7594907747987723, "learning_rate": 1.2742903487429037e-06, "loss": 0.6967, "step": 26395 }, { "epoch": 0.7706636302589706, "grad_norm": 0.7435192607272416, "learning_rate": 1.2741281427412815e-06, "loss": 0.6846, "step": 26396 }, { "epoch": 0.7706928264868179, "grad_norm": 0.6587385816841183, "learning_rate": 1.2739659367396596e-06, "loss": 0.5453, "step": 26397 }, { "epoch": 0.7707220227146653, "grad_norm": 0.7182929953400697, "learning_rate": 1.2738037307380373e-06, "loss": 0.5866, "step": 26398 }, { "epoch": 0.7707512189425126, "grad_norm": 0.7152926360094716, "learning_rate": 1.2736415247364154e-06, "loss": 0.6647, "step": 26399 }, { "epoch": 0.77078041517036, "grad_norm": 0.7953537518259477, "learning_rate": 1.2734793187347931e-06, "loss": 0.7707, "step": 26400 }, { "epoch": 0.7708096113982074, "grad_norm": 0.7095998660907453, "learning_rate": 1.2733171127331712e-06, "loss": 0.6372, "step": 26401 }, { "epoch": 0.7708388076260547, "grad_norm": 0.7174225220916387, "learning_rate": 1.2731549067315492e-06, "loss": 0.6368, "step": 26402 }, { "epoch": 0.7708680038539021, "grad_norm": 0.7006248250233943, "learning_rate": 1.272992700729927e-06, "loss": 0.5892, "step": 26403 }, { "epoch": 0.7708972000817494, "grad_norm": 0.7245340220268339, "learning_rate": 1.2728304947283052e-06, "loss": 0.6615, "step": 26404 }, { "epoch": 0.7709263963095968, "grad_norm": 0.7292853882390962, "learning_rate": 1.2726682887266832e-06, "loss": 0.6551, "step": 26405 }, { "epoch": 0.7709555925374442, "grad_norm": 0.7832432935304882, "learning_rate": 1.272506082725061e-06, "loss": 0.7274, "step": 26406 }, { "epoch": 0.7709847887652915, "grad_norm": 0.7544178333716502, "learning_rate": 1.272343876723439e-06, "loss": 0.6513, "step": 26407 }, { "epoch": 0.7710139849931389, "grad_norm": 0.8452590424478751, "learning_rate": 1.2721816707218168e-06, "loss": 0.6624, "step": 26408 }, { "epoch": 0.7710431812209863, "grad_norm": 0.7673742980035146, "learning_rate": 1.2720194647201948e-06, "loss": 0.6899, "step": 26409 }, { "epoch": 0.7710723774488336, "grad_norm": 0.7021900132401254, "learning_rate": 1.2718572587185726e-06, "loss": 0.6412, "step": 26410 }, { "epoch": 0.771101573676681, "grad_norm": 0.752025195209283, "learning_rate": 1.2716950527169506e-06, "loss": 0.7458, "step": 26411 }, { "epoch": 0.7711307699045283, "grad_norm": 0.7911959711724423, "learning_rate": 1.2715328467153284e-06, "loss": 0.7652, "step": 26412 }, { "epoch": 0.7711599661323757, "grad_norm": 0.7670431264567557, "learning_rate": 1.2713706407137066e-06, "loss": 0.6459, "step": 26413 }, { "epoch": 0.7711891623602231, "grad_norm": 0.7469011613526413, "learning_rate": 1.2712084347120846e-06, "loss": 0.6599, "step": 26414 }, { "epoch": 0.7712183585880704, "grad_norm": 0.9549904022444999, "learning_rate": 1.2710462287104624e-06, "loss": 0.6872, "step": 26415 }, { "epoch": 0.7712475548159178, "grad_norm": 0.7354812511433956, "learning_rate": 1.2708840227088404e-06, "loss": 0.6409, "step": 26416 }, { "epoch": 0.7712767510437651, "grad_norm": 0.703479032681121, "learning_rate": 1.2707218167072182e-06, "loss": 0.6189, "step": 26417 }, { "epoch": 0.7713059472716125, "grad_norm": 0.7213462461490828, "learning_rate": 1.2705596107055962e-06, "loss": 0.6226, "step": 26418 }, { "epoch": 0.7713351434994599, "grad_norm": 0.8890088292799059, "learning_rate": 1.270397404703974e-06, "loss": 0.6711, "step": 26419 }, { "epoch": 0.7713643397273072, "grad_norm": 0.7196105634392972, "learning_rate": 1.270235198702352e-06, "loss": 0.6487, "step": 26420 }, { "epoch": 0.7713935359551546, "grad_norm": 0.7148959846255994, "learning_rate": 1.27007299270073e-06, "loss": 0.5807, "step": 26421 }, { "epoch": 0.771422732183002, "grad_norm": 0.751749969735522, "learning_rate": 1.2699107866991078e-06, "loss": 0.7632, "step": 26422 }, { "epoch": 0.7714519284108493, "grad_norm": 0.7389251883364859, "learning_rate": 1.269748580697486e-06, "loss": 0.6304, "step": 26423 }, { "epoch": 0.7714811246386967, "grad_norm": 0.7503454131130223, "learning_rate": 1.2695863746958638e-06, "loss": 0.6342, "step": 26424 }, { "epoch": 0.771510320866544, "grad_norm": 0.7352514039740174, "learning_rate": 1.2694241686942418e-06, "loss": 0.6218, "step": 26425 }, { "epoch": 0.7715395170943914, "grad_norm": 0.7125359065514181, "learning_rate": 1.2692619626926198e-06, "loss": 0.6264, "step": 26426 }, { "epoch": 0.7715687133222388, "grad_norm": 0.7949459221845757, "learning_rate": 1.2690997566909976e-06, "loss": 0.6513, "step": 26427 }, { "epoch": 0.7715979095500861, "grad_norm": 0.7747024965006318, "learning_rate": 1.2689375506893756e-06, "loss": 0.7052, "step": 26428 }, { "epoch": 0.7716271057779335, "grad_norm": 0.7317049548576355, "learning_rate": 1.2687753446877534e-06, "loss": 0.6776, "step": 26429 }, { "epoch": 0.7716563020057808, "grad_norm": 0.6878232058071159, "learning_rate": 1.2686131386861314e-06, "loss": 0.6181, "step": 26430 }, { "epoch": 0.7716854982336282, "grad_norm": 0.7377952398070602, "learning_rate": 1.2684509326845092e-06, "loss": 0.6556, "step": 26431 }, { "epoch": 0.7717146944614756, "grad_norm": 0.7349285133969233, "learning_rate": 1.2682887266828875e-06, "loss": 0.6509, "step": 26432 }, { "epoch": 0.7717438906893229, "grad_norm": 0.7055802242261958, "learning_rate": 1.2681265206812655e-06, "loss": 0.6396, "step": 26433 }, { "epoch": 0.7717730869171703, "grad_norm": 0.7217369510050867, "learning_rate": 1.2679643146796433e-06, "loss": 0.6214, "step": 26434 }, { "epoch": 0.7718022831450176, "grad_norm": 0.6839658025499143, "learning_rate": 1.2678021086780213e-06, "loss": 0.5985, "step": 26435 }, { "epoch": 0.771831479372865, "grad_norm": 0.7041771754886951, "learning_rate": 1.267639902676399e-06, "loss": 0.6155, "step": 26436 }, { "epoch": 0.7718606756007124, "grad_norm": 0.8026354802414343, "learning_rate": 1.267477696674777e-06, "loss": 0.694, "step": 26437 }, { "epoch": 0.7718898718285597, "grad_norm": 0.7282833932018488, "learning_rate": 1.2673154906731549e-06, "loss": 0.6746, "step": 26438 }, { "epoch": 0.7719190680564071, "grad_norm": 0.8098696456919124, "learning_rate": 1.2671532846715329e-06, "loss": 0.6949, "step": 26439 }, { "epoch": 0.7719482642842544, "grad_norm": 0.6788182819585105, "learning_rate": 1.2669910786699109e-06, "loss": 0.5817, "step": 26440 }, { "epoch": 0.7719774605121018, "grad_norm": 0.7296540278055827, "learning_rate": 1.2668288726682889e-06, "loss": 0.6345, "step": 26441 }, { "epoch": 0.7720066567399492, "grad_norm": 0.6737782862157997, "learning_rate": 1.2666666666666669e-06, "loss": 0.5742, "step": 26442 }, { "epoch": 0.7720358529677965, "grad_norm": 0.8461961111272128, "learning_rate": 1.2665044606650447e-06, "loss": 0.6423, "step": 26443 }, { "epoch": 0.7720650491956439, "grad_norm": 0.6839362267547293, "learning_rate": 1.2663422546634227e-06, "loss": 0.6073, "step": 26444 }, { "epoch": 0.7720942454234913, "grad_norm": 0.7254754049143659, "learning_rate": 1.2661800486618007e-06, "loss": 0.6462, "step": 26445 }, { "epoch": 0.7721234416513386, "grad_norm": 0.7663859585019178, "learning_rate": 1.2660178426601785e-06, "loss": 0.7038, "step": 26446 }, { "epoch": 0.772152637879186, "grad_norm": 0.7059888284072314, "learning_rate": 1.2658556366585565e-06, "loss": 0.6054, "step": 26447 }, { "epoch": 0.7721818341070333, "grad_norm": 0.7102854435854628, "learning_rate": 1.2656934306569343e-06, "loss": 0.6097, "step": 26448 }, { "epoch": 0.7722110303348807, "grad_norm": 0.7018250336582667, "learning_rate": 1.2655312246553123e-06, "loss": 0.6393, "step": 26449 }, { "epoch": 0.7722402265627281, "grad_norm": 0.697408418318338, "learning_rate": 1.26536901865369e-06, "loss": 0.6156, "step": 26450 }, { "epoch": 0.7722694227905754, "grad_norm": 0.7455628904301091, "learning_rate": 1.2652068126520683e-06, "loss": 0.6598, "step": 26451 }, { "epoch": 0.7722986190184228, "grad_norm": 0.7359643948484657, "learning_rate": 1.2650446066504463e-06, "loss": 0.6711, "step": 26452 }, { "epoch": 0.7723278152462701, "grad_norm": 0.7147234485245411, "learning_rate": 1.2648824006488241e-06, "loss": 0.6068, "step": 26453 }, { "epoch": 0.7723570114741175, "grad_norm": 0.7411260823718616, "learning_rate": 1.2647201946472021e-06, "loss": 0.6316, "step": 26454 }, { "epoch": 0.7723862077019649, "grad_norm": 0.7222006493480136, "learning_rate": 1.26455798864558e-06, "loss": 0.6537, "step": 26455 }, { "epoch": 0.7724154039298122, "grad_norm": 0.740288339749251, "learning_rate": 1.264395782643958e-06, "loss": 0.6654, "step": 26456 }, { "epoch": 0.7724446001576596, "grad_norm": 0.7325832068433834, "learning_rate": 1.2642335766423357e-06, "loss": 0.6308, "step": 26457 }, { "epoch": 0.772473796385507, "grad_norm": 0.7863632626979168, "learning_rate": 1.2640713706407137e-06, "loss": 0.6967, "step": 26458 }, { "epoch": 0.7725029926133543, "grad_norm": 0.7494302411459871, "learning_rate": 1.2639091646390917e-06, "loss": 0.6373, "step": 26459 }, { "epoch": 0.7725321888412017, "grad_norm": 0.7511868178396902, "learning_rate": 1.2637469586374697e-06, "loss": 0.6968, "step": 26460 }, { "epoch": 0.772561385069049, "grad_norm": 0.7721500998404526, "learning_rate": 1.2635847526358478e-06, "loss": 0.7072, "step": 26461 }, { "epoch": 0.7725905812968964, "grad_norm": 0.660931937551156, "learning_rate": 1.2634225466342255e-06, "loss": 0.5585, "step": 26462 }, { "epoch": 0.7726197775247438, "grad_norm": 0.7410600097074334, "learning_rate": 1.2632603406326036e-06, "loss": 0.6445, "step": 26463 }, { "epoch": 0.7726489737525911, "grad_norm": 0.7354742366141104, "learning_rate": 1.2630981346309816e-06, "loss": 0.5973, "step": 26464 }, { "epoch": 0.7726781699804385, "grad_norm": 0.7154305101842665, "learning_rate": 1.2629359286293594e-06, "loss": 0.6245, "step": 26465 }, { "epoch": 0.7727073662082858, "grad_norm": 0.7316520462673054, "learning_rate": 1.2627737226277374e-06, "loss": 0.6465, "step": 26466 }, { "epoch": 0.7727365624361332, "grad_norm": 0.6628040098896413, "learning_rate": 1.2626115166261152e-06, "loss": 0.5293, "step": 26467 }, { "epoch": 0.7727657586639806, "grad_norm": 0.7206819754287992, "learning_rate": 1.2624493106244932e-06, "loss": 0.6378, "step": 26468 }, { "epoch": 0.7727949548918279, "grad_norm": 0.7086044164565607, "learning_rate": 1.262287104622871e-06, "loss": 0.6049, "step": 26469 }, { "epoch": 0.7728241511196753, "grad_norm": 0.6913893430105249, "learning_rate": 1.2621248986212492e-06, "loss": 0.5554, "step": 26470 }, { "epoch": 0.7728533473475226, "grad_norm": 0.7271798282107699, "learning_rate": 1.2619626926196272e-06, "loss": 0.6136, "step": 26471 }, { "epoch": 0.77288254357537, "grad_norm": 0.7405994177350735, "learning_rate": 1.261800486618005e-06, "loss": 0.6852, "step": 26472 }, { "epoch": 0.7729117398032175, "grad_norm": 0.734657942362911, "learning_rate": 1.261638280616383e-06, "loss": 0.7072, "step": 26473 }, { "epoch": 0.7729409360310648, "grad_norm": 0.7208770140152274, "learning_rate": 1.2614760746147608e-06, "loss": 0.6438, "step": 26474 }, { "epoch": 0.7729701322589122, "grad_norm": 0.7334121214424522, "learning_rate": 1.2613138686131388e-06, "loss": 0.6429, "step": 26475 }, { "epoch": 0.7729993284867596, "grad_norm": 0.7333498393243691, "learning_rate": 1.2611516626115166e-06, "loss": 0.6801, "step": 26476 }, { "epoch": 0.7730285247146069, "grad_norm": 0.8027445538875706, "learning_rate": 1.2609894566098946e-06, "loss": 0.7152, "step": 26477 }, { "epoch": 0.7730577209424543, "grad_norm": 0.6870677625515682, "learning_rate": 1.2608272506082726e-06, "loss": 0.5929, "step": 26478 }, { "epoch": 0.7730869171703016, "grad_norm": 0.7310376458863977, "learning_rate": 1.2606650446066506e-06, "loss": 0.6891, "step": 26479 }, { "epoch": 0.773116113398149, "grad_norm": 0.7090300144429614, "learning_rate": 1.2605028386050286e-06, "loss": 0.6035, "step": 26480 }, { "epoch": 0.7731453096259964, "grad_norm": 0.7331227113390544, "learning_rate": 1.2603406326034064e-06, "loss": 0.687, "step": 26481 }, { "epoch": 0.7731745058538437, "grad_norm": 0.6827550982782402, "learning_rate": 1.2601784266017844e-06, "loss": 0.5611, "step": 26482 }, { "epoch": 0.7732037020816911, "grad_norm": 0.7374048960327139, "learning_rate": 1.2600162206001624e-06, "loss": 0.675, "step": 26483 }, { "epoch": 0.7732328983095385, "grad_norm": 0.7119747940183605, "learning_rate": 1.2598540145985402e-06, "loss": 0.6449, "step": 26484 }, { "epoch": 0.7732620945373858, "grad_norm": 0.800624685948416, "learning_rate": 1.2596918085969182e-06, "loss": 0.7032, "step": 26485 }, { "epoch": 0.7732912907652332, "grad_norm": 0.6882621957037877, "learning_rate": 1.259529602595296e-06, "loss": 0.6036, "step": 26486 }, { "epoch": 0.7733204869930805, "grad_norm": 0.7179029812831996, "learning_rate": 1.259367396593674e-06, "loss": 0.6438, "step": 26487 }, { "epoch": 0.7733496832209279, "grad_norm": 0.8710336259537929, "learning_rate": 1.2592051905920518e-06, "loss": 0.6597, "step": 26488 }, { "epoch": 0.7733788794487753, "grad_norm": 0.7949183774755931, "learning_rate": 1.25904298459043e-06, "loss": 0.7102, "step": 26489 }, { "epoch": 0.7734080756766226, "grad_norm": 0.7356084951936096, "learning_rate": 1.258880778588808e-06, "loss": 0.6295, "step": 26490 }, { "epoch": 0.77343727190447, "grad_norm": 0.7190907090883553, "learning_rate": 1.2587185725871858e-06, "loss": 0.658, "step": 26491 }, { "epoch": 0.7734664681323173, "grad_norm": 0.7020951439051676, "learning_rate": 1.2585563665855638e-06, "loss": 0.6287, "step": 26492 }, { "epoch": 0.7734956643601647, "grad_norm": 0.7075606698185489, "learning_rate": 1.2583941605839416e-06, "loss": 0.659, "step": 26493 }, { "epoch": 0.7735248605880121, "grad_norm": 0.6969046088184494, "learning_rate": 1.2582319545823196e-06, "loss": 0.6034, "step": 26494 }, { "epoch": 0.7735540568158594, "grad_norm": 0.773579540565704, "learning_rate": 1.2580697485806974e-06, "loss": 0.6991, "step": 26495 }, { "epoch": 0.7735832530437068, "grad_norm": 0.7199938143754292, "learning_rate": 1.2579075425790754e-06, "loss": 0.6492, "step": 26496 }, { "epoch": 0.7736124492715541, "grad_norm": 0.7458082388556451, "learning_rate": 1.2577453365774532e-06, "loss": 0.6819, "step": 26497 }, { "epoch": 0.7736416454994015, "grad_norm": 0.7037328280887301, "learning_rate": 1.2575831305758315e-06, "loss": 0.6286, "step": 26498 }, { "epoch": 0.7736708417272489, "grad_norm": 0.7364761822609375, "learning_rate": 1.2574209245742095e-06, "loss": 0.6267, "step": 26499 }, { "epoch": 0.7737000379550962, "grad_norm": 0.7076111942023445, "learning_rate": 1.2572587185725873e-06, "loss": 0.6811, "step": 26500 }, { "epoch": 0.7737292341829436, "grad_norm": 0.6937731904991485, "learning_rate": 1.2570965125709653e-06, "loss": 0.5947, "step": 26501 }, { "epoch": 0.773758430410791, "grad_norm": 0.6984005611710973, "learning_rate": 1.2569343065693433e-06, "loss": 0.6051, "step": 26502 }, { "epoch": 0.7737876266386383, "grad_norm": 0.6880957446908698, "learning_rate": 1.256772100567721e-06, "loss": 0.5823, "step": 26503 }, { "epoch": 0.7738168228664857, "grad_norm": 0.7027627945812241, "learning_rate": 1.256609894566099e-06, "loss": 0.6237, "step": 26504 }, { "epoch": 0.773846019094333, "grad_norm": 0.7492963474214005, "learning_rate": 1.2564476885644769e-06, "loss": 0.6299, "step": 26505 }, { "epoch": 0.7738752153221804, "grad_norm": 0.7788762712901521, "learning_rate": 1.2562854825628549e-06, "loss": 0.6413, "step": 26506 }, { "epoch": 0.7739044115500278, "grad_norm": 0.7541860463089344, "learning_rate": 1.256123276561233e-06, "loss": 0.6973, "step": 26507 }, { "epoch": 0.7739336077778751, "grad_norm": 0.6900425633327779, "learning_rate": 1.2559610705596109e-06, "loss": 0.6378, "step": 26508 }, { "epoch": 0.7739628040057225, "grad_norm": 0.7122463806195815, "learning_rate": 1.255798864557989e-06, "loss": 0.578, "step": 26509 }, { "epoch": 0.7739920002335698, "grad_norm": 0.6876460435288723, "learning_rate": 1.2556366585563667e-06, "loss": 0.6047, "step": 26510 }, { "epoch": 0.7740211964614172, "grad_norm": 0.7388364065093921, "learning_rate": 1.2554744525547447e-06, "loss": 0.6233, "step": 26511 }, { "epoch": 0.7740503926892646, "grad_norm": 0.7291198795349483, "learning_rate": 1.2553122465531225e-06, "loss": 0.7096, "step": 26512 }, { "epoch": 0.7740795889171119, "grad_norm": 0.7138235808189409, "learning_rate": 1.2551500405515005e-06, "loss": 0.6262, "step": 26513 }, { "epoch": 0.7741087851449593, "grad_norm": 0.7821783554564962, "learning_rate": 1.2549878345498783e-06, "loss": 0.6755, "step": 26514 }, { "epoch": 0.7741379813728066, "grad_norm": 0.7273901802849047, "learning_rate": 1.2548256285482563e-06, "loss": 0.6314, "step": 26515 }, { "epoch": 0.774167177600654, "grad_norm": 0.7349438332425162, "learning_rate": 1.254663422546634e-06, "loss": 0.6762, "step": 26516 }, { "epoch": 0.7741963738285014, "grad_norm": 0.7427279680739738, "learning_rate": 1.2545012165450123e-06, "loss": 0.6523, "step": 26517 }, { "epoch": 0.7742255700563487, "grad_norm": 0.7448485602757122, "learning_rate": 1.2543390105433903e-06, "loss": 0.6081, "step": 26518 }, { "epoch": 0.7742547662841961, "grad_norm": 0.7183209053700961, "learning_rate": 1.2541768045417681e-06, "loss": 0.584, "step": 26519 }, { "epoch": 0.7742839625120435, "grad_norm": 0.7154392661656506, "learning_rate": 1.2540145985401461e-06, "loss": 0.6039, "step": 26520 }, { "epoch": 0.7743131587398908, "grad_norm": 0.640590000952116, "learning_rate": 1.2538523925385241e-06, "loss": 0.54, "step": 26521 }, { "epoch": 0.7743423549677382, "grad_norm": 0.6908467328518207, "learning_rate": 1.253690186536902e-06, "loss": 0.5727, "step": 26522 }, { "epoch": 0.7743715511955855, "grad_norm": 0.7016972712677131, "learning_rate": 1.25352798053528e-06, "loss": 0.5953, "step": 26523 }, { "epoch": 0.7744007474234329, "grad_norm": 0.7074473011140551, "learning_rate": 1.2533657745336577e-06, "loss": 0.6271, "step": 26524 }, { "epoch": 0.7744299436512803, "grad_norm": 0.6844892807086099, "learning_rate": 1.2532035685320357e-06, "loss": 0.5705, "step": 26525 }, { "epoch": 0.7744591398791276, "grad_norm": 0.7158652976834506, "learning_rate": 1.253041362530414e-06, "loss": 0.6262, "step": 26526 }, { "epoch": 0.774488336106975, "grad_norm": 0.6914958278812559, "learning_rate": 1.2528791565287918e-06, "loss": 0.5961, "step": 26527 }, { "epoch": 0.7745175323348223, "grad_norm": 0.6807880466454328, "learning_rate": 1.2527169505271698e-06, "loss": 0.5421, "step": 26528 }, { "epoch": 0.7745467285626697, "grad_norm": 0.7076125618397963, "learning_rate": 1.2525547445255476e-06, "loss": 0.6692, "step": 26529 }, { "epoch": 0.7745759247905171, "grad_norm": 0.7307245671056511, "learning_rate": 1.2523925385239256e-06, "loss": 0.6626, "step": 26530 }, { "epoch": 0.7746051210183644, "grad_norm": 0.846898224847007, "learning_rate": 1.2522303325223034e-06, "loss": 0.5662, "step": 26531 }, { "epoch": 0.7746343172462118, "grad_norm": 0.803120958454881, "learning_rate": 1.2520681265206814e-06, "loss": 0.7057, "step": 26532 }, { "epoch": 0.7746635134740592, "grad_norm": 0.8269506560472933, "learning_rate": 1.2519059205190592e-06, "loss": 0.5673, "step": 26533 }, { "epoch": 0.7746927097019065, "grad_norm": 0.6996264994331803, "learning_rate": 1.2517437145174372e-06, "loss": 0.599, "step": 26534 }, { "epoch": 0.7747219059297539, "grad_norm": 0.7300433524064512, "learning_rate": 1.251581508515815e-06, "loss": 0.6254, "step": 26535 }, { "epoch": 0.7747511021576012, "grad_norm": 0.7042933100695471, "learning_rate": 1.2514193025141932e-06, "loss": 0.5813, "step": 26536 }, { "epoch": 0.7747802983854486, "grad_norm": 0.6636719501964551, "learning_rate": 1.2512570965125712e-06, "loss": 0.5788, "step": 26537 }, { "epoch": 0.774809494613296, "grad_norm": 0.7958061950104794, "learning_rate": 1.251094890510949e-06, "loss": 0.7439, "step": 26538 }, { "epoch": 0.7748386908411433, "grad_norm": 0.688138827886107, "learning_rate": 1.250932684509327e-06, "loss": 0.5861, "step": 26539 }, { "epoch": 0.7748678870689907, "grad_norm": 0.7581570813373187, "learning_rate": 1.250770478507705e-06, "loss": 0.6694, "step": 26540 }, { "epoch": 0.774897083296838, "grad_norm": 0.7853707789355313, "learning_rate": 1.2506082725060828e-06, "loss": 0.6895, "step": 26541 }, { "epoch": 0.7749262795246854, "grad_norm": 0.695944186824106, "learning_rate": 1.2504460665044608e-06, "loss": 0.5861, "step": 26542 }, { "epoch": 0.7749554757525328, "grad_norm": 0.7406652158636675, "learning_rate": 1.2502838605028386e-06, "loss": 0.6182, "step": 26543 }, { "epoch": 0.7749846719803801, "grad_norm": 0.8872958527134425, "learning_rate": 1.2501216545012166e-06, "loss": 0.7211, "step": 26544 }, { "epoch": 0.7750138682082275, "grad_norm": 0.7067625442182979, "learning_rate": 1.2499594484995946e-06, "loss": 0.6369, "step": 26545 }, { "epoch": 0.7750430644360748, "grad_norm": 0.688607524574093, "learning_rate": 1.2497972424979724e-06, "loss": 0.5729, "step": 26546 }, { "epoch": 0.7750722606639222, "grad_norm": 0.6730729140790832, "learning_rate": 1.2496350364963504e-06, "loss": 0.5415, "step": 26547 }, { "epoch": 0.7751014568917696, "grad_norm": 0.7445044797862462, "learning_rate": 1.2494728304947284e-06, "loss": 0.6893, "step": 26548 }, { "epoch": 0.7751306531196169, "grad_norm": 0.7852988819704665, "learning_rate": 1.2493106244931064e-06, "loss": 0.7617, "step": 26549 }, { "epoch": 0.7751598493474643, "grad_norm": 0.6853844406319288, "learning_rate": 1.2491484184914842e-06, "loss": 0.5986, "step": 26550 }, { "epoch": 0.7751890455753117, "grad_norm": 0.7986382967234694, "learning_rate": 1.2489862124898622e-06, "loss": 0.6881, "step": 26551 }, { "epoch": 0.775218241803159, "grad_norm": 0.7113324500807875, "learning_rate": 1.2488240064882402e-06, "loss": 0.6091, "step": 26552 }, { "epoch": 0.7752474380310064, "grad_norm": 0.6759409269010662, "learning_rate": 1.2486618004866182e-06, "loss": 0.5317, "step": 26553 }, { "epoch": 0.7752766342588537, "grad_norm": 0.7159611377813052, "learning_rate": 1.248499594484996e-06, "loss": 0.6721, "step": 26554 }, { "epoch": 0.7753058304867011, "grad_norm": 0.813425221384114, "learning_rate": 1.248337388483374e-06, "loss": 0.666, "step": 26555 }, { "epoch": 0.7753350267145485, "grad_norm": 0.7321174487441753, "learning_rate": 1.2481751824817518e-06, "loss": 0.6736, "step": 26556 }, { "epoch": 0.7753642229423958, "grad_norm": 0.6985145765483743, "learning_rate": 1.2480129764801298e-06, "loss": 0.597, "step": 26557 }, { "epoch": 0.7753934191702432, "grad_norm": 0.7154581197615314, "learning_rate": 1.2478507704785078e-06, "loss": 0.6194, "step": 26558 }, { "epoch": 0.7754226153980905, "grad_norm": 0.7376516173824011, "learning_rate": 1.2476885644768856e-06, "loss": 0.6414, "step": 26559 }, { "epoch": 0.7754518116259379, "grad_norm": 0.7945181136076921, "learning_rate": 1.2475263584752636e-06, "loss": 0.7356, "step": 26560 }, { "epoch": 0.7754810078537853, "grad_norm": 0.7294082741139606, "learning_rate": 1.2473641524736417e-06, "loss": 0.6715, "step": 26561 }, { "epoch": 0.7755102040816326, "grad_norm": 0.6885195536991616, "learning_rate": 1.2472019464720197e-06, "loss": 0.5943, "step": 26562 }, { "epoch": 0.77553940030948, "grad_norm": 0.7299100072274312, "learning_rate": 1.2470397404703975e-06, "loss": 0.6346, "step": 26563 }, { "epoch": 0.7755685965373273, "grad_norm": 0.7161671692416814, "learning_rate": 1.2468775344687755e-06, "loss": 0.5954, "step": 26564 }, { "epoch": 0.7755977927651747, "grad_norm": 0.7116634651998894, "learning_rate": 1.2467153284671533e-06, "loss": 0.6196, "step": 26565 }, { "epoch": 0.7756269889930221, "grad_norm": 0.7025826886178428, "learning_rate": 1.2465531224655315e-06, "loss": 0.6049, "step": 26566 }, { "epoch": 0.7756561852208694, "grad_norm": 0.6954402421191306, "learning_rate": 1.2463909164639093e-06, "loss": 0.5935, "step": 26567 }, { "epoch": 0.7756853814487168, "grad_norm": 0.741523394785339, "learning_rate": 1.2462287104622873e-06, "loss": 0.7028, "step": 26568 }, { "epoch": 0.7757145776765642, "grad_norm": 0.6861543269863482, "learning_rate": 1.246066504460665e-06, "loss": 0.5759, "step": 26569 }, { "epoch": 0.7757437739044115, "grad_norm": 0.7354018638489872, "learning_rate": 1.245904298459043e-06, "loss": 0.6265, "step": 26570 }, { "epoch": 0.7757729701322589, "grad_norm": 0.7611785248067985, "learning_rate": 1.245742092457421e-06, "loss": 0.6713, "step": 26571 }, { "epoch": 0.7758021663601062, "grad_norm": 0.7071096469870678, "learning_rate": 1.245579886455799e-06, "loss": 0.607, "step": 26572 }, { "epoch": 0.7758313625879536, "grad_norm": 0.648723939721816, "learning_rate": 1.2454176804541769e-06, "loss": 0.5187, "step": 26573 }, { "epoch": 0.775860558815801, "grad_norm": 0.6654575949321728, "learning_rate": 1.245255474452555e-06, "loss": 0.5498, "step": 26574 }, { "epoch": 0.7758897550436483, "grad_norm": 0.7653361587188708, "learning_rate": 1.2450932684509327e-06, "loss": 0.626, "step": 26575 }, { "epoch": 0.7759189512714957, "grad_norm": 0.8546936266196347, "learning_rate": 1.2449310624493107e-06, "loss": 0.6272, "step": 26576 }, { "epoch": 0.775948147499343, "grad_norm": 0.721480687910312, "learning_rate": 1.2447688564476887e-06, "loss": 0.594, "step": 26577 }, { "epoch": 0.7759773437271904, "grad_norm": 0.7652505845406882, "learning_rate": 1.2446066504460665e-06, "loss": 0.6727, "step": 26578 }, { "epoch": 0.7760065399550378, "grad_norm": 0.7270854647832107, "learning_rate": 1.2444444444444445e-06, "loss": 0.6639, "step": 26579 }, { "epoch": 0.7760357361828851, "grad_norm": 0.7405225395608168, "learning_rate": 1.2442822384428225e-06, "loss": 0.7044, "step": 26580 }, { "epoch": 0.7760649324107325, "grad_norm": 0.8014587209275402, "learning_rate": 1.2441200324412005e-06, "loss": 0.6122, "step": 26581 }, { "epoch": 0.7760941286385798, "grad_norm": 0.735823180342495, "learning_rate": 1.2439578264395783e-06, "loss": 0.6416, "step": 26582 }, { "epoch": 0.7761233248664272, "grad_norm": 0.7047953859460265, "learning_rate": 1.2437956204379563e-06, "loss": 0.6172, "step": 26583 }, { "epoch": 0.7761525210942746, "grad_norm": 0.7449642557044052, "learning_rate": 1.2436334144363341e-06, "loss": 0.704, "step": 26584 }, { "epoch": 0.7761817173221219, "grad_norm": 1.0189432634491082, "learning_rate": 1.2434712084347123e-06, "loss": 0.7235, "step": 26585 }, { "epoch": 0.7762109135499693, "grad_norm": 0.687335907862527, "learning_rate": 1.2433090024330901e-06, "loss": 0.5337, "step": 26586 }, { "epoch": 0.7762401097778167, "grad_norm": 0.7253971265297395, "learning_rate": 1.2431467964314681e-06, "loss": 0.6186, "step": 26587 }, { "epoch": 0.776269306005664, "grad_norm": 0.7383671128530542, "learning_rate": 1.242984590429846e-06, "loss": 0.6806, "step": 26588 }, { "epoch": 0.7762985022335114, "grad_norm": 0.7253294151320483, "learning_rate": 1.242822384428224e-06, "loss": 0.6572, "step": 26589 }, { "epoch": 0.7763276984613587, "grad_norm": 0.7019911591768612, "learning_rate": 1.242660178426602e-06, "loss": 0.6017, "step": 26590 }, { "epoch": 0.7763568946892061, "grad_norm": 0.7158461720961147, "learning_rate": 1.24249797242498e-06, "loss": 0.6509, "step": 26591 }, { "epoch": 0.7763860909170535, "grad_norm": 0.6979445851302466, "learning_rate": 1.2423357664233577e-06, "loss": 0.5754, "step": 26592 }, { "epoch": 0.7764152871449009, "grad_norm": 0.6927580305312943, "learning_rate": 1.2421735604217358e-06, "loss": 0.6336, "step": 26593 }, { "epoch": 0.7764444833727483, "grad_norm": 0.7705187762461572, "learning_rate": 1.2420113544201135e-06, "loss": 0.7101, "step": 26594 }, { "epoch": 0.7764736796005957, "grad_norm": 0.7302278629671356, "learning_rate": 1.2418491484184916e-06, "loss": 0.6786, "step": 26595 }, { "epoch": 0.776502875828443, "grad_norm": 0.7181297203962064, "learning_rate": 1.2416869424168696e-06, "loss": 0.6214, "step": 26596 }, { "epoch": 0.7765320720562904, "grad_norm": 0.7486291225885447, "learning_rate": 1.2415247364152474e-06, "loss": 0.6914, "step": 26597 }, { "epoch": 0.7765612682841377, "grad_norm": 0.8263472260098965, "learning_rate": 1.2413625304136254e-06, "loss": 0.6874, "step": 26598 }, { "epoch": 0.7765904645119851, "grad_norm": 0.8092697313810467, "learning_rate": 1.2412003244120034e-06, "loss": 0.6921, "step": 26599 }, { "epoch": 0.7766196607398325, "grad_norm": 0.7309959399064347, "learning_rate": 1.2410381184103814e-06, "loss": 0.6229, "step": 26600 }, { "epoch": 0.7766488569676798, "grad_norm": 0.7442430186277039, "learning_rate": 1.2408759124087592e-06, "loss": 0.6356, "step": 26601 }, { "epoch": 0.7766780531955272, "grad_norm": 0.7438334337115112, "learning_rate": 1.2407137064071372e-06, "loss": 0.6567, "step": 26602 }, { "epoch": 0.7767072494233745, "grad_norm": 0.7767682543282961, "learning_rate": 1.240551500405515e-06, "loss": 0.6939, "step": 26603 }, { "epoch": 0.7767364456512219, "grad_norm": 0.7453512462275397, "learning_rate": 1.2403892944038932e-06, "loss": 0.6771, "step": 26604 }, { "epoch": 0.7767656418790693, "grad_norm": 0.7128619622788062, "learning_rate": 1.240227088402271e-06, "loss": 0.6443, "step": 26605 }, { "epoch": 0.7767948381069166, "grad_norm": 0.7492055746134374, "learning_rate": 1.240064882400649e-06, "loss": 0.6553, "step": 26606 }, { "epoch": 0.776824034334764, "grad_norm": 0.710220807418397, "learning_rate": 1.2399026763990268e-06, "loss": 0.5999, "step": 26607 }, { "epoch": 0.7768532305626114, "grad_norm": 0.7350423570402589, "learning_rate": 1.2397404703974048e-06, "loss": 0.6196, "step": 26608 }, { "epoch": 0.7768824267904587, "grad_norm": 0.6918393594287332, "learning_rate": 1.2395782643957828e-06, "loss": 0.5897, "step": 26609 }, { "epoch": 0.7769116230183061, "grad_norm": 0.780343854878907, "learning_rate": 1.2394160583941608e-06, "loss": 0.7018, "step": 26610 }, { "epoch": 0.7769408192461534, "grad_norm": 0.6605776325020055, "learning_rate": 1.2392538523925386e-06, "loss": 0.5649, "step": 26611 }, { "epoch": 0.7769700154740008, "grad_norm": 0.7458591110868328, "learning_rate": 1.2390916463909166e-06, "loss": 0.6928, "step": 26612 }, { "epoch": 0.7769992117018482, "grad_norm": 0.713761915825867, "learning_rate": 1.2389294403892944e-06, "loss": 0.6247, "step": 26613 }, { "epoch": 0.7770284079296955, "grad_norm": 0.8163355492282826, "learning_rate": 1.2387672343876724e-06, "loss": 0.6853, "step": 26614 }, { "epoch": 0.7770576041575429, "grad_norm": 0.7280602586847547, "learning_rate": 1.2386050283860504e-06, "loss": 0.6335, "step": 26615 }, { "epoch": 0.7770868003853902, "grad_norm": 0.7311908394203068, "learning_rate": 1.2384428223844282e-06, "loss": 0.656, "step": 26616 }, { "epoch": 0.7771159966132376, "grad_norm": 0.701481733536307, "learning_rate": 1.2382806163828062e-06, "loss": 0.5973, "step": 26617 }, { "epoch": 0.777145192841085, "grad_norm": 0.73467391283389, "learning_rate": 1.2381184103811842e-06, "loss": 0.6401, "step": 26618 }, { "epoch": 0.7771743890689323, "grad_norm": 0.7824421469605999, "learning_rate": 1.2379562043795622e-06, "loss": 0.6949, "step": 26619 }, { "epoch": 0.7772035852967797, "grad_norm": 0.6690192307922576, "learning_rate": 1.23779399837794e-06, "loss": 0.5361, "step": 26620 }, { "epoch": 0.777232781524627, "grad_norm": 0.6781945106972564, "learning_rate": 1.237631792376318e-06, "loss": 0.6132, "step": 26621 }, { "epoch": 0.7772619777524744, "grad_norm": 0.7646028181022864, "learning_rate": 1.2374695863746958e-06, "loss": 0.6662, "step": 26622 }, { "epoch": 0.7772911739803218, "grad_norm": 0.7703480568609489, "learning_rate": 1.237307380373074e-06, "loss": 0.7336, "step": 26623 }, { "epoch": 0.7773203702081691, "grad_norm": 0.755936393051565, "learning_rate": 1.2371451743714518e-06, "loss": 0.7014, "step": 26624 }, { "epoch": 0.7773495664360165, "grad_norm": 0.7429776081703763, "learning_rate": 1.2369829683698299e-06, "loss": 0.7372, "step": 26625 }, { "epoch": 0.7773787626638639, "grad_norm": 0.7047372442604818, "learning_rate": 1.2368207623682076e-06, "loss": 0.5996, "step": 26626 }, { "epoch": 0.7774079588917112, "grad_norm": 0.7333457743185158, "learning_rate": 1.2366585563665857e-06, "loss": 0.5899, "step": 26627 }, { "epoch": 0.7774371551195586, "grad_norm": 0.6910781766629084, "learning_rate": 1.2364963503649637e-06, "loss": 0.5897, "step": 26628 }, { "epoch": 0.7774663513474059, "grad_norm": 0.6950559842272835, "learning_rate": 1.2363341443633417e-06, "loss": 0.5873, "step": 26629 }, { "epoch": 0.7774955475752533, "grad_norm": 0.7014420212398955, "learning_rate": 1.2361719383617195e-06, "loss": 0.5893, "step": 26630 }, { "epoch": 0.7775247438031007, "grad_norm": 0.8037864524200954, "learning_rate": 1.2360097323600975e-06, "loss": 0.6866, "step": 26631 }, { "epoch": 0.777553940030948, "grad_norm": 0.757979396208197, "learning_rate": 1.2358475263584753e-06, "loss": 0.7249, "step": 26632 }, { "epoch": 0.7775831362587954, "grad_norm": 0.9073307881792427, "learning_rate": 1.2356853203568533e-06, "loss": 0.6316, "step": 26633 }, { "epoch": 0.7776123324866427, "grad_norm": 0.8773967853669182, "learning_rate": 1.2355231143552313e-06, "loss": 0.6872, "step": 26634 }, { "epoch": 0.7776415287144901, "grad_norm": 0.7354457264052446, "learning_rate": 1.235360908353609e-06, "loss": 0.645, "step": 26635 }, { "epoch": 0.7776707249423375, "grad_norm": 0.7091643696334152, "learning_rate": 1.235198702351987e-06, "loss": 0.6398, "step": 26636 }, { "epoch": 0.7776999211701848, "grad_norm": 0.720559247425883, "learning_rate": 1.235036496350365e-06, "loss": 0.6335, "step": 26637 }, { "epoch": 0.7777291173980322, "grad_norm": 0.6834144361208563, "learning_rate": 1.234874290348743e-06, "loss": 0.5835, "step": 26638 }, { "epoch": 0.7777583136258795, "grad_norm": 0.7369415093603304, "learning_rate": 1.2347120843471209e-06, "loss": 0.6406, "step": 26639 }, { "epoch": 0.7777875098537269, "grad_norm": 0.7118496522708886, "learning_rate": 1.234549878345499e-06, "loss": 0.6255, "step": 26640 }, { "epoch": 0.7778167060815743, "grad_norm": 0.7202386916460619, "learning_rate": 1.2343876723438767e-06, "loss": 0.6374, "step": 26641 }, { "epoch": 0.7778459023094216, "grad_norm": 0.7348873104747341, "learning_rate": 1.234225466342255e-06, "loss": 0.6846, "step": 26642 }, { "epoch": 0.777875098537269, "grad_norm": 0.7206357554099797, "learning_rate": 1.2340632603406327e-06, "loss": 0.6286, "step": 26643 }, { "epoch": 0.7779042947651164, "grad_norm": 0.718123236180938, "learning_rate": 1.2339010543390107e-06, "loss": 0.6349, "step": 26644 }, { "epoch": 0.7779334909929637, "grad_norm": 0.7359636187814292, "learning_rate": 1.2337388483373885e-06, "loss": 0.6202, "step": 26645 }, { "epoch": 0.7779626872208111, "grad_norm": 0.7537584309964018, "learning_rate": 1.2335766423357665e-06, "loss": 0.7097, "step": 26646 }, { "epoch": 0.7779918834486584, "grad_norm": 0.7382849333342576, "learning_rate": 1.2334144363341445e-06, "loss": 0.6707, "step": 26647 }, { "epoch": 0.7780210796765058, "grad_norm": 0.7194827866787828, "learning_rate": 1.2332522303325223e-06, "loss": 0.6063, "step": 26648 }, { "epoch": 0.7780502759043532, "grad_norm": 0.7428944475939596, "learning_rate": 1.2330900243309003e-06, "loss": 0.7041, "step": 26649 }, { "epoch": 0.7780794721322005, "grad_norm": 0.6985724113192718, "learning_rate": 1.2329278183292783e-06, "loss": 0.5819, "step": 26650 }, { "epoch": 0.7781086683600479, "grad_norm": 0.777446161642929, "learning_rate": 1.2327656123276563e-06, "loss": 0.7212, "step": 26651 }, { "epoch": 0.7781378645878952, "grad_norm": 0.72995457998566, "learning_rate": 1.2326034063260341e-06, "loss": 0.6334, "step": 26652 }, { "epoch": 0.7781670608157426, "grad_norm": 0.7595111637126438, "learning_rate": 1.2324412003244121e-06, "loss": 0.6784, "step": 26653 }, { "epoch": 0.77819625704359, "grad_norm": 0.7074302749824092, "learning_rate": 1.23227899432279e-06, "loss": 0.6003, "step": 26654 }, { "epoch": 0.7782254532714373, "grad_norm": 0.7647459755973073, "learning_rate": 1.232116788321168e-06, "loss": 0.6772, "step": 26655 }, { "epoch": 0.7782546494992847, "grad_norm": 0.7270947714097987, "learning_rate": 1.231954582319546e-06, "loss": 0.6427, "step": 26656 }, { "epoch": 0.778283845727132, "grad_norm": 0.7684482235134857, "learning_rate": 1.231792376317924e-06, "loss": 0.6589, "step": 26657 }, { "epoch": 0.7783130419549794, "grad_norm": 0.7130410678315705, "learning_rate": 1.2316301703163017e-06, "loss": 0.6101, "step": 26658 }, { "epoch": 0.7783422381828268, "grad_norm": 0.8241635236900304, "learning_rate": 1.2314679643146798e-06, "loss": 0.6959, "step": 26659 }, { "epoch": 0.7783714344106741, "grad_norm": 0.6801952210936711, "learning_rate": 1.2313057583130576e-06, "loss": 0.5911, "step": 26660 }, { "epoch": 0.7784006306385215, "grad_norm": 0.68625084235536, "learning_rate": 1.2311435523114358e-06, "loss": 0.5984, "step": 26661 }, { "epoch": 0.7784298268663689, "grad_norm": 0.6892616396530912, "learning_rate": 1.2309813463098136e-06, "loss": 0.5876, "step": 26662 }, { "epoch": 0.7784590230942162, "grad_norm": 0.7796840083053779, "learning_rate": 1.2308191403081916e-06, "loss": 0.7138, "step": 26663 }, { "epoch": 0.7784882193220636, "grad_norm": 0.6866575499418492, "learning_rate": 1.2306569343065694e-06, "loss": 0.6053, "step": 26664 }, { "epoch": 0.7785174155499109, "grad_norm": 0.718003211806052, "learning_rate": 1.2304947283049474e-06, "loss": 0.6342, "step": 26665 }, { "epoch": 0.7785466117777583, "grad_norm": 0.7075525189897713, "learning_rate": 1.2303325223033254e-06, "loss": 0.6062, "step": 26666 }, { "epoch": 0.7785758080056057, "grad_norm": 0.7177156038527893, "learning_rate": 1.2301703163017032e-06, "loss": 0.6725, "step": 26667 }, { "epoch": 0.778605004233453, "grad_norm": 0.6892998245204177, "learning_rate": 1.2300081103000812e-06, "loss": 0.5707, "step": 26668 }, { "epoch": 0.7786342004613004, "grad_norm": 0.7199564523474581, "learning_rate": 1.2298459042984592e-06, "loss": 0.6483, "step": 26669 }, { "epoch": 0.7786633966891477, "grad_norm": 0.7576873301358488, "learning_rate": 1.2296836982968372e-06, "loss": 0.7101, "step": 26670 }, { "epoch": 0.7786925929169951, "grad_norm": 0.7469677253764604, "learning_rate": 1.229521492295215e-06, "loss": 0.7073, "step": 26671 }, { "epoch": 0.7787217891448425, "grad_norm": 0.6911183123710607, "learning_rate": 1.229359286293593e-06, "loss": 0.5846, "step": 26672 }, { "epoch": 0.7787509853726898, "grad_norm": 0.6802890247127843, "learning_rate": 1.2291970802919708e-06, "loss": 0.5885, "step": 26673 }, { "epoch": 0.7787801816005372, "grad_norm": 0.7073750293301859, "learning_rate": 1.2290348742903488e-06, "loss": 0.6628, "step": 26674 }, { "epoch": 0.7788093778283846, "grad_norm": 0.7121723983952828, "learning_rate": 1.2288726682887268e-06, "loss": 0.624, "step": 26675 }, { "epoch": 0.7788385740562319, "grad_norm": 0.8664458997840148, "learning_rate": 1.2287104622871048e-06, "loss": 0.6966, "step": 26676 }, { "epoch": 0.7788677702840793, "grad_norm": 0.7542624625437074, "learning_rate": 1.2285482562854826e-06, "loss": 0.6891, "step": 26677 }, { "epoch": 0.7788969665119266, "grad_norm": 0.7446803915015299, "learning_rate": 1.2283860502838606e-06, "loss": 0.6739, "step": 26678 }, { "epoch": 0.778926162739774, "grad_norm": 0.7322614199833886, "learning_rate": 1.2282238442822384e-06, "loss": 0.5794, "step": 26679 }, { "epoch": 0.7789553589676214, "grad_norm": 0.6971708030606147, "learning_rate": 1.2280616382806166e-06, "loss": 0.6084, "step": 26680 }, { "epoch": 0.7789845551954687, "grad_norm": 0.7455935210344523, "learning_rate": 1.2278994322789944e-06, "loss": 0.6081, "step": 26681 }, { "epoch": 0.7790137514233161, "grad_norm": 0.6964838529313595, "learning_rate": 1.2277372262773724e-06, "loss": 0.593, "step": 26682 }, { "epoch": 0.7790429476511634, "grad_norm": 0.708830482695283, "learning_rate": 1.2275750202757502e-06, "loss": 0.6136, "step": 26683 }, { "epoch": 0.7790721438790108, "grad_norm": 0.743893581085734, "learning_rate": 1.2274128142741282e-06, "loss": 0.6649, "step": 26684 }, { "epoch": 0.7791013401068582, "grad_norm": 0.7748983053959286, "learning_rate": 1.2272506082725062e-06, "loss": 0.7263, "step": 26685 }, { "epoch": 0.7791305363347055, "grad_norm": 0.7457291951614423, "learning_rate": 1.227088402270884e-06, "loss": 0.7117, "step": 26686 }, { "epoch": 0.7791597325625529, "grad_norm": 0.7549925085329964, "learning_rate": 1.226926196269262e-06, "loss": 0.6525, "step": 26687 }, { "epoch": 0.7791889287904002, "grad_norm": 0.7339109041840896, "learning_rate": 1.22676399026764e-06, "loss": 0.6776, "step": 26688 }, { "epoch": 0.7792181250182476, "grad_norm": 0.6606614832986349, "learning_rate": 1.226601784266018e-06, "loss": 0.5379, "step": 26689 }, { "epoch": 0.779247321246095, "grad_norm": 0.7811865992004224, "learning_rate": 1.2264395782643958e-06, "loss": 0.7467, "step": 26690 }, { "epoch": 0.7792765174739423, "grad_norm": 0.6303694590143408, "learning_rate": 1.2262773722627739e-06, "loss": 0.5081, "step": 26691 }, { "epoch": 0.7793057137017897, "grad_norm": 0.7155196190886772, "learning_rate": 1.2261151662611517e-06, "loss": 0.6359, "step": 26692 }, { "epoch": 0.779334909929637, "grad_norm": 0.7358114374124562, "learning_rate": 1.2259529602595297e-06, "loss": 0.653, "step": 26693 }, { "epoch": 0.7793641061574844, "grad_norm": 0.6963396174016379, "learning_rate": 1.2257907542579077e-06, "loss": 0.5968, "step": 26694 }, { "epoch": 0.7793933023853318, "grad_norm": 0.7560674288354385, "learning_rate": 1.2256285482562857e-06, "loss": 0.7179, "step": 26695 }, { "epoch": 0.7794224986131791, "grad_norm": 0.7257761676403418, "learning_rate": 1.2254663422546635e-06, "loss": 0.6122, "step": 26696 }, { "epoch": 0.7794516948410265, "grad_norm": 0.782627521190505, "learning_rate": 1.2253041362530415e-06, "loss": 0.7429, "step": 26697 }, { "epoch": 0.7794808910688739, "grad_norm": 0.7054067925195183, "learning_rate": 1.2251419302514193e-06, "loss": 0.6539, "step": 26698 }, { "epoch": 0.7795100872967212, "grad_norm": 0.7248194412186806, "learning_rate": 1.2249797242497975e-06, "loss": 0.6781, "step": 26699 }, { "epoch": 0.7795392835245686, "grad_norm": 0.7516122292828473, "learning_rate": 1.2248175182481753e-06, "loss": 0.6527, "step": 26700 }, { "epoch": 0.7795684797524159, "grad_norm": 0.7379837729516914, "learning_rate": 1.2246553122465533e-06, "loss": 0.6808, "step": 26701 }, { "epoch": 0.7795976759802633, "grad_norm": 0.7801473402080566, "learning_rate": 1.224493106244931e-06, "loss": 0.7145, "step": 26702 }, { "epoch": 0.7796268722081107, "grad_norm": 0.7627170178196773, "learning_rate": 1.224330900243309e-06, "loss": 0.6403, "step": 26703 }, { "epoch": 0.779656068435958, "grad_norm": 0.6952250418620174, "learning_rate": 1.224168694241687e-06, "loss": 0.6216, "step": 26704 }, { "epoch": 0.7796852646638054, "grad_norm": 0.7007044753598212, "learning_rate": 1.2240064882400649e-06, "loss": 0.5869, "step": 26705 }, { "epoch": 0.7797144608916527, "grad_norm": 0.7582070728133212, "learning_rate": 1.223844282238443e-06, "loss": 0.5961, "step": 26706 }, { "epoch": 0.7797436571195001, "grad_norm": 0.658915234021377, "learning_rate": 1.223682076236821e-06, "loss": 0.5678, "step": 26707 }, { "epoch": 0.7797728533473475, "grad_norm": 0.7128530688287827, "learning_rate": 1.223519870235199e-06, "loss": 0.627, "step": 26708 }, { "epoch": 0.7798020495751948, "grad_norm": 0.6527299470799213, "learning_rate": 1.2233576642335767e-06, "loss": 0.5789, "step": 26709 }, { "epoch": 0.7798312458030422, "grad_norm": 0.7958528179571105, "learning_rate": 1.2231954582319547e-06, "loss": 0.7252, "step": 26710 }, { "epoch": 0.7798604420308896, "grad_norm": 0.7004648643698872, "learning_rate": 1.2230332522303325e-06, "loss": 0.5939, "step": 26711 }, { "epoch": 0.7798896382587369, "grad_norm": 0.687359605864502, "learning_rate": 1.2228710462287105e-06, "loss": 0.5865, "step": 26712 }, { "epoch": 0.7799188344865843, "grad_norm": 0.7274170651421223, "learning_rate": 1.2227088402270885e-06, "loss": 0.6351, "step": 26713 }, { "epoch": 0.7799480307144318, "grad_norm": 0.7390446514118294, "learning_rate": 1.2225466342254665e-06, "loss": 0.6868, "step": 26714 }, { "epoch": 0.7799772269422791, "grad_norm": 0.6874612837504679, "learning_rate": 1.2223844282238443e-06, "loss": 0.5975, "step": 26715 }, { "epoch": 0.7800064231701265, "grad_norm": 0.7657917168622718, "learning_rate": 1.2222222222222223e-06, "loss": 0.7657, "step": 26716 }, { "epoch": 0.7800356193979738, "grad_norm": 0.7490476877661862, "learning_rate": 1.2220600162206003e-06, "loss": 0.6739, "step": 26717 }, { "epoch": 0.7800648156258212, "grad_norm": 0.8216682605057526, "learning_rate": 1.2218978102189783e-06, "loss": 0.7621, "step": 26718 }, { "epoch": 0.7800940118536686, "grad_norm": 0.722165762078629, "learning_rate": 1.2217356042173561e-06, "loss": 0.6747, "step": 26719 }, { "epoch": 0.7801232080815159, "grad_norm": 0.719702034517039, "learning_rate": 1.2215733982157341e-06, "loss": 0.631, "step": 26720 }, { "epoch": 0.7801524043093633, "grad_norm": 0.7872247387983196, "learning_rate": 1.221411192214112e-06, "loss": 0.6301, "step": 26721 }, { "epoch": 0.7801816005372106, "grad_norm": 0.731656648363476, "learning_rate": 1.22124898621249e-06, "loss": 0.5497, "step": 26722 }, { "epoch": 0.780210796765058, "grad_norm": 0.6376445476268006, "learning_rate": 1.221086780210868e-06, "loss": 0.5126, "step": 26723 }, { "epoch": 0.7802399929929054, "grad_norm": 0.7237615393360516, "learning_rate": 1.2209245742092458e-06, "loss": 0.6068, "step": 26724 }, { "epoch": 0.7802691892207527, "grad_norm": 0.7265204555943183, "learning_rate": 1.2207623682076238e-06, "loss": 0.6265, "step": 26725 }, { "epoch": 0.7802983854486001, "grad_norm": 0.663449667991487, "learning_rate": 1.2206001622060018e-06, "loss": 0.5873, "step": 26726 }, { "epoch": 0.7803275816764474, "grad_norm": 0.7359132227238246, "learning_rate": 1.2204379562043798e-06, "loss": 0.6815, "step": 26727 }, { "epoch": 0.7803567779042948, "grad_norm": 0.7053428292524924, "learning_rate": 1.2202757502027576e-06, "loss": 0.6368, "step": 26728 }, { "epoch": 0.7803859741321422, "grad_norm": 0.6976837166515676, "learning_rate": 1.2201135442011356e-06, "loss": 0.5865, "step": 26729 }, { "epoch": 0.7804151703599895, "grad_norm": 0.6858692897496678, "learning_rate": 1.2199513381995134e-06, "loss": 0.5575, "step": 26730 }, { "epoch": 0.7804443665878369, "grad_norm": 0.6938447603450799, "learning_rate": 1.2197891321978914e-06, "loss": 0.5974, "step": 26731 }, { "epoch": 0.7804735628156843, "grad_norm": 0.7468275806812327, "learning_rate": 1.2196269261962694e-06, "loss": 0.6686, "step": 26732 }, { "epoch": 0.7805027590435316, "grad_norm": 0.7280741583907904, "learning_rate": 1.2194647201946474e-06, "loss": 0.6414, "step": 26733 }, { "epoch": 0.780531955271379, "grad_norm": 0.6968385093612672, "learning_rate": 1.2193025141930252e-06, "loss": 0.6272, "step": 26734 }, { "epoch": 0.7805611514992263, "grad_norm": 0.7366593179263098, "learning_rate": 1.2191403081914032e-06, "loss": 0.6201, "step": 26735 }, { "epoch": 0.7805903477270737, "grad_norm": 0.742419681737314, "learning_rate": 1.2189781021897812e-06, "loss": 0.6886, "step": 26736 }, { "epoch": 0.7806195439549211, "grad_norm": 0.721927813845167, "learning_rate": 1.218815896188159e-06, "loss": 0.649, "step": 26737 }, { "epoch": 0.7806487401827684, "grad_norm": 0.7475115234167922, "learning_rate": 1.218653690186537e-06, "loss": 0.6802, "step": 26738 }, { "epoch": 0.7806779364106158, "grad_norm": 0.789250808932344, "learning_rate": 1.218491484184915e-06, "loss": 0.6, "step": 26739 }, { "epoch": 0.7807071326384631, "grad_norm": 0.7337998997814668, "learning_rate": 1.2183292781832928e-06, "loss": 0.6697, "step": 26740 }, { "epoch": 0.7807363288663105, "grad_norm": 0.7369544280261144, "learning_rate": 1.2181670721816708e-06, "loss": 0.6257, "step": 26741 }, { "epoch": 0.7807655250941579, "grad_norm": 0.6966785907203786, "learning_rate": 1.2180048661800488e-06, "loss": 0.6317, "step": 26742 }, { "epoch": 0.7807947213220052, "grad_norm": 0.6877009545784991, "learning_rate": 1.2178426601784266e-06, "loss": 0.5822, "step": 26743 }, { "epoch": 0.7808239175498526, "grad_norm": 0.8010474606592863, "learning_rate": 1.2176804541768046e-06, "loss": 0.7178, "step": 26744 }, { "epoch": 0.7808531137777, "grad_norm": 0.7183264284766849, "learning_rate": 1.2175182481751826e-06, "loss": 0.6535, "step": 26745 }, { "epoch": 0.7808823100055473, "grad_norm": 0.7235153745547529, "learning_rate": 1.2173560421735606e-06, "loss": 0.6331, "step": 26746 }, { "epoch": 0.7809115062333947, "grad_norm": 0.6808468627418705, "learning_rate": 1.2171938361719384e-06, "loss": 0.5664, "step": 26747 }, { "epoch": 0.780940702461242, "grad_norm": 0.7461620808901143, "learning_rate": 1.2170316301703164e-06, "loss": 0.6752, "step": 26748 }, { "epoch": 0.7809698986890894, "grad_norm": 0.7166272529802151, "learning_rate": 1.2168694241686942e-06, "loss": 0.6352, "step": 26749 }, { "epoch": 0.7809990949169368, "grad_norm": 0.6557311325129954, "learning_rate": 1.2167072181670722e-06, "loss": 0.5456, "step": 26750 }, { "epoch": 0.7810282911447841, "grad_norm": 0.8388218994654767, "learning_rate": 1.2165450121654502e-06, "loss": 0.8157, "step": 26751 }, { "epoch": 0.7810574873726315, "grad_norm": 0.6977104981336169, "learning_rate": 1.2163828061638282e-06, "loss": 0.5963, "step": 26752 }, { "epoch": 0.7810866836004788, "grad_norm": 0.6933040621640865, "learning_rate": 1.216220600162206e-06, "loss": 0.5822, "step": 26753 }, { "epoch": 0.7811158798283262, "grad_norm": 0.6858309150728424, "learning_rate": 1.216058394160584e-06, "loss": 0.5633, "step": 26754 }, { "epoch": 0.7811450760561736, "grad_norm": 0.7324311460630565, "learning_rate": 1.215896188158962e-06, "loss": 0.6721, "step": 26755 }, { "epoch": 0.7811742722840209, "grad_norm": 0.7501056026644154, "learning_rate": 1.2157339821573399e-06, "loss": 0.6613, "step": 26756 }, { "epoch": 0.7812034685118683, "grad_norm": 0.8129445058909093, "learning_rate": 1.2155717761557179e-06, "loss": 0.6674, "step": 26757 }, { "epoch": 0.7812326647397156, "grad_norm": 0.7296354707004707, "learning_rate": 1.2154095701540959e-06, "loss": 0.6306, "step": 26758 }, { "epoch": 0.781261860967563, "grad_norm": 0.6407700050762044, "learning_rate": 1.2152473641524737e-06, "loss": 0.4429, "step": 26759 }, { "epoch": 0.7812910571954104, "grad_norm": 0.6838134055890827, "learning_rate": 1.2150851581508517e-06, "loss": 0.6051, "step": 26760 }, { "epoch": 0.7813202534232577, "grad_norm": 0.664022225488117, "learning_rate": 1.2149229521492297e-06, "loss": 0.545, "step": 26761 }, { "epoch": 0.7813494496511051, "grad_norm": 0.746172603767995, "learning_rate": 1.2147607461476075e-06, "loss": 0.6995, "step": 26762 }, { "epoch": 0.7813786458789524, "grad_norm": 0.705599655516289, "learning_rate": 1.2145985401459855e-06, "loss": 0.5577, "step": 26763 }, { "epoch": 0.7814078421067998, "grad_norm": 0.7422149097086912, "learning_rate": 1.2144363341443635e-06, "loss": 0.6593, "step": 26764 }, { "epoch": 0.7814370383346472, "grad_norm": 0.6887384518059905, "learning_rate": 1.2142741281427415e-06, "loss": 0.5944, "step": 26765 }, { "epoch": 0.7814662345624945, "grad_norm": 0.66844876246532, "learning_rate": 1.2141119221411193e-06, "loss": 0.5376, "step": 26766 }, { "epoch": 0.7814954307903419, "grad_norm": 0.7229370668135459, "learning_rate": 1.2139497161394973e-06, "loss": 0.6512, "step": 26767 }, { "epoch": 0.7815246270181893, "grad_norm": 0.6814068370153095, "learning_rate": 1.213787510137875e-06, "loss": 0.5577, "step": 26768 }, { "epoch": 0.7815538232460366, "grad_norm": 0.6653821604181619, "learning_rate": 1.2136253041362533e-06, "loss": 0.551, "step": 26769 }, { "epoch": 0.781583019473884, "grad_norm": 0.7472988861058188, "learning_rate": 1.213463098134631e-06, "loss": 0.6007, "step": 26770 }, { "epoch": 0.7816122157017313, "grad_norm": 0.6732154563455676, "learning_rate": 1.213300892133009e-06, "loss": 0.5948, "step": 26771 }, { "epoch": 0.7816414119295787, "grad_norm": 0.6752207214880808, "learning_rate": 1.213138686131387e-06, "loss": 0.5737, "step": 26772 }, { "epoch": 0.7816706081574261, "grad_norm": 0.7311317050421363, "learning_rate": 1.212976480129765e-06, "loss": 0.6788, "step": 26773 }, { "epoch": 0.7816998043852734, "grad_norm": 0.7178696984121841, "learning_rate": 1.212814274128143e-06, "loss": 0.6378, "step": 26774 }, { "epoch": 0.7817290006131208, "grad_norm": 0.7275517432735594, "learning_rate": 1.2126520681265207e-06, "loss": 0.6461, "step": 26775 }, { "epoch": 0.7817581968409681, "grad_norm": 0.6711596409946672, "learning_rate": 1.2124898621248987e-06, "loss": 0.5748, "step": 26776 }, { "epoch": 0.7817873930688155, "grad_norm": 0.7310681509318228, "learning_rate": 1.2123276561232767e-06, "loss": 0.6075, "step": 26777 }, { "epoch": 0.7818165892966629, "grad_norm": 0.7205508627506324, "learning_rate": 1.2121654501216545e-06, "loss": 0.6293, "step": 26778 }, { "epoch": 0.7818457855245102, "grad_norm": 0.7551888106719608, "learning_rate": 1.2120032441200325e-06, "loss": 0.6826, "step": 26779 }, { "epoch": 0.7818749817523576, "grad_norm": 0.7439258440745093, "learning_rate": 1.2118410381184105e-06, "loss": 0.6727, "step": 26780 }, { "epoch": 0.781904177980205, "grad_norm": 0.684493696735137, "learning_rate": 1.2116788321167883e-06, "loss": 0.5928, "step": 26781 }, { "epoch": 0.7819333742080523, "grad_norm": 0.7406151791483477, "learning_rate": 1.2115166261151663e-06, "loss": 0.6644, "step": 26782 }, { "epoch": 0.7819625704358997, "grad_norm": 0.690531932136756, "learning_rate": 1.2113544201135441e-06, "loss": 0.553, "step": 26783 }, { "epoch": 0.781991766663747, "grad_norm": 0.7356270879212323, "learning_rate": 1.2111922141119223e-06, "loss": 0.6551, "step": 26784 }, { "epoch": 0.7820209628915944, "grad_norm": 0.6699865722727255, "learning_rate": 1.2110300081103001e-06, "loss": 0.5704, "step": 26785 }, { "epoch": 0.7820501591194418, "grad_norm": 0.690595982654501, "learning_rate": 1.2108678021086781e-06, "loss": 0.5625, "step": 26786 }, { "epoch": 0.7820793553472891, "grad_norm": 0.7390688581628915, "learning_rate": 1.210705596107056e-06, "loss": 0.6952, "step": 26787 }, { "epoch": 0.7821085515751365, "grad_norm": 0.7474495938614464, "learning_rate": 1.2105433901054342e-06, "loss": 0.6876, "step": 26788 }, { "epoch": 0.7821377478029838, "grad_norm": 0.7240042063378461, "learning_rate": 1.210381184103812e-06, "loss": 0.6919, "step": 26789 }, { "epoch": 0.7821669440308312, "grad_norm": 0.7068048079118269, "learning_rate": 1.21021897810219e-06, "loss": 0.6085, "step": 26790 }, { "epoch": 0.7821961402586786, "grad_norm": 0.676309790656471, "learning_rate": 1.2100567721005678e-06, "loss": 0.5424, "step": 26791 }, { "epoch": 0.7822253364865259, "grad_norm": 0.6964731494890775, "learning_rate": 1.2098945660989458e-06, "loss": 0.6415, "step": 26792 }, { "epoch": 0.7822545327143733, "grad_norm": 0.7272768206243165, "learning_rate": 1.2097323600973238e-06, "loss": 0.6615, "step": 26793 }, { "epoch": 0.7822837289422206, "grad_norm": 0.6912357529275784, "learning_rate": 1.2095701540957016e-06, "loss": 0.5977, "step": 26794 }, { "epoch": 0.782312925170068, "grad_norm": 0.8232209138292597, "learning_rate": 1.2094079480940796e-06, "loss": 0.7436, "step": 26795 }, { "epoch": 0.7823421213979154, "grad_norm": 0.7196281369469881, "learning_rate": 1.2092457420924576e-06, "loss": 0.6333, "step": 26796 }, { "epoch": 0.7823713176257627, "grad_norm": 0.7377493329275376, "learning_rate": 1.2090835360908354e-06, "loss": 0.6105, "step": 26797 }, { "epoch": 0.7824005138536101, "grad_norm": 0.7231628331002149, "learning_rate": 1.2089213300892134e-06, "loss": 0.6265, "step": 26798 }, { "epoch": 0.7824297100814575, "grad_norm": 0.653106054605013, "learning_rate": 1.2087591240875914e-06, "loss": 0.5451, "step": 26799 }, { "epoch": 0.7824589063093048, "grad_norm": 0.7218771331196732, "learning_rate": 1.2085969180859692e-06, "loss": 0.6112, "step": 26800 }, { "epoch": 0.7824881025371522, "grad_norm": 0.7625487010320047, "learning_rate": 1.2084347120843472e-06, "loss": 0.7116, "step": 26801 }, { "epoch": 0.7825172987649995, "grad_norm": 0.7011665667484372, "learning_rate": 1.2082725060827252e-06, "loss": 0.6189, "step": 26802 }, { "epoch": 0.7825464949928469, "grad_norm": 0.7655522380017833, "learning_rate": 1.2081103000811032e-06, "loss": 0.607, "step": 26803 }, { "epoch": 0.7825756912206943, "grad_norm": 0.7274368663052063, "learning_rate": 1.207948094079481e-06, "loss": 0.6038, "step": 26804 }, { "epoch": 0.7826048874485416, "grad_norm": 0.7208291272248999, "learning_rate": 1.207785888077859e-06, "loss": 0.65, "step": 26805 }, { "epoch": 0.782634083676389, "grad_norm": 0.8153294914644491, "learning_rate": 1.2076236820762368e-06, "loss": 0.6812, "step": 26806 }, { "epoch": 0.7826632799042363, "grad_norm": 0.6718705825207572, "learning_rate": 1.207461476074615e-06, "loss": 0.546, "step": 26807 }, { "epoch": 0.7826924761320837, "grad_norm": 0.6959156646860656, "learning_rate": 1.2072992700729928e-06, "loss": 0.5554, "step": 26808 }, { "epoch": 0.7827216723599311, "grad_norm": 0.6957365431326736, "learning_rate": 1.2071370640713708e-06, "loss": 0.6099, "step": 26809 }, { "epoch": 0.7827508685877784, "grad_norm": 0.7421949733542748, "learning_rate": 1.2069748580697486e-06, "loss": 0.6665, "step": 26810 }, { "epoch": 0.7827800648156258, "grad_norm": 0.7426673715438145, "learning_rate": 1.2068126520681266e-06, "loss": 0.6651, "step": 26811 }, { "epoch": 0.7828092610434731, "grad_norm": 0.7624463397578248, "learning_rate": 1.2066504460665046e-06, "loss": 0.6873, "step": 26812 }, { "epoch": 0.7828384572713205, "grad_norm": 0.7500733735150149, "learning_rate": 1.2064882400648824e-06, "loss": 0.6624, "step": 26813 }, { "epoch": 0.7828676534991679, "grad_norm": 0.7209902994091242, "learning_rate": 1.2063260340632604e-06, "loss": 0.6684, "step": 26814 }, { "epoch": 0.7828968497270152, "grad_norm": 0.7411843826289138, "learning_rate": 1.2061638280616384e-06, "loss": 0.6546, "step": 26815 }, { "epoch": 0.7829260459548626, "grad_norm": 0.8685738314585734, "learning_rate": 1.2060016220600162e-06, "loss": 0.7442, "step": 26816 }, { "epoch": 0.78295524218271, "grad_norm": 0.8505329383226088, "learning_rate": 1.2058394160583942e-06, "loss": 0.7249, "step": 26817 }, { "epoch": 0.7829844384105573, "grad_norm": 0.7856932954775385, "learning_rate": 1.2056772100567722e-06, "loss": 0.64, "step": 26818 }, { "epoch": 0.7830136346384047, "grad_norm": 0.7449799079363428, "learning_rate": 1.20551500405515e-06, "loss": 0.6152, "step": 26819 }, { "epoch": 0.783042830866252, "grad_norm": 0.8019099563825465, "learning_rate": 1.205352798053528e-06, "loss": 0.7113, "step": 26820 }, { "epoch": 0.7830720270940994, "grad_norm": 0.6771638447544189, "learning_rate": 1.205190592051906e-06, "loss": 0.5882, "step": 26821 }, { "epoch": 0.7831012233219468, "grad_norm": 0.7012245676068067, "learning_rate": 1.205028386050284e-06, "loss": 0.5917, "step": 26822 }, { "epoch": 0.7831304195497941, "grad_norm": 0.7171443072977639, "learning_rate": 1.2048661800486619e-06, "loss": 0.6081, "step": 26823 }, { "epoch": 0.7831596157776415, "grad_norm": 0.6604073675022272, "learning_rate": 1.2047039740470399e-06, "loss": 0.5896, "step": 26824 }, { "epoch": 0.7831888120054888, "grad_norm": 0.767587080322006, "learning_rate": 1.2045417680454177e-06, "loss": 0.6769, "step": 26825 }, { "epoch": 0.7832180082333362, "grad_norm": 0.7025714653850442, "learning_rate": 1.2043795620437959e-06, "loss": 0.6212, "step": 26826 }, { "epoch": 0.7832472044611836, "grad_norm": 0.6806570335399266, "learning_rate": 1.2042173560421737e-06, "loss": 0.5813, "step": 26827 }, { "epoch": 0.7832764006890309, "grad_norm": 0.6995469441872543, "learning_rate": 1.2040551500405517e-06, "loss": 0.5801, "step": 26828 }, { "epoch": 0.7833055969168783, "grad_norm": 0.8249386618541228, "learning_rate": 1.2038929440389295e-06, "loss": 0.7642, "step": 26829 }, { "epoch": 0.7833347931447256, "grad_norm": 0.7177055525216579, "learning_rate": 1.2037307380373075e-06, "loss": 0.6125, "step": 26830 }, { "epoch": 0.783363989372573, "grad_norm": 0.783438667073271, "learning_rate": 1.2035685320356855e-06, "loss": 0.6565, "step": 26831 }, { "epoch": 0.7833931856004204, "grad_norm": 0.7606328609702816, "learning_rate": 1.2034063260340633e-06, "loss": 0.6146, "step": 26832 }, { "epoch": 0.7834223818282677, "grad_norm": 0.7664048820845226, "learning_rate": 1.2032441200324413e-06, "loss": 0.7307, "step": 26833 }, { "epoch": 0.7834515780561152, "grad_norm": 0.7104891653061515, "learning_rate": 1.2030819140308193e-06, "loss": 0.6042, "step": 26834 }, { "epoch": 0.7834807742839626, "grad_norm": 0.753840459956872, "learning_rate": 1.2029197080291973e-06, "loss": 0.6835, "step": 26835 }, { "epoch": 0.7835099705118099, "grad_norm": 0.6965523058861496, "learning_rate": 1.202757502027575e-06, "loss": 0.5915, "step": 26836 }, { "epoch": 0.7835391667396573, "grad_norm": 0.7422987312894228, "learning_rate": 1.2025952960259531e-06, "loss": 0.6827, "step": 26837 }, { "epoch": 0.7835683629675047, "grad_norm": 0.7142992148596237, "learning_rate": 1.202433090024331e-06, "loss": 0.6098, "step": 26838 }, { "epoch": 0.783597559195352, "grad_norm": 0.7283270142280271, "learning_rate": 1.202270884022709e-06, "loss": 0.6204, "step": 26839 }, { "epoch": 0.7836267554231994, "grad_norm": 0.7603820907666315, "learning_rate": 1.202108678021087e-06, "loss": 0.7035, "step": 26840 }, { "epoch": 0.7836559516510467, "grad_norm": 0.803370130011637, "learning_rate": 1.201946472019465e-06, "loss": 0.7127, "step": 26841 }, { "epoch": 0.7836851478788941, "grad_norm": 0.7244009661623607, "learning_rate": 1.2017842660178427e-06, "loss": 0.682, "step": 26842 }, { "epoch": 0.7837143441067415, "grad_norm": 0.7373031373261516, "learning_rate": 1.2016220600162207e-06, "loss": 0.6683, "step": 26843 }, { "epoch": 0.7837435403345888, "grad_norm": 0.6768650765027089, "learning_rate": 1.2014598540145985e-06, "loss": 0.5723, "step": 26844 }, { "epoch": 0.7837727365624362, "grad_norm": 0.7398283212089134, "learning_rate": 1.2012976480129765e-06, "loss": 0.6453, "step": 26845 }, { "epoch": 0.7838019327902835, "grad_norm": 0.7495863546346875, "learning_rate": 1.2011354420113545e-06, "loss": 0.6885, "step": 26846 }, { "epoch": 0.7838311290181309, "grad_norm": 0.7500201819979826, "learning_rate": 1.2009732360097325e-06, "loss": 0.648, "step": 26847 }, { "epoch": 0.7838603252459783, "grad_norm": 0.7037981160114833, "learning_rate": 1.2008110300081103e-06, "loss": 0.6398, "step": 26848 }, { "epoch": 0.7838895214738256, "grad_norm": 0.7764305041518873, "learning_rate": 1.2006488240064883e-06, "loss": 0.7212, "step": 26849 }, { "epoch": 0.783918717701673, "grad_norm": 0.7181652019655768, "learning_rate": 1.2004866180048663e-06, "loss": 0.6649, "step": 26850 }, { "epoch": 0.7839479139295203, "grad_norm": 0.674714029684902, "learning_rate": 1.2003244120032441e-06, "loss": 0.59, "step": 26851 }, { "epoch": 0.7839771101573677, "grad_norm": 0.7207433529377248, "learning_rate": 1.2001622060016222e-06, "loss": 0.6, "step": 26852 }, { "epoch": 0.7840063063852151, "grad_norm": 0.7164644597349059, "learning_rate": 1.2000000000000002e-06, "loss": 0.5993, "step": 26853 }, { "epoch": 0.7840355026130624, "grad_norm": 0.7642514438461574, "learning_rate": 1.1998377939983782e-06, "loss": 0.6631, "step": 26854 }, { "epoch": 0.7840646988409098, "grad_norm": 0.7269296688510214, "learning_rate": 1.199675587996756e-06, "loss": 0.6016, "step": 26855 }, { "epoch": 0.7840938950687572, "grad_norm": 0.683211302017575, "learning_rate": 1.199513381995134e-06, "loss": 0.6, "step": 26856 }, { "epoch": 0.7841230912966045, "grad_norm": 0.7043281163776829, "learning_rate": 1.1993511759935118e-06, "loss": 0.6064, "step": 26857 }, { "epoch": 0.7841522875244519, "grad_norm": 0.7424372805354479, "learning_rate": 1.1991889699918898e-06, "loss": 0.6553, "step": 26858 }, { "epoch": 0.7841814837522992, "grad_norm": 0.7343970149024661, "learning_rate": 1.1990267639902678e-06, "loss": 0.682, "step": 26859 }, { "epoch": 0.7842106799801466, "grad_norm": 0.744966015299708, "learning_rate": 1.1988645579886458e-06, "loss": 0.6688, "step": 26860 }, { "epoch": 0.784239876207994, "grad_norm": 0.7444274645133304, "learning_rate": 1.1987023519870236e-06, "loss": 0.7195, "step": 26861 }, { "epoch": 0.7842690724358413, "grad_norm": 0.7545643791741584, "learning_rate": 1.1985401459854016e-06, "loss": 0.6786, "step": 26862 }, { "epoch": 0.7842982686636887, "grad_norm": 0.9213404333396137, "learning_rate": 1.1983779399837794e-06, "loss": 0.5844, "step": 26863 }, { "epoch": 0.784327464891536, "grad_norm": 0.7491348107465411, "learning_rate": 1.1982157339821574e-06, "loss": 0.7119, "step": 26864 }, { "epoch": 0.7843566611193834, "grad_norm": 0.8383034433599109, "learning_rate": 1.1980535279805354e-06, "loss": 0.7429, "step": 26865 }, { "epoch": 0.7843858573472308, "grad_norm": 0.7074382918061815, "learning_rate": 1.1978913219789134e-06, "loss": 0.6285, "step": 26866 }, { "epoch": 0.7844150535750781, "grad_norm": 0.6702329787785408, "learning_rate": 1.1977291159772912e-06, "loss": 0.5491, "step": 26867 }, { "epoch": 0.7844442498029255, "grad_norm": 0.7279030993413887, "learning_rate": 1.1975669099756692e-06, "loss": 0.5858, "step": 26868 }, { "epoch": 0.7844734460307728, "grad_norm": 0.7016446737817655, "learning_rate": 1.1974047039740472e-06, "loss": 0.5927, "step": 26869 }, { "epoch": 0.7845026422586202, "grad_norm": 0.731025044485532, "learning_rate": 1.197242497972425e-06, "loss": 0.6362, "step": 26870 }, { "epoch": 0.7845318384864676, "grad_norm": 0.7603441495979608, "learning_rate": 1.197080291970803e-06, "loss": 0.6429, "step": 26871 }, { "epoch": 0.7845610347143149, "grad_norm": 0.6881208971645765, "learning_rate": 1.1969180859691808e-06, "loss": 0.5495, "step": 26872 }, { "epoch": 0.7845902309421623, "grad_norm": 0.6965778785325545, "learning_rate": 1.196755879967559e-06, "loss": 0.5517, "step": 26873 }, { "epoch": 0.7846194271700097, "grad_norm": 0.7088268602048612, "learning_rate": 1.1965936739659368e-06, "loss": 0.6294, "step": 26874 }, { "epoch": 0.784648623397857, "grad_norm": 0.688254414211163, "learning_rate": 1.1964314679643148e-06, "loss": 0.5898, "step": 26875 }, { "epoch": 0.7846778196257044, "grad_norm": 0.7185373177465457, "learning_rate": 1.1962692619626926e-06, "loss": 0.6637, "step": 26876 }, { "epoch": 0.7847070158535517, "grad_norm": 0.7050763899026513, "learning_rate": 1.1961070559610706e-06, "loss": 0.6185, "step": 26877 }, { "epoch": 0.7847362120813991, "grad_norm": 0.6960966231807254, "learning_rate": 1.1959448499594486e-06, "loss": 0.6289, "step": 26878 }, { "epoch": 0.7847654083092465, "grad_norm": 0.7615669521853332, "learning_rate": 1.1957826439578266e-06, "loss": 0.6738, "step": 26879 }, { "epoch": 0.7847946045370938, "grad_norm": 0.7130664156903326, "learning_rate": 1.1956204379562044e-06, "loss": 0.5988, "step": 26880 }, { "epoch": 0.7848238007649412, "grad_norm": 0.7659730583208325, "learning_rate": 1.1954582319545824e-06, "loss": 0.6994, "step": 26881 }, { "epoch": 0.7848529969927885, "grad_norm": 0.6697231560112693, "learning_rate": 1.1952960259529602e-06, "loss": 0.5824, "step": 26882 }, { "epoch": 0.7848821932206359, "grad_norm": 0.7180789804512347, "learning_rate": 1.1951338199513382e-06, "loss": 0.5817, "step": 26883 }, { "epoch": 0.7849113894484833, "grad_norm": 0.7161105635689176, "learning_rate": 1.1949716139497163e-06, "loss": 0.6394, "step": 26884 }, { "epoch": 0.7849405856763306, "grad_norm": 0.7956500523501482, "learning_rate": 1.1948094079480943e-06, "loss": 0.6748, "step": 26885 }, { "epoch": 0.784969781904178, "grad_norm": 0.7032136417060715, "learning_rate": 1.194647201946472e-06, "loss": 0.65, "step": 26886 }, { "epoch": 0.7849989781320253, "grad_norm": 0.7148680874380202, "learning_rate": 1.19448499594485e-06, "loss": 0.6188, "step": 26887 }, { "epoch": 0.7850281743598727, "grad_norm": 0.6696698241335519, "learning_rate": 1.194322789943228e-06, "loss": 0.6085, "step": 26888 }, { "epoch": 0.7850573705877201, "grad_norm": 0.7111889972590215, "learning_rate": 1.1941605839416059e-06, "loss": 0.6332, "step": 26889 }, { "epoch": 0.7850865668155674, "grad_norm": 0.7886018078507149, "learning_rate": 1.1939983779399839e-06, "loss": 0.6448, "step": 26890 }, { "epoch": 0.7851157630434148, "grad_norm": 0.7590553435420655, "learning_rate": 1.1938361719383617e-06, "loss": 0.6821, "step": 26891 }, { "epoch": 0.7851449592712622, "grad_norm": 0.6956823394525197, "learning_rate": 1.1936739659367399e-06, "loss": 0.6053, "step": 26892 }, { "epoch": 0.7851741554991095, "grad_norm": 0.7299003511564661, "learning_rate": 1.1935117599351177e-06, "loss": 0.603, "step": 26893 }, { "epoch": 0.7852033517269569, "grad_norm": 0.8117007494641247, "learning_rate": 1.1933495539334957e-06, "loss": 0.7309, "step": 26894 }, { "epoch": 0.7852325479548042, "grad_norm": 0.7064069335835338, "learning_rate": 1.1931873479318735e-06, "loss": 0.6443, "step": 26895 }, { "epoch": 0.7852617441826516, "grad_norm": 0.7015204666006398, "learning_rate": 1.1930251419302515e-06, "loss": 0.564, "step": 26896 }, { "epoch": 0.785290940410499, "grad_norm": 0.7038470023744505, "learning_rate": 1.1928629359286295e-06, "loss": 0.572, "step": 26897 }, { "epoch": 0.7853201366383463, "grad_norm": 0.708782677315189, "learning_rate": 1.1927007299270075e-06, "loss": 0.5951, "step": 26898 }, { "epoch": 0.7853493328661937, "grad_norm": 0.7756078465627817, "learning_rate": 1.1925385239253853e-06, "loss": 0.7007, "step": 26899 }, { "epoch": 0.785378529094041, "grad_norm": 0.6796851216519175, "learning_rate": 1.1923763179237633e-06, "loss": 0.6185, "step": 26900 }, { "epoch": 0.7854077253218884, "grad_norm": 0.8042687197375984, "learning_rate": 1.192214111922141e-06, "loss": 0.7202, "step": 26901 }, { "epoch": 0.7854369215497358, "grad_norm": 0.721044767693242, "learning_rate": 1.192051905920519e-06, "loss": 0.5845, "step": 26902 }, { "epoch": 0.7854661177775831, "grad_norm": 0.7234680534065232, "learning_rate": 1.1918896999188971e-06, "loss": 0.6169, "step": 26903 }, { "epoch": 0.7854953140054305, "grad_norm": 0.7585164347617827, "learning_rate": 1.1917274939172751e-06, "loss": 0.7097, "step": 26904 }, { "epoch": 0.7855245102332779, "grad_norm": 0.7608564446392341, "learning_rate": 1.191565287915653e-06, "loss": 0.6696, "step": 26905 }, { "epoch": 0.7855537064611252, "grad_norm": 0.7044994114494123, "learning_rate": 1.191403081914031e-06, "loss": 0.6243, "step": 26906 }, { "epoch": 0.7855829026889726, "grad_norm": 0.6981127975885356, "learning_rate": 1.191240875912409e-06, "loss": 0.5958, "step": 26907 }, { "epoch": 0.7856120989168199, "grad_norm": 0.752240512257256, "learning_rate": 1.1910786699107867e-06, "loss": 0.6992, "step": 26908 }, { "epoch": 0.7856412951446673, "grad_norm": 0.7447303481025286, "learning_rate": 1.1909164639091647e-06, "loss": 0.6668, "step": 26909 }, { "epoch": 0.7856704913725147, "grad_norm": 0.7228214332792178, "learning_rate": 1.1907542579075425e-06, "loss": 0.649, "step": 26910 }, { "epoch": 0.785699687600362, "grad_norm": 0.7341465317950062, "learning_rate": 1.1905920519059207e-06, "loss": 0.6399, "step": 26911 }, { "epoch": 0.7857288838282094, "grad_norm": 0.7168391572105784, "learning_rate": 1.1904298459042985e-06, "loss": 0.646, "step": 26912 }, { "epoch": 0.7857580800560567, "grad_norm": 0.7174106249177417, "learning_rate": 1.1902676399026765e-06, "loss": 0.597, "step": 26913 }, { "epoch": 0.7857872762839041, "grad_norm": 0.7203743365420717, "learning_rate": 1.1901054339010543e-06, "loss": 0.6663, "step": 26914 }, { "epoch": 0.7858164725117515, "grad_norm": 0.7351577625744284, "learning_rate": 1.1899432278994323e-06, "loss": 0.6757, "step": 26915 }, { "epoch": 0.7858456687395988, "grad_norm": 0.7571574468007125, "learning_rate": 1.1897810218978104e-06, "loss": 0.6682, "step": 26916 }, { "epoch": 0.7858748649674462, "grad_norm": 0.6983790221483679, "learning_rate": 1.1896188158961884e-06, "loss": 0.6208, "step": 26917 }, { "epoch": 0.7859040611952935, "grad_norm": 0.7498636162574753, "learning_rate": 1.1894566098945662e-06, "loss": 0.6459, "step": 26918 }, { "epoch": 0.7859332574231409, "grad_norm": 0.8059901474749306, "learning_rate": 1.1892944038929442e-06, "loss": 0.7945, "step": 26919 }, { "epoch": 0.7859624536509883, "grad_norm": 0.7210731825674528, "learning_rate": 1.1891321978913222e-06, "loss": 0.6452, "step": 26920 }, { "epoch": 0.7859916498788356, "grad_norm": 0.71576345828679, "learning_rate": 1.1889699918897e-06, "loss": 0.5844, "step": 26921 }, { "epoch": 0.786020846106683, "grad_norm": 0.6797316085006182, "learning_rate": 1.188807785888078e-06, "loss": 0.5517, "step": 26922 }, { "epoch": 0.7860500423345304, "grad_norm": 0.8162254589495628, "learning_rate": 1.188645579886456e-06, "loss": 0.7569, "step": 26923 }, { "epoch": 0.7860792385623777, "grad_norm": 0.7077511396436296, "learning_rate": 1.1884833738848338e-06, "loss": 0.6693, "step": 26924 }, { "epoch": 0.7861084347902251, "grad_norm": 0.747232740179684, "learning_rate": 1.1883211678832118e-06, "loss": 0.6668, "step": 26925 }, { "epoch": 0.7861376310180724, "grad_norm": 0.7166156970645758, "learning_rate": 1.1881589618815898e-06, "loss": 0.6305, "step": 26926 }, { "epoch": 0.7861668272459198, "grad_norm": 0.7256306219839932, "learning_rate": 1.1879967558799676e-06, "loss": 0.5971, "step": 26927 }, { "epoch": 0.7861960234737672, "grad_norm": 0.6977924592882983, "learning_rate": 1.1878345498783456e-06, "loss": 0.6057, "step": 26928 }, { "epoch": 0.7862252197016145, "grad_norm": 0.7547510907419842, "learning_rate": 1.1876723438767234e-06, "loss": 0.5849, "step": 26929 }, { "epoch": 0.7862544159294619, "grad_norm": 0.7122092767907663, "learning_rate": 1.1875101378751016e-06, "loss": 0.6337, "step": 26930 }, { "epoch": 0.7862836121573092, "grad_norm": 0.702683022948773, "learning_rate": 1.1873479318734794e-06, "loss": 0.5535, "step": 26931 }, { "epoch": 0.7863128083851566, "grad_norm": 0.6816148392865363, "learning_rate": 1.1871857258718574e-06, "loss": 0.5999, "step": 26932 }, { "epoch": 0.786342004613004, "grad_norm": 0.6954435057018346, "learning_rate": 1.1870235198702352e-06, "loss": 0.6004, "step": 26933 }, { "epoch": 0.7863712008408513, "grad_norm": 0.7660789614159645, "learning_rate": 1.1868613138686132e-06, "loss": 0.6672, "step": 26934 }, { "epoch": 0.7864003970686987, "grad_norm": 0.7249746298760747, "learning_rate": 1.1866991078669912e-06, "loss": 0.6789, "step": 26935 }, { "epoch": 0.786429593296546, "grad_norm": 0.6409638811407825, "learning_rate": 1.1865369018653692e-06, "loss": 0.5167, "step": 26936 }, { "epoch": 0.7864587895243934, "grad_norm": 0.7252894970513936, "learning_rate": 1.186374695863747e-06, "loss": 0.6567, "step": 26937 }, { "epoch": 0.7864879857522408, "grad_norm": 0.7190295717476133, "learning_rate": 1.186212489862125e-06, "loss": 0.6093, "step": 26938 }, { "epoch": 0.7865171819800881, "grad_norm": 0.7405485524361914, "learning_rate": 1.186050283860503e-06, "loss": 0.6448, "step": 26939 }, { "epoch": 0.7865463782079355, "grad_norm": 0.7225354244548599, "learning_rate": 1.1858880778588808e-06, "loss": 0.5855, "step": 26940 }, { "epoch": 0.7865755744357829, "grad_norm": 0.6775478132906426, "learning_rate": 1.1857258718572588e-06, "loss": 0.5717, "step": 26941 }, { "epoch": 0.7866047706636302, "grad_norm": 0.6843153656106467, "learning_rate": 1.1855636658556368e-06, "loss": 0.614, "step": 26942 }, { "epoch": 0.7866339668914776, "grad_norm": 0.8373944542183477, "learning_rate": 1.1854014598540146e-06, "loss": 0.7024, "step": 26943 }, { "epoch": 0.7866631631193249, "grad_norm": 0.7226613245713077, "learning_rate": 1.1852392538523926e-06, "loss": 0.6431, "step": 26944 }, { "epoch": 0.7866923593471723, "grad_norm": 0.6966546220953957, "learning_rate": 1.1850770478507706e-06, "loss": 0.589, "step": 26945 }, { "epoch": 0.7867215555750197, "grad_norm": 0.7827005657764019, "learning_rate": 1.1849148418491484e-06, "loss": 0.7334, "step": 26946 }, { "epoch": 0.786750751802867, "grad_norm": 0.6904261501171582, "learning_rate": 1.1847526358475264e-06, "loss": 0.5706, "step": 26947 }, { "epoch": 0.7867799480307144, "grad_norm": 0.6991608166890648, "learning_rate": 1.1845904298459042e-06, "loss": 0.5668, "step": 26948 }, { "epoch": 0.7868091442585617, "grad_norm": 0.7638338048464638, "learning_rate": 1.1844282238442825e-06, "loss": 0.6385, "step": 26949 }, { "epoch": 0.7868383404864091, "grad_norm": 0.698879459318766, "learning_rate": 1.1842660178426603e-06, "loss": 0.6294, "step": 26950 }, { "epoch": 0.7868675367142565, "grad_norm": 0.71998640759616, "learning_rate": 1.1841038118410383e-06, "loss": 0.6617, "step": 26951 }, { "epoch": 0.7868967329421038, "grad_norm": 0.7583089511151757, "learning_rate": 1.183941605839416e-06, "loss": 0.6911, "step": 26952 }, { "epoch": 0.7869259291699512, "grad_norm": 0.7100127374240264, "learning_rate": 1.183779399837794e-06, "loss": 0.6632, "step": 26953 }, { "epoch": 0.7869551253977985, "grad_norm": 0.718445747224225, "learning_rate": 1.183617193836172e-06, "loss": 0.6462, "step": 26954 }, { "epoch": 0.786984321625646, "grad_norm": 0.6819406705529704, "learning_rate": 1.18345498783455e-06, "loss": 0.5663, "step": 26955 }, { "epoch": 0.7870135178534934, "grad_norm": 0.7300897653534549, "learning_rate": 1.1832927818329279e-06, "loss": 0.5832, "step": 26956 }, { "epoch": 0.7870427140813407, "grad_norm": 0.7803793042840664, "learning_rate": 1.1831305758313059e-06, "loss": 0.6969, "step": 26957 }, { "epoch": 0.7870719103091881, "grad_norm": 0.7510132461654927, "learning_rate": 1.1829683698296839e-06, "loss": 0.6798, "step": 26958 }, { "epoch": 0.7871011065370355, "grad_norm": 0.6997617128606921, "learning_rate": 1.1828061638280617e-06, "loss": 0.5506, "step": 26959 }, { "epoch": 0.7871303027648828, "grad_norm": 0.6952480938093083, "learning_rate": 1.1826439578264397e-06, "loss": 0.6058, "step": 26960 }, { "epoch": 0.7871594989927302, "grad_norm": 0.7776435157309548, "learning_rate": 1.1824817518248177e-06, "loss": 0.6781, "step": 26961 }, { "epoch": 0.7871886952205776, "grad_norm": 0.6708973698579245, "learning_rate": 1.1823195458231955e-06, "loss": 0.5504, "step": 26962 }, { "epoch": 0.7872178914484249, "grad_norm": 0.7138351992143583, "learning_rate": 1.1821573398215735e-06, "loss": 0.5978, "step": 26963 }, { "epoch": 0.7872470876762723, "grad_norm": 0.6508119604114216, "learning_rate": 1.1819951338199515e-06, "loss": 0.5434, "step": 26964 }, { "epoch": 0.7872762839041196, "grad_norm": 0.6805724396727726, "learning_rate": 1.1818329278183293e-06, "loss": 0.589, "step": 26965 }, { "epoch": 0.787305480131967, "grad_norm": 0.8063345800415127, "learning_rate": 1.1816707218167073e-06, "loss": 0.7585, "step": 26966 }, { "epoch": 0.7873346763598144, "grad_norm": 0.7849895789868302, "learning_rate": 1.181508515815085e-06, "loss": 0.6675, "step": 26967 }, { "epoch": 0.7873638725876617, "grad_norm": 0.7316599881220859, "learning_rate": 1.1813463098134633e-06, "loss": 0.6956, "step": 26968 }, { "epoch": 0.7873930688155091, "grad_norm": 0.69684409214252, "learning_rate": 1.1811841038118411e-06, "loss": 0.5866, "step": 26969 }, { "epoch": 0.7874222650433564, "grad_norm": 0.7797958117826458, "learning_rate": 1.1810218978102191e-06, "loss": 0.7135, "step": 26970 }, { "epoch": 0.7874514612712038, "grad_norm": 0.7290758905301405, "learning_rate": 1.180859691808597e-06, "loss": 0.6499, "step": 26971 }, { "epoch": 0.7874806574990512, "grad_norm": 0.7172823259308871, "learning_rate": 1.180697485806975e-06, "loss": 0.6178, "step": 26972 }, { "epoch": 0.7875098537268985, "grad_norm": 0.6953928014424332, "learning_rate": 1.180535279805353e-06, "loss": 0.6257, "step": 26973 }, { "epoch": 0.7875390499547459, "grad_norm": 0.7110865249354289, "learning_rate": 1.180373073803731e-06, "loss": 0.5825, "step": 26974 }, { "epoch": 0.7875682461825932, "grad_norm": 0.7101363983161141, "learning_rate": 1.1802108678021087e-06, "loss": 0.6454, "step": 26975 }, { "epoch": 0.7875974424104406, "grad_norm": 0.7713476890784461, "learning_rate": 1.1800486618004867e-06, "loss": 0.691, "step": 26976 }, { "epoch": 0.787626638638288, "grad_norm": 0.7950521764128787, "learning_rate": 1.1798864557988647e-06, "loss": 0.7018, "step": 26977 }, { "epoch": 0.7876558348661353, "grad_norm": 0.7184784848466028, "learning_rate": 1.1797242497972425e-06, "loss": 0.6772, "step": 26978 }, { "epoch": 0.7876850310939827, "grad_norm": 0.6561045362767557, "learning_rate": 1.1795620437956205e-06, "loss": 0.5579, "step": 26979 }, { "epoch": 0.78771422732183, "grad_norm": 0.6963136365430586, "learning_rate": 1.1793998377939983e-06, "loss": 0.613, "step": 26980 }, { "epoch": 0.7877434235496774, "grad_norm": 1.4334360652529134, "learning_rate": 1.1792376317923763e-06, "loss": 0.6984, "step": 26981 }, { "epoch": 0.7877726197775248, "grad_norm": 0.7149883318905182, "learning_rate": 1.1790754257907544e-06, "loss": 0.6156, "step": 26982 }, { "epoch": 0.7878018160053721, "grad_norm": 0.7969182290889377, "learning_rate": 1.1789132197891324e-06, "loss": 0.827, "step": 26983 }, { "epoch": 0.7878310122332195, "grad_norm": 0.731550942871708, "learning_rate": 1.1787510137875102e-06, "loss": 0.6456, "step": 26984 }, { "epoch": 0.7878602084610669, "grad_norm": 0.7177150675769024, "learning_rate": 1.1785888077858882e-06, "loss": 0.633, "step": 26985 }, { "epoch": 0.7878894046889142, "grad_norm": 0.7675362435855456, "learning_rate": 1.1784266017842662e-06, "loss": 0.7167, "step": 26986 }, { "epoch": 0.7879186009167616, "grad_norm": 0.6860169643815168, "learning_rate": 1.1782643957826442e-06, "loss": 0.5872, "step": 26987 }, { "epoch": 0.7879477971446089, "grad_norm": 0.7990686036181802, "learning_rate": 1.178102189781022e-06, "loss": 0.7727, "step": 26988 }, { "epoch": 0.7879769933724563, "grad_norm": 0.6744983292201964, "learning_rate": 1.1779399837794e-06, "loss": 0.6196, "step": 26989 }, { "epoch": 0.7880061896003037, "grad_norm": 2.0965952268975805, "learning_rate": 1.1777777777777778e-06, "loss": 0.6729, "step": 26990 }, { "epoch": 0.788035385828151, "grad_norm": 0.7218562224021837, "learning_rate": 1.1776155717761558e-06, "loss": 0.6246, "step": 26991 }, { "epoch": 0.7880645820559984, "grad_norm": 0.7470480054306906, "learning_rate": 1.1774533657745338e-06, "loss": 0.6788, "step": 26992 }, { "epoch": 0.7880937782838457, "grad_norm": 0.6914573908242747, "learning_rate": 1.1772911597729118e-06, "loss": 0.5845, "step": 26993 }, { "epoch": 0.7881229745116931, "grad_norm": 0.6767941849020539, "learning_rate": 1.1771289537712896e-06, "loss": 0.5794, "step": 26994 }, { "epoch": 0.7881521707395405, "grad_norm": 0.6736382368840763, "learning_rate": 1.1769667477696676e-06, "loss": 0.5866, "step": 26995 }, { "epoch": 0.7881813669673878, "grad_norm": 0.7529847381838625, "learning_rate": 1.1768045417680456e-06, "loss": 0.6631, "step": 26996 }, { "epoch": 0.7882105631952352, "grad_norm": 0.6700133357255303, "learning_rate": 1.1766423357664234e-06, "loss": 0.5348, "step": 26997 }, { "epoch": 0.7882397594230826, "grad_norm": 0.7101905638566498, "learning_rate": 1.1764801297648014e-06, "loss": 0.5803, "step": 26998 }, { "epoch": 0.7882689556509299, "grad_norm": 0.7343180766359254, "learning_rate": 1.1763179237631792e-06, "loss": 0.6417, "step": 26999 }, { "epoch": 0.7882981518787773, "grad_norm": 0.6828663315776671, "learning_rate": 1.1761557177615572e-06, "loss": 0.5874, "step": 27000 }, { "epoch": 0.7883273481066246, "grad_norm": 0.7116173668812352, "learning_rate": 1.1759935117599352e-06, "loss": 0.6532, "step": 27001 }, { "epoch": 0.788356544334472, "grad_norm": 0.7418126001091949, "learning_rate": 1.1758313057583132e-06, "loss": 0.6183, "step": 27002 }, { "epoch": 0.7883857405623194, "grad_norm": 0.7992090009980327, "learning_rate": 1.175669099756691e-06, "loss": 0.7327, "step": 27003 }, { "epoch": 0.7884149367901667, "grad_norm": 0.6891936897729297, "learning_rate": 1.175506893755069e-06, "loss": 0.6104, "step": 27004 }, { "epoch": 0.7884441330180141, "grad_norm": 0.7223177347670778, "learning_rate": 1.175344687753447e-06, "loss": 0.5774, "step": 27005 }, { "epoch": 0.7884733292458614, "grad_norm": 0.7760039855300277, "learning_rate": 1.175182481751825e-06, "loss": 0.7492, "step": 27006 }, { "epoch": 0.7885025254737088, "grad_norm": 0.7243001864868759, "learning_rate": 1.1750202757502028e-06, "loss": 0.6436, "step": 27007 }, { "epoch": 0.7885317217015562, "grad_norm": 0.7135772346614089, "learning_rate": 1.1748580697485808e-06, "loss": 0.5959, "step": 27008 }, { "epoch": 0.7885609179294035, "grad_norm": 0.7341247394883276, "learning_rate": 1.1746958637469586e-06, "loss": 0.6427, "step": 27009 }, { "epoch": 0.7885901141572509, "grad_norm": 0.6996901152677616, "learning_rate": 1.1745336577453366e-06, "loss": 0.5879, "step": 27010 }, { "epoch": 0.7886193103850982, "grad_norm": 0.6999476468286011, "learning_rate": 1.1743714517437146e-06, "loss": 0.5883, "step": 27011 }, { "epoch": 0.7886485066129456, "grad_norm": 0.7531285958407077, "learning_rate": 1.1742092457420927e-06, "loss": 0.7219, "step": 27012 }, { "epoch": 0.788677702840793, "grad_norm": 0.7095123621179492, "learning_rate": 1.1740470397404704e-06, "loss": 0.6373, "step": 27013 }, { "epoch": 0.7887068990686403, "grad_norm": 0.7788452422504779, "learning_rate": 1.1738848337388485e-06, "loss": 0.7007, "step": 27014 }, { "epoch": 0.7887360952964877, "grad_norm": 0.7600891937696256, "learning_rate": 1.1737226277372265e-06, "loss": 0.719, "step": 27015 }, { "epoch": 0.788765291524335, "grad_norm": 0.6887981887944087, "learning_rate": 1.1735604217356043e-06, "loss": 0.5979, "step": 27016 }, { "epoch": 0.7887944877521824, "grad_norm": 0.6747389356864645, "learning_rate": 1.1733982157339823e-06, "loss": 0.5879, "step": 27017 }, { "epoch": 0.7888236839800298, "grad_norm": 0.6865778772084796, "learning_rate": 1.17323600973236e-06, "loss": 0.5836, "step": 27018 }, { "epoch": 0.7888528802078771, "grad_norm": 0.707683778367949, "learning_rate": 1.173073803730738e-06, "loss": 0.6213, "step": 27019 }, { "epoch": 0.7888820764357245, "grad_norm": 0.6729032726406318, "learning_rate": 1.172911597729116e-06, "loss": 0.5747, "step": 27020 }, { "epoch": 0.7889112726635719, "grad_norm": 0.7112211391532638, "learning_rate": 1.172749391727494e-06, "loss": 0.6376, "step": 27021 }, { "epoch": 0.7889404688914192, "grad_norm": 0.7428888613222041, "learning_rate": 1.1725871857258719e-06, "loss": 0.7147, "step": 27022 }, { "epoch": 0.7889696651192666, "grad_norm": 0.725312965557076, "learning_rate": 1.1724249797242499e-06, "loss": 0.7161, "step": 27023 }, { "epoch": 0.7889988613471139, "grad_norm": 0.6545433492893793, "learning_rate": 1.1722627737226279e-06, "loss": 0.5152, "step": 27024 }, { "epoch": 0.7890280575749613, "grad_norm": 0.7169885830103999, "learning_rate": 1.1721005677210059e-06, "loss": 0.6496, "step": 27025 }, { "epoch": 0.7890572538028087, "grad_norm": 0.7732480555724686, "learning_rate": 1.1719383617193837e-06, "loss": 0.6362, "step": 27026 }, { "epoch": 0.789086450030656, "grad_norm": 0.7361138627152878, "learning_rate": 1.1717761557177617e-06, "loss": 0.6625, "step": 27027 }, { "epoch": 0.7891156462585034, "grad_norm": 0.75345360637387, "learning_rate": 1.1716139497161395e-06, "loss": 0.6401, "step": 27028 }, { "epoch": 0.7891448424863508, "grad_norm": 0.6829159045281785, "learning_rate": 1.1714517437145175e-06, "loss": 0.5706, "step": 27029 }, { "epoch": 0.7891740387141981, "grad_norm": 0.7743443204168476, "learning_rate": 1.1712895377128955e-06, "loss": 0.7085, "step": 27030 }, { "epoch": 0.7892032349420455, "grad_norm": 0.8148594992403765, "learning_rate": 1.1711273317112735e-06, "loss": 0.6779, "step": 27031 }, { "epoch": 0.7892324311698928, "grad_norm": 0.8364134558158178, "learning_rate": 1.1709651257096513e-06, "loss": 0.7092, "step": 27032 }, { "epoch": 0.7892616273977402, "grad_norm": 1.0081736627238267, "learning_rate": 1.1708029197080293e-06, "loss": 0.7712, "step": 27033 }, { "epoch": 0.7892908236255876, "grad_norm": 0.7084034505146275, "learning_rate": 1.1706407137064073e-06, "loss": 0.6538, "step": 27034 }, { "epoch": 0.7893200198534349, "grad_norm": 0.763297709178989, "learning_rate": 1.1704785077047851e-06, "loss": 0.7047, "step": 27035 }, { "epoch": 0.7893492160812823, "grad_norm": 0.7285961630805347, "learning_rate": 1.1703163017031631e-06, "loss": 0.6244, "step": 27036 }, { "epoch": 0.7893784123091296, "grad_norm": 0.7894182692258933, "learning_rate": 1.170154095701541e-06, "loss": 0.6621, "step": 27037 }, { "epoch": 0.789407608536977, "grad_norm": 0.7361722827228214, "learning_rate": 1.1699918896999191e-06, "loss": 0.6398, "step": 27038 }, { "epoch": 0.7894368047648244, "grad_norm": 0.7592186784218178, "learning_rate": 1.169829683698297e-06, "loss": 0.6946, "step": 27039 }, { "epoch": 0.7894660009926717, "grad_norm": 0.7132082197891221, "learning_rate": 1.169667477696675e-06, "loss": 0.6112, "step": 27040 }, { "epoch": 0.7894951972205191, "grad_norm": 0.7511766876157486, "learning_rate": 1.1695052716950527e-06, "loss": 0.6604, "step": 27041 }, { "epoch": 0.7895243934483664, "grad_norm": 0.8929223610010524, "learning_rate": 1.1693430656934307e-06, "loss": 0.6907, "step": 27042 }, { "epoch": 0.7895535896762138, "grad_norm": 0.7318073093922681, "learning_rate": 1.1691808596918087e-06, "loss": 0.6858, "step": 27043 }, { "epoch": 0.7895827859040612, "grad_norm": 0.7115754222225632, "learning_rate": 1.1690186536901868e-06, "loss": 0.5497, "step": 27044 }, { "epoch": 0.7896119821319085, "grad_norm": 0.7330807551758024, "learning_rate": 1.1688564476885645e-06, "loss": 0.6139, "step": 27045 }, { "epoch": 0.7896411783597559, "grad_norm": 0.6981633181197667, "learning_rate": 1.1686942416869426e-06, "loss": 0.5848, "step": 27046 }, { "epoch": 0.7896703745876033, "grad_norm": 0.749485232385352, "learning_rate": 1.1685320356853203e-06, "loss": 0.7086, "step": 27047 }, { "epoch": 0.7896995708154506, "grad_norm": 0.7089431761064026, "learning_rate": 1.1683698296836984e-06, "loss": 0.5947, "step": 27048 }, { "epoch": 0.789728767043298, "grad_norm": 0.6900500261712835, "learning_rate": 1.1682076236820764e-06, "loss": 0.5677, "step": 27049 }, { "epoch": 0.7897579632711453, "grad_norm": 0.7503041228806132, "learning_rate": 1.1680454176804544e-06, "loss": 0.6997, "step": 27050 }, { "epoch": 0.7897871594989927, "grad_norm": 0.6807472632060139, "learning_rate": 1.1678832116788322e-06, "loss": 0.5827, "step": 27051 }, { "epoch": 0.7898163557268401, "grad_norm": 0.6875452801745735, "learning_rate": 1.1677210056772102e-06, "loss": 0.6213, "step": 27052 }, { "epoch": 0.7898455519546874, "grad_norm": 0.6677509639452619, "learning_rate": 1.1675587996755882e-06, "loss": 0.5413, "step": 27053 }, { "epoch": 0.7898747481825348, "grad_norm": 0.7043986469366482, "learning_rate": 1.167396593673966e-06, "loss": 0.6021, "step": 27054 }, { "epoch": 0.7899039444103821, "grad_norm": 0.6789029801736548, "learning_rate": 1.167234387672344e-06, "loss": 0.5739, "step": 27055 }, { "epoch": 0.7899331406382295, "grad_norm": 0.7609993499066906, "learning_rate": 1.1670721816707218e-06, "loss": 0.5947, "step": 27056 }, { "epoch": 0.7899623368660769, "grad_norm": 0.7432709146341616, "learning_rate": 1.1669099756691e-06, "loss": 0.624, "step": 27057 }, { "epoch": 0.7899915330939242, "grad_norm": 0.6781189742819123, "learning_rate": 1.1667477696674778e-06, "loss": 0.5842, "step": 27058 }, { "epoch": 0.7900207293217716, "grad_norm": 0.7300844485426083, "learning_rate": 1.1665855636658558e-06, "loss": 0.627, "step": 27059 }, { "epoch": 0.790049925549619, "grad_norm": 0.6736052200075273, "learning_rate": 1.1664233576642336e-06, "loss": 0.5883, "step": 27060 }, { "epoch": 0.7900791217774663, "grad_norm": 0.7493013627867386, "learning_rate": 1.1662611516626116e-06, "loss": 0.6587, "step": 27061 }, { "epoch": 0.7901083180053137, "grad_norm": 0.6238729521733063, "learning_rate": 1.1660989456609896e-06, "loss": 0.5096, "step": 27062 }, { "epoch": 0.790137514233161, "grad_norm": 0.6873258307974316, "learning_rate": 1.1659367396593676e-06, "loss": 0.5647, "step": 27063 }, { "epoch": 0.7901667104610084, "grad_norm": 0.7425738610771058, "learning_rate": 1.1657745336577454e-06, "loss": 0.6044, "step": 27064 }, { "epoch": 0.7901959066888558, "grad_norm": 0.7520991648308308, "learning_rate": 1.1656123276561234e-06, "loss": 0.6831, "step": 27065 }, { "epoch": 0.7902251029167031, "grad_norm": 0.7437394141469718, "learning_rate": 1.1654501216545012e-06, "loss": 0.6754, "step": 27066 }, { "epoch": 0.7902542991445505, "grad_norm": 0.7982187114247924, "learning_rate": 1.1652879156528792e-06, "loss": 0.7325, "step": 27067 }, { "epoch": 0.7902834953723978, "grad_norm": 0.7223429972711725, "learning_rate": 1.1651257096512572e-06, "loss": 0.631, "step": 27068 }, { "epoch": 0.7903126916002452, "grad_norm": 0.677458794402668, "learning_rate": 1.164963503649635e-06, "loss": 0.5903, "step": 27069 }, { "epoch": 0.7903418878280926, "grad_norm": 0.8009953317258532, "learning_rate": 1.164801297648013e-06, "loss": 0.6745, "step": 27070 }, { "epoch": 0.7903710840559399, "grad_norm": 0.6860355511410354, "learning_rate": 1.164639091646391e-06, "loss": 0.512, "step": 27071 }, { "epoch": 0.7904002802837873, "grad_norm": 0.7263801133471418, "learning_rate": 1.164476885644769e-06, "loss": 0.5744, "step": 27072 }, { "epoch": 0.7904294765116346, "grad_norm": 0.7532885043284454, "learning_rate": 1.1643146796431468e-06, "loss": 0.6264, "step": 27073 }, { "epoch": 0.790458672739482, "grad_norm": 0.7189399845867467, "learning_rate": 1.1641524736415248e-06, "loss": 0.6785, "step": 27074 }, { "epoch": 0.7904878689673294, "grad_norm": 0.6999862068606028, "learning_rate": 1.1639902676399026e-06, "loss": 0.6125, "step": 27075 }, { "epoch": 0.7905170651951768, "grad_norm": 0.7036510296901919, "learning_rate": 1.1638280616382809e-06, "loss": 0.5986, "step": 27076 }, { "epoch": 0.7905462614230242, "grad_norm": 0.7290973609336189, "learning_rate": 1.1636658556366586e-06, "loss": 0.6465, "step": 27077 }, { "epoch": 0.7905754576508716, "grad_norm": 0.7907630906077451, "learning_rate": 1.1635036496350367e-06, "loss": 0.7338, "step": 27078 }, { "epoch": 0.7906046538787189, "grad_norm": 0.69946960580634, "learning_rate": 1.1633414436334144e-06, "loss": 0.6103, "step": 27079 }, { "epoch": 0.7906338501065663, "grad_norm": 0.7559794720542015, "learning_rate": 1.1631792376317925e-06, "loss": 0.6775, "step": 27080 }, { "epoch": 0.7906630463344136, "grad_norm": 0.7380181024160059, "learning_rate": 1.1630170316301705e-06, "loss": 0.6391, "step": 27081 }, { "epoch": 0.790692242562261, "grad_norm": 0.6970815674943078, "learning_rate": 1.1628548256285485e-06, "loss": 0.5826, "step": 27082 }, { "epoch": 0.7907214387901084, "grad_norm": 0.7490848465682354, "learning_rate": 1.1626926196269263e-06, "loss": 0.6336, "step": 27083 }, { "epoch": 0.7907506350179557, "grad_norm": 0.7861571975397814, "learning_rate": 1.1625304136253043e-06, "loss": 0.7161, "step": 27084 }, { "epoch": 0.7907798312458031, "grad_norm": 0.900774803957969, "learning_rate": 1.162368207623682e-06, "loss": 0.7588, "step": 27085 }, { "epoch": 0.7908090274736504, "grad_norm": 0.6593499580623117, "learning_rate": 1.16220600162206e-06, "loss": 0.5454, "step": 27086 }, { "epoch": 0.7908382237014978, "grad_norm": 0.7138390014238363, "learning_rate": 1.162043795620438e-06, "loss": 0.5996, "step": 27087 }, { "epoch": 0.7908674199293452, "grad_norm": 0.7255631188649173, "learning_rate": 1.1618815896188159e-06, "loss": 0.6507, "step": 27088 }, { "epoch": 0.7908966161571925, "grad_norm": 0.7353148042942387, "learning_rate": 1.1617193836171939e-06, "loss": 0.6286, "step": 27089 }, { "epoch": 0.7909258123850399, "grad_norm": 0.6677987122419348, "learning_rate": 1.1615571776155719e-06, "loss": 0.5468, "step": 27090 }, { "epoch": 0.7909550086128873, "grad_norm": 0.8346874417617257, "learning_rate": 1.1613949716139499e-06, "loss": 0.655, "step": 27091 }, { "epoch": 0.7909842048407346, "grad_norm": 0.7624447728610908, "learning_rate": 1.1612327656123277e-06, "loss": 0.6882, "step": 27092 }, { "epoch": 0.791013401068582, "grad_norm": 0.6946164408384595, "learning_rate": 1.1610705596107057e-06, "loss": 0.619, "step": 27093 }, { "epoch": 0.7910425972964293, "grad_norm": 0.7295399652706914, "learning_rate": 1.1609083536090835e-06, "loss": 0.6367, "step": 27094 }, { "epoch": 0.7910717935242767, "grad_norm": 0.7485729382933473, "learning_rate": 1.1607461476074617e-06, "loss": 0.704, "step": 27095 }, { "epoch": 0.7911009897521241, "grad_norm": 0.7354473381448267, "learning_rate": 1.1605839416058395e-06, "loss": 0.6234, "step": 27096 }, { "epoch": 0.7911301859799714, "grad_norm": 0.7364374415568028, "learning_rate": 1.1604217356042175e-06, "loss": 0.6538, "step": 27097 }, { "epoch": 0.7911593822078188, "grad_norm": 0.6958572522592655, "learning_rate": 1.1602595296025953e-06, "loss": 0.5284, "step": 27098 }, { "epoch": 0.7911885784356661, "grad_norm": 0.7299440190748658, "learning_rate": 1.1600973236009733e-06, "loss": 0.6176, "step": 27099 }, { "epoch": 0.7912177746635135, "grad_norm": 0.6729115717647179, "learning_rate": 1.1599351175993513e-06, "loss": 0.5117, "step": 27100 }, { "epoch": 0.7912469708913609, "grad_norm": 0.7291230792589422, "learning_rate": 1.1597729115977293e-06, "loss": 0.6193, "step": 27101 }, { "epoch": 0.7912761671192082, "grad_norm": 0.7046628986795812, "learning_rate": 1.1596107055961071e-06, "loss": 0.5867, "step": 27102 }, { "epoch": 0.7913053633470556, "grad_norm": 0.7342318399588096, "learning_rate": 1.1594484995944851e-06, "loss": 0.6563, "step": 27103 }, { "epoch": 0.791334559574903, "grad_norm": 0.6704081242974012, "learning_rate": 1.1592862935928631e-06, "loss": 0.5749, "step": 27104 }, { "epoch": 0.7913637558027503, "grad_norm": 0.722570242571939, "learning_rate": 1.159124087591241e-06, "loss": 0.6256, "step": 27105 }, { "epoch": 0.7913929520305977, "grad_norm": 0.7086578781754626, "learning_rate": 1.158961881589619e-06, "loss": 0.6454, "step": 27106 }, { "epoch": 0.791422148258445, "grad_norm": 0.6925144236857391, "learning_rate": 1.1587996755879967e-06, "loss": 0.5751, "step": 27107 }, { "epoch": 0.7914513444862924, "grad_norm": 0.7064905977976402, "learning_rate": 1.1586374695863747e-06, "loss": 0.6214, "step": 27108 }, { "epoch": 0.7914805407141398, "grad_norm": 0.729472469082066, "learning_rate": 1.1584752635847527e-06, "loss": 0.6589, "step": 27109 }, { "epoch": 0.7915097369419871, "grad_norm": 0.7176746023556453, "learning_rate": 1.1583130575831308e-06, "loss": 0.6257, "step": 27110 }, { "epoch": 0.7915389331698345, "grad_norm": 0.6621181445632041, "learning_rate": 1.1581508515815085e-06, "loss": 0.5193, "step": 27111 }, { "epoch": 0.7915681293976818, "grad_norm": 0.7545642018582177, "learning_rate": 1.1579886455798866e-06, "loss": 0.6672, "step": 27112 }, { "epoch": 0.7915973256255292, "grad_norm": 0.7385204578142335, "learning_rate": 1.1578264395782643e-06, "loss": 0.6063, "step": 27113 }, { "epoch": 0.7916265218533766, "grad_norm": 0.7068383843373232, "learning_rate": 1.1576642335766426e-06, "loss": 0.6124, "step": 27114 }, { "epoch": 0.7916557180812239, "grad_norm": 0.774273842533017, "learning_rate": 1.1575020275750204e-06, "loss": 0.7015, "step": 27115 }, { "epoch": 0.7916849143090713, "grad_norm": 0.7228549020060022, "learning_rate": 1.1573398215733984e-06, "loss": 0.6187, "step": 27116 }, { "epoch": 0.7917141105369186, "grad_norm": 0.7947228020770768, "learning_rate": 1.1571776155717762e-06, "loss": 0.6257, "step": 27117 }, { "epoch": 0.791743306764766, "grad_norm": 0.7199282330985247, "learning_rate": 1.1570154095701542e-06, "loss": 0.6759, "step": 27118 }, { "epoch": 0.7917725029926134, "grad_norm": 0.7547323151631485, "learning_rate": 1.1568532035685322e-06, "loss": 0.6998, "step": 27119 }, { "epoch": 0.7918016992204607, "grad_norm": 0.7104492849055907, "learning_rate": 1.1566909975669102e-06, "loss": 0.6187, "step": 27120 }, { "epoch": 0.7918308954483081, "grad_norm": 0.7655833504583431, "learning_rate": 1.156528791565288e-06, "loss": 0.7003, "step": 27121 }, { "epoch": 0.7918600916761555, "grad_norm": 0.6921061206908076, "learning_rate": 1.156366585563666e-06, "loss": 0.5972, "step": 27122 }, { "epoch": 0.7918892879040028, "grad_norm": 0.670872881393077, "learning_rate": 1.156204379562044e-06, "loss": 0.5929, "step": 27123 }, { "epoch": 0.7919184841318502, "grad_norm": 0.6909063247247499, "learning_rate": 1.1560421735604218e-06, "loss": 0.5849, "step": 27124 }, { "epoch": 0.7919476803596975, "grad_norm": 0.7186738799555278, "learning_rate": 1.1558799675587998e-06, "loss": 0.609, "step": 27125 }, { "epoch": 0.7919768765875449, "grad_norm": 0.7375695210450265, "learning_rate": 1.1557177615571776e-06, "loss": 0.6758, "step": 27126 }, { "epoch": 0.7920060728153923, "grad_norm": 0.7460214325004304, "learning_rate": 1.1555555555555556e-06, "loss": 0.6534, "step": 27127 }, { "epoch": 0.7920352690432396, "grad_norm": 0.6884940980700529, "learning_rate": 1.1553933495539336e-06, "loss": 0.5905, "step": 27128 }, { "epoch": 0.792064465271087, "grad_norm": 0.6503973859465815, "learning_rate": 1.1552311435523116e-06, "loss": 0.5027, "step": 27129 }, { "epoch": 0.7920936614989343, "grad_norm": 0.6813360608058681, "learning_rate": 1.1550689375506894e-06, "loss": 0.5785, "step": 27130 }, { "epoch": 0.7921228577267817, "grad_norm": 0.7600290410377022, "learning_rate": 1.1549067315490674e-06, "loss": 0.6701, "step": 27131 }, { "epoch": 0.7921520539546291, "grad_norm": 0.6608135101473372, "learning_rate": 1.1547445255474452e-06, "loss": 0.543, "step": 27132 }, { "epoch": 0.7921812501824764, "grad_norm": 0.7243376021441277, "learning_rate": 1.1545823195458234e-06, "loss": 0.62, "step": 27133 }, { "epoch": 0.7922104464103238, "grad_norm": 0.7573888409404819, "learning_rate": 1.1544201135442012e-06, "loss": 0.6909, "step": 27134 }, { "epoch": 0.7922396426381711, "grad_norm": 0.6638483280224763, "learning_rate": 1.1542579075425792e-06, "loss": 0.5155, "step": 27135 }, { "epoch": 0.7922688388660185, "grad_norm": 0.7184620719809018, "learning_rate": 1.154095701540957e-06, "loss": 0.6327, "step": 27136 }, { "epoch": 0.7922980350938659, "grad_norm": 0.7312281936866797, "learning_rate": 1.153933495539335e-06, "loss": 0.6134, "step": 27137 }, { "epoch": 0.7923272313217132, "grad_norm": 0.7115060921977089, "learning_rate": 1.153771289537713e-06, "loss": 0.613, "step": 27138 }, { "epoch": 0.7923564275495606, "grad_norm": 0.7018628542400718, "learning_rate": 1.153609083536091e-06, "loss": 0.51, "step": 27139 }, { "epoch": 0.792385623777408, "grad_norm": 0.7557308755614319, "learning_rate": 1.1534468775344688e-06, "loss": 0.7096, "step": 27140 }, { "epoch": 0.7924148200052553, "grad_norm": 0.7574930857141294, "learning_rate": 1.1532846715328468e-06, "loss": 0.6002, "step": 27141 }, { "epoch": 0.7924440162331027, "grad_norm": 0.7060211880059961, "learning_rate": 1.1531224655312249e-06, "loss": 0.588, "step": 27142 }, { "epoch": 0.79247321246095, "grad_norm": 0.7720500149914894, "learning_rate": 1.1529602595296026e-06, "loss": 0.6734, "step": 27143 }, { "epoch": 0.7925024086887974, "grad_norm": 0.7026455425922924, "learning_rate": 1.1527980535279807e-06, "loss": 0.6221, "step": 27144 }, { "epoch": 0.7925316049166448, "grad_norm": 0.7388674007824756, "learning_rate": 1.1526358475263584e-06, "loss": 0.6545, "step": 27145 }, { "epoch": 0.7925608011444921, "grad_norm": 0.6865109364363482, "learning_rate": 1.1524736415247365e-06, "loss": 0.5216, "step": 27146 }, { "epoch": 0.7925899973723395, "grad_norm": 0.7873894077688487, "learning_rate": 1.1523114355231145e-06, "loss": 0.7056, "step": 27147 }, { "epoch": 0.7926191936001868, "grad_norm": 0.756575915179694, "learning_rate": 1.1521492295214925e-06, "loss": 0.6917, "step": 27148 }, { "epoch": 0.7926483898280342, "grad_norm": 0.6846838075676674, "learning_rate": 1.1519870235198703e-06, "loss": 0.5572, "step": 27149 }, { "epoch": 0.7926775860558816, "grad_norm": 0.7410450771400008, "learning_rate": 1.1518248175182483e-06, "loss": 0.6901, "step": 27150 }, { "epoch": 0.7927067822837289, "grad_norm": 0.6903487631788878, "learning_rate": 1.151662611516626e-06, "loss": 0.5659, "step": 27151 }, { "epoch": 0.7927359785115763, "grad_norm": 0.6836280192159779, "learning_rate": 1.1515004055150043e-06, "loss": 0.5702, "step": 27152 }, { "epoch": 0.7927651747394237, "grad_norm": 0.6856056307220271, "learning_rate": 1.151338199513382e-06, "loss": 0.591, "step": 27153 }, { "epoch": 0.792794370967271, "grad_norm": 0.7497531925203199, "learning_rate": 1.15117599351176e-06, "loss": 0.6569, "step": 27154 }, { "epoch": 0.7928235671951184, "grad_norm": 0.7785506780707004, "learning_rate": 1.1510137875101379e-06, "loss": 0.6307, "step": 27155 }, { "epoch": 0.7928527634229657, "grad_norm": 0.7381299706795703, "learning_rate": 1.1508515815085159e-06, "loss": 0.637, "step": 27156 }, { "epoch": 0.7928819596508131, "grad_norm": 0.7041091024058375, "learning_rate": 1.150689375506894e-06, "loss": 0.6313, "step": 27157 }, { "epoch": 0.7929111558786605, "grad_norm": 0.7317794381800823, "learning_rate": 1.1505271695052717e-06, "loss": 0.6892, "step": 27158 }, { "epoch": 0.7929403521065078, "grad_norm": 0.7598739593298202, "learning_rate": 1.1503649635036497e-06, "loss": 0.6656, "step": 27159 }, { "epoch": 0.7929695483343552, "grad_norm": 0.7293944635895411, "learning_rate": 1.1502027575020277e-06, "loss": 0.6145, "step": 27160 }, { "epoch": 0.7929987445622025, "grad_norm": 0.6935744655119038, "learning_rate": 1.1500405515004057e-06, "loss": 0.6394, "step": 27161 }, { "epoch": 0.7930279407900499, "grad_norm": 0.6880627824990242, "learning_rate": 1.1498783454987835e-06, "loss": 0.5687, "step": 27162 }, { "epoch": 0.7930571370178973, "grad_norm": 0.8087035776628417, "learning_rate": 1.1497161394971615e-06, "loss": 0.673, "step": 27163 }, { "epoch": 0.7930863332457446, "grad_norm": 0.7137745618095122, "learning_rate": 1.1495539334955393e-06, "loss": 0.5775, "step": 27164 }, { "epoch": 0.793115529473592, "grad_norm": 0.7396550359517484, "learning_rate": 1.1493917274939173e-06, "loss": 0.6276, "step": 27165 }, { "epoch": 0.7931447257014393, "grad_norm": 0.7575435903949334, "learning_rate": 1.1492295214922953e-06, "loss": 0.6778, "step": 27166 }, { "epoch": 0.7931739219292867, "grad_norm": 0.7250895575249341, "learning_rate": 1.1490673154906733e-06, "loss": 0.6507, "step": 27167 }, { "epoch": 0.7932031181571341, "grad_norm": 0.6593583131501842, "learning_rate": 1.1489051094890511e-06, "loss": 0.5496, "step": 27168 }, { "epoch": 0.7932323143849814, "grad_norm": 0.73251437699393, "learning_rate": 1.1487429034874291e-06, "loss": 0.6751, "step": 27169 }, { "epoch": 0.7932615106128288, "grad_norm": 0.7261001567178276, "learning_rate": 1.148580697485807e-06, "loss": 0.6148, "step": 27170 }, { "epoch": 0.7932907068406762, "grad_norm": 0.7747013410474201, "learning_rate": 1.1484184914841851e-06, "loss": 0.7367, "step": 27171 }, { "epoch": 0.7933199030685235, "grad_norm": 0.7298334796051222, "learning_rate": 1.148256285482563e-06, "loss": 0.5845, "step": 27172 }, { "epoch": 0.7933490992963709, "grad_norm": 0.6968653292707578, "learning_rate": 1.148094079480941e-06, "loss": 0.6289, "step": 27173 }, { "epoch": 0.7933782955242182, "grad_norm": 0.6909540196557706, "learning_rate": 1.1479318734793187e-06, "loss": 0.5812, "step": 27174 }, { "epoch": 0.7934074917520656, "grad_norm": 0.7166544574329198, "learning_rate": 1.1477696674776967e-06, "loss": 0.6155, "step": 27175 }, { "epoch": 0.793436687979913, "grad_norm": 0.7545335827638974, "learning_rate": 1.1476074614760748e-06, "loss": 0.64, "step": 27176 }, { "epoch": 0.7934658842077603, "grad_norm": 0.7108880332843242, "learning_rate": 1.1474452554744525e-06, "loss": 0.6568, "step": 27177 }, { "epoch": 0.7934950804356077, "grad_norm": 0.708729801634996, "learning_rate": 1.1472830494728306e-06, "loss": 0.6476, "step": 27178 }, { "epoch": 0.793524276663455, "grad_norm": 0.6486442787159848, "learning_rate": 1.1471208434712086e-06, "loss": 0.5507, "step": 27179 }, { "epoch": 0.7935534728913024, "grad_norm": 0.7268052881909122, "learning_rate": 1.1469586374695866e-06, "loss": 0.646, "step": 27180 }, { "epoch": 0.7935826691191498, "grad_norm": 0.7296251830537901, "learning_rate": 1.1467964314679644e-06, "loss": 0.6351, "step": 27181 }, { "epoch": 0.7936118653469971, "grad_norm": 0.7591457059587639, "learning_rate": 1.1466342254663424e-06, "loss": 0.6494, "step": 27182 }, { "epoch": 0.7936410615748445, "grad_norm": 0.7051068001531406, "learning_rate": 1.1464720194647202e-06, "loss": 0.6096, "step": 27183 }, { "epoch": 0.7936702578026918, "grad_norm": 0.7510461388648894, "learning_rate": 1.1463098134630982e-06, "loss": 0.6336, "step": 27184 }, { "epoch": 0.7936994540305392, "grad_norm": 0.7089349461131512, "learning_rate": 1.1461476074614762e-06, "loss": 0.6457, "step": 27185 }, { "epoch": 0.7937286502583866, "grad_norm": 0.7833304495202226, "learning_rate": 1.1459854014598542e-06, "loss": 0.7634, "step": 27186 }, { "epoch": 0.7937578464862339, "grad_norm": 0.713973694038567, "learning_rate": 1.145823195458232e-06, "loss": 0.5921, "step": 27187 }, { "epoch": 0.7937870427140813, "grad_norm": 0.7080902405727897, "learning_rate": 1.14566098945661e-06, "loss": 0.5596, "step": 27188 }, { "epoch": 0.7938162389419287, "grad_norm": 0.7820626591412906, "learning_rate": 1.145498783454988e-06, "loss": 0.6963, "step": 27189 }, { "epoch": 0.793845435169776, "grad_norm": 0.7027132702023287, "learning_rate": 1.145336577453366e-06, "loss": 0.5752, "step": 27190 }, { "epoch": 0.7938746313976234, "grad_norm": 0.7476359074780002, "learning_rate": 1.1451743714517438e-06, "loss": 0.6233, "step": 27191 }, { "epoch": 0.7939038276254707, "grad_norm": 0.7016101941538765, "learning_rate": 1.1450121654501218e-06, "loss": 0.6166, "step": 27192 }, { "epoch": 0.7939330238533181, "grad_norm": 0.7560915437737452, "learning_rate": 1.1448499594484996e-06, "loss": 0.6848, "step": 27193 }, { "epoch": 0.7939622200811655, "grad_norm": 0.7345335598680638, "learning_rate": 1.1446877534468776e-06, "loss": 0.6263, "step": 27194 }, { "epoch": 0.7939914163090128, "grad_norm": 0.7642235553853305, "learning_rate": 1.1445255474452556e-06, "loss": 0.714, "step": 27195 }, { "epoch": 0.7940206125368603, "grad_norm": 0.7013983939698857, "learning_rate": 1.1443633414436334e-06, "loss": 0.5947, "step": 27196 }, { "epoch": 0.7940498087647077, "grad_norm": 0.6914531886636177, "learning_rate": 1.1442011354420114e-06, "loss": 0.6211, "step": 27197 }, { "epoch": 0.794079004992555, "grad_norm": 0.729141304630888, "learning_rate": 1.1440389294403894e-06, "loss": 0.6443, "step": 27198 }, { "epoch": 0.7941082012204024, "grad_norm": 0.708109910508343, "learning_rate": 1.1438767234387674e-06, "loss": 0.6711, "step": 27199 }, { "epoch": 0.7941373974482497, "grad_norm": 0.7056620604543652, "learning_rate": 1.1437145174371452e-06, "loss": 0.6089, "step": 27200 }, { "epoch": 0.7941665936760971, "grad_norm": 0.6977123776437821, "learning_rate": 1.1435523114355232e-06, "loss": 0.5664, "step": 27201 }, { "epoch": 0.7941957899039445, "grad_norm": 0.7019533990317587, "learning_rate": 1.143390105433901e-06, "loss": 0.6252, "step": 27202 }, { "epoch": 0.7942249861317918, "grad_norm": 0.7454639336151616, "learning_rate": 1.143227899432279e-06, "loss": 0.6909, "step": 27203 }, { "epoch": 0.7942541823596392, "grad_norm": 0.7907679230190615, "learning_rate": 1.143065693430657e-06, "loss": 0.7323, "step": 27204 }, { "epoch": 0.7942833785874865, "grad_norm": 0.6778509468935234, "learning_rate": 1.142903487429035e-06, "loss": 0.6103, "step": 27205 }, { "epoch": 0.7943125748153339, "grad_norm": 0.7253280949988, "learning_rate": 1.1427412814274128e-06, "loss": 0.5661, "step": 27206 }, { "epoch": 0.7943417710431813, "grad_norm": 0.7113870770535798, "learning_rate": 1.1425790754257908e-06, "loss": 0.627, "step": 27207 }, { "epoch": 0.7943709672710286, "grad_norm": 0.7751594704784612, "learning_rate": 1.1424168694241689e-06, "loss": 0.6457, "step": 27208 }, { "epoch": 0.794400163498876, "grad_norm": 0.7062588584096255, "learning_rate": 1.1422546634225469e-06, "loss": 0.6292, "step": 27209 }, { "epoch": 0.7944293597267233, "grad_norm": 0.7236409601734552, "learning_rate": 1.1420924574209247e-06, "loss": 0.5942, "step": 27210 }, { "epoch": 0.7944585559545707, "grad_norm": 0.692391414324122, "learning_rate": 1.1419302514193027e-06, "loss": 0.5366, "step": 27211 }, { "epoch": 0.7944877521824181, "grad_norm": 0.7294929260128701, "learning_rate": 1.1417680454176805e-06, "loss": 0.6599, "step": 27212 }, { "epoch": 0.7945169484102654, "grad_norm": 0.747659309251581, "learning_rate": 1.1416058394160585e-06, "loss": 0.6652, "step": 27213 }, { "epoch": 0.7945461446381128, "grad_norm": 0.7563115017541369, "learning_rate": 1.1414436334144365e-06, "loss": 0.6716, "step": 27214 }, { "epoch": 0.7945753408659602, "grad_norm": 0.703598505405761, "learning_rate": 1.1412814274128143e-06, "loss": 0.5951, "step": 27215 }, { "epoch": 0.7946045370938075, "grad_norm": 0.7028160523224797, "learning_rate": 1.1411192214111923e-06, "loss": 0.6607, "step": 27216 }, { "epoch": 0.7946337333216549, "grad_norm": 0.7024430343087033, "learning_rate": 1.1409570154095703e-06, "loss": 0.5906, "step": 27217 }, { "epoch": 0.7946629295495022, "grad_norm": 0.6584335368149768, "learning_rate": 1.1407948094079483e-06, "loss": 0.5555, "step": 27218 }, { "epoch": 0.7946921257773496, "grad_norm": 0.7377938490239798, "learning_rate": 1.140632603406326e-06, "loss": 0.6613, "step": 27219 }, { "epoch": 0.794721322005197, "grad_norm": 0.7358009769602615, "learning_rate": 1.140470397404704e-06, "loss": 0.6617, "step": 27220 }, { "epoch": 0.7947505182330443, "grad_norm": 0.7571028282063275, "learning_rate": 1.1403081914030819e-06, "loss": 0.6026, "step": 27221 }, { "epoch": 0.7947797144608917, "grad_norm": 0.683025610323942, "learning_rate": 1.14014598540146e-06, "loss": 0.5936, "step": 27222 }, { "epoch": 0.794808910688739, "grad_norm": 0.7054487505414367, "learning_rate": 1.139983779399838e-06, "loss": 0.6343, "step": 27223 }, { "epoch": 0.7948381069165864, "grad_norm": 0.7136154745194204, "learning_rate": 1.139821573398216e-06, "loss": 0.6067, "step": 27224 }, { "epoch": 0.7948673031444338, "grad_norm": 0.6890825831549453, "learning_rate": 1.1396593673965937e-06, "loss": 0.5948, "step": 27225 }, { "epoch": 0.7948964993722811, "grad_norm": 0.6737694342331717, "learning_rate": 1.1394971613949717e-06, "loss": 0.5292, "step": 27226 }, { "epoch": 0.7949256956001285, "grad_norm": 0.6864726283143505, "learning_rate": 1.1393349553933497e-06, "loss": 0.5885, "step": 27227 }, { "epoch": 0.7949548918279759, "grad_norm": 0.7728289289590407, "learning_rate": 1.1391727493917277e-06, "loss": 0.6904, "step": 27228 }, { "epoch": 0.7949840880558232, "grad_norm": 0.7648212025211814, "learning_rate": 1.1390105433901055e-06, "loss": 0.6661, "step": 27229 }, { "epoch": 0.7950132842836706, "grad_norm": 0.7801275673519042, "learning_rate": 1.1388483373884835e-06, "loss": 0.67, "step": 27230 }, { "epoch": 0.7950424805115179, "grad_norm": 0.7055871187258411, "learning_rate": 1.1386861313868613e-06, "loss": 0.6212, "step": 27231 }, { "epoch": 0.7950716767393653, "grad_norm": 0.7209354590496846, "learning_rate": 1.1385239253852393e-06, "loss": 0.5862, "step": 27232 }, { "epoch": 0.7951008729672127, "grad_norm": 0.7490269283308504, "learning_rate": 1.1383617193836173e-06, "loss": 0.6721, "step": 27233 }, { "epoch": 0.79513006919506, "grad_norm": 0.7066147161235767, "learning_rate": 1.1381995133819951e-06, "loss": 0.6179, "step": 27234 }, { "epoch": 0.7951592654229074, "grad_norm": 0.7466008839116237, "learning_rate": 1.1380373073803731e-06, "loss": 0.6902, "step": 27235 }, { "epoch": 0.7951884616507547, "grad_norm": 0.7442822880104286, "learning_rate": 1.1378751013787511e-06, "loss": 0.644, "step": 27236 }, { "epoch": 0.7952176578786021, "grad_norm": 0.6723713926707587, "learning_rate": 1.1377128953771291e-06, "loss": 0.5908, "step": 27237 }, { "epoch": 0.7952468541064495, "grad_norm": 0.7772965480865126, "learning_rate": 1.137550689375507e-06, "loss": 0.6764, "step": 27238 }, { "epoch": 0.7952760503342968, "grad_norm": 0.7887695966791919, "learning_rate": 1.137388483373885e-06, "loss": 0.6727, "step": 27239 }, { "epoch": 0.7953052465621442, "grad_norm": 0.7388503169061579, "learning_rate": 1.1372262773722627e-06, "loss": 0.6059, "step": 27240 }, { "epoch": 0.7953344427899915, "grad_norm": 0.645162718308968, "learning_rate": 1.137064071370641e-06, "loss": 0.5239, "step": 27241 }, { "epoch": 0.7953636390178389, "grad_norm": 0.6978916008071043, "learning_rate": 1.1369018653690188e-06, "loss": 0.581, "step": 27242 }, { "epoch": 0.7953928352456863, "grad_norm": 0.7001472685306049, "learning_rate": 1.1367396593673968e-06, "loss": 0.5985, "step": 27243 }, { "epoch": 0.7954220314735336, "grad_norm": 0.7279512290753453, "learning_rate": 1.1365774533657746e-06, "loss": 0.64, "step": 27244 }, { "epoch": 0.795451227701381, "grad_norm": 0.8026699439569761, "learning_rate": 1.1364152473641526e-06, "loss": 0.7686, "step": 27245 }, { "epoch": 0.7954804239292284, "grad_norm": 0.7392055687919613, "learning_rate": 1.1362530413625306e-06, "loss": 0.6674, "step": 27246 }, { "epoch": 0.7955096201570757, "grad_norm": 0.7213916530176621, "learning_rate": 1.1360908353609086e-06, "loss": 0.6299, "step": 27247 }, { "epoch": 0.7955388163849231, "grad_norm": 0.7474619932222913, "learning_rate": 1.1359286293592864e-06, "loss": 0.6798, "step": 27248 }, { "epoch": 0.7955680126127704, "grad_norm": 0.7257481924998115, "learning_rate": 1.1357664233576644e-06, "loss": 0.6329, "step": 27249 }, { "epoch": 0.7955972088406178, "grad_norm": 0.6874720277759515, "learning_rate": 1.1356042173560422e-06, "loss": 0.5598, "step": 27250 }, { "epoch": 0.7956264050684652, "grad_norm": 0.735136723649054, "learning_rate": 1.1354420113544202e-06, "loss": 0.6619, "step": 27251 }, { "epoch": 0.7956556012963125, "grad_norm": 0.7374948482548834, "learning_rate": 1.1352798053527982e-06, "loss": 0.6291, "step": 27252 }, { "epoch": 0.7956847975241599, "grad_norm": 0.7306518075565805, "learning_rate": 1.135117599351176e-06, "loss": 0.6764, "step": 27253 }, { "epoch": 0.7957139937520072, "grad_norm": 0.7191745607273842, "learning_rate": 1.134955393349554e-06, "loss": 0.6015, "step": 27254 }, { "epoch": 0.7957431899798546, "grad_norm": 0.6749737708695026, "learning_rate": 1.134793187347932e-06, "loss": 0.6164, "step": 27255 }, { "epoch": 0.795772386207702, "grad_norm": 0.6880436473070997, "learning_rate": 1.13463098134631e-06, "loss": 0.5835, "step": 27256 }, { "epoch": 0.7958015824355493, "grad_norm": 0.7509656942259146, "learning_rate": 1.1344687753446878e-06, "loss": 0.6457, "step": 27257 }, { "epoch": 0.7958307786633967, "grad_norm": 0.7102992547936081, "learning_rate": 1.1343065693430658e-06, "loss": 0.6063, "step": 27258 }, { "epoch": 0.795859974891244, "grad_norm": 0.6480959759355921, "learning_rate": 1.1341443633414436e-06, "loss": 0.521, "step": 27259 }, { "epoch": 0.7958891711190914, "grad_norm": 0.7023307437643442, "learning_rate": 1.1339821573398218e-06, "loss": 0.5835, "step": 27260 }, { "epoch": 0.7959183673469388, "grad_norm": 0.7199480779197471, "learning_rate": 1.1338199513381996e-06, "loss": 0.6807, "step": 27261 }, { "epoch": 0.7959475635747861, "grad_norm": 0.7152115961404847, "learning_rate": 1.1336577453365776e-06, "loss": 0.6353, "step": 27262 }, { "epoch": 0.7959767598026335, "grad_norm": 0.7320114275474695, "learning_rate": 1.1334955393349554e-06, "loss": 0.6497, "step": 27263 }, { "epoch": 0.7960059560304809, "grad_norm": 0.8060122818678795, "learning_rate": 1.1333333333333334e-06, "loss": 0.685, "step": 27264 }, { "epoch": 0.7960351522583282, "grad_norm": 0.6548737973171151, "learning_rate": 1.1331711273317114e-06, "loss": 0.524, "step": 27265 }, { "epoch": 0.7960643484861756, "grad_norm": 0.7293174331200818, "learning_rate": 1.1330089213300892e-06, "loss": 0.6945, "step": 27266 }, { "epoch": 0.7960935447140229, "grad_norm": 0.7332770592654677, "learning_rate": 1.1328467153284672e-06, "loss": 0.6054, "step": 27267 }, { "epoch": 0.7961227409418703, "grad_norm": 0.7885770643442247, "learning_rate": 1.1326845093268452e-06, "loss": 0.6887, "step": 27268 }, { "epoch": 0.7961519371697177, "grad_norm": 0.7477493274971534, "learning_rate": 1.132522303325223e-06, "loss": 0.6508, "step": 27269 }, { "epoch": 0.796181133397565, "grad_norm": 0.7515633901217124, "learning_rate": 1.132360097323601e-06, "loss": 0.6365, "step": 27270 }, { "epoch": 0.7962103296254124, "grad_norm": 0.722758770906906, "learning_rate": 1.132197891321979e-06, "loss": 0.6431, "step": 27271 }, { "epoch": 0.7962395258532597, "grad_norm": 0.8684738733952534, "learning_rate": 1.1320356853203568e-06, "loss": 0.6744, "step": 27272 }, { "epoch": 0.7962687220811071, "grad_norm": 0.7420538028830248, "learning_rate": 1.1318734793187348e-06, "loss": 0.5822, "step": 27273 }, { "epoch": 0.7962979183089545, "grad_norm": 0.7171983034212849, "learning_rate": 1.1317112733171129e-06, "loss": 0.6221, "step": 27274 }, { "epoch": 0.7963271145368018, "grad_norm": 0.6664925362618196, "learning_rate": 1.1315490673154909e-06, "loss": 0.564, "step": 27275 }, { "epoch": 0.7963563107646492, "grad_norm": 0.7103241189135445, "learning_rate": 1.1313868613138687e-06, "loss": 0.633, "step": 27276 }, { "epoch": 0.7963855069924966, "grad_norm": 0.7667819291651847, "learning_rate": 1.1312246553122467e-06, "loss": 0.7206, "step": 27277 }, { "epoch": 0.7964147032203439, "grad_norm": 0.7335375279893198, "learning_rate": 1.1310624493106245e-06, "loss": 0.6364, "step": 27278 }, { "epoch": 0.7964438994481913, "grad_norm": 0.6930038563683567, "learning_rate": 1.1309002433090027e-06, "loss": 0.6085, "step": 27279 }, { "epoch": 0.7964730956760386, "grad_norm": 0.7679089231374074, "learning_rate": 1.1307380373073805e-06, "loss": 0.7011, "step": 27280 }, { "epoch": 0.796502291903886, "grad_norm": 0.6591162240005618, "learning_rate": 1.1305758313057585e-06, "loss": 0.5473, "step": 27281 }, { "epoch": 0.7965314881317334, "grad_norm": 0.7215221104081052, "learning_rate": 1.1304136253041363e-06, "loss": 0.6036, "step": 27282 }, { "epoch": 0.7965606843595807, "grad_norm": 0.7134914598829478, "learning_rate": 1.1302514193025143e-06, "loss": 0.6116, "step": 27283 }, { "epoch": 0.7965898805874281, "grad_norm": 0.7534128065164387, "learning_rate": 1.1300892133008923e-06, "loss": 0.6071, "step": 27284 }, { "epoch": 0.7966190768152754, "grad_norm": 0.7182639687847358, "learning_rate": 1.12992700729927e-06, "loss": 0.6471, "step": 27285 }, { "epoch": 0.7966482730431228, "grad_norm": 0.7368712271798848, "learning_rate": 1.129764801297648e-06, "loss": 0.6528, "step": 27286 }, { "epoch": 0.7966774692709702, "grad_norm": 0.7193692559376967, "learning_rate": 1.129602595296026e-06, "loss": 0.6537, "step": 27287 }, { "epoch": 0.7967066654988175, "grad_norm": 1.024149203508117, "learning_rate": 1.1294403892944039e-06, "loss": 0.7333, "step": 27288 }, { "epoch": 0.7967358617266649, "grad_norm": 0.6850479174404941, "learning_rate": 1.129278183292782e-06, "loss": 0.6019, "step": 27289 }, { "epoch": 0.7967650579545122, "grad_norm": 0.803961876431322, "learning_rate": 1.12911597729116e-06, "loss": 0.6807, "step": 27290 }, { "epoch": 0.7967942541823596, "grad_norm": 0.7501944840280534, "learning_rate": 1.1289537712895377e-06, "loss": 0.6717, "step": 27291 }, { "epoch": 0.796823450410207, "grad_norm": 0.6764010949155309, "learning_rate": 1.1287915652879157e-06, "loss": 0.5203, "step": 27292 }, { "epoch": 0.7968526466380543, "grad_norm": 0.6542562742714335, "learning_rate": 1.1286293592862937e-06, "loss": 0.5285, "step": 27293 }, { "epoch": 0.7968818428659017, "grad_norm": 0.6933342665977331, "learning_rate": 1.1284671532846717e-06, "loss": 0.6188, "step": 27294 }, { "epoch": 0.796911039093749, "grad_norm": 0.6953238285366989, "learning_rate": 1.1283049472830495e-06, "loss": 0.6007, "step": 27295 }, { "epoch": 0.7969402353215964, "grad_norm": 0.708424924000686, "learning_rate": 1.1281427412814275e-06, "loss": 0.6052, "step": 27296 }, { "epoch": 0.7969694315494438, "grad_norm": 0.6338229161443003, "learning_rate": 1.1279805352798053e-06, "loss": 0.5098, "step": 27297 }, { "epoch": 0.7969986277772911, "grad_norm": 0.7376004411801541, "learning_rate": 1.1278183292781835e-06, "loss": 0.5904, "step": 27298 }, { "epoch": 0.7970278240051385, "grad_norm": 0.7246028633632483, "learning_rate": 1.1276561232765613e-06, "loss": 0.6392, "step": 27299 }, { "epoch": 0.7970570202329859, "grad_norm": 0.7338867015662252, "learning_rate": 1.1274939172749393e-06, "loss": 0.6298, "step": 27300 }, { "epoch": 0.7970862164608332, "grad_norm": 0.6837319434894975, "learning_rate": 1.1273317112733171e-06, "loss": 0.6084, "step": 27301 }, { "epoch": 0.7971154126886806, "grad_norm": 0.7368562043361357, "learning_rate": 1.1271695052716951e-06, "loss": 0.6544, "step": 27302 }, { "epoch": 0.7971446089165279, "grad_norm": 0.7497977854498803, "learning_rate": 1.1270072992700731e-06, "loss": 0.634, "step": 27303 }, { "epoch": 0.7971738051443753, "grad_norm": 0.7344836486787841, "learning_rate": 1.126845093268451e-06, "loss": 0.6401, "step": 27304 }, { "epoch": 0.7972030013722227, "grad_norm": 0.759569153489302, "learning_rate": 1.126682887266829e-06, "loss": 0.7172, "step": 27305 }, { "epoch": 0.79723219760007, "grad_norm": 0.6950699708791864, "learning_rate": 1.126520681265207e-06, "loss": 0.6195, "step": 27306 }, { "epoch": 0.7972613938279174, "grad_norm": 0.7510865802959689, "learning_rate": 1.126358475263585e-06, "loss": 0.7343, "step": 27307 }, { "epoch": 0.7972905900557647, "grad_norm": 0.9779638160295967, "learning_rate": 1.1261962692619628e-06, "loss": 0.6112, "step": 27308 }, { "epoch": 0.7973197862836121, "grad_norm": 0.665862499602325, "learning_rate": 1.1260340632603408e-06, "loss": 0.5402, "step": 27309 }, { "epoch": 0.7973489825114595, "grad_norm": 0.7949185619443515, "learning_rate": 1.1258718572587186e-06, "loss": 0.7099, "step": 27310 }, { "epoch": 0.7973781787393068, "grad_norm": 0.715810050029926, "learning_rate": 1.1257096512570966e-06, "loss": 0.6626, "step": 27311 }, { "epoch": 0.7974073749671542, "grad_norm": 0.7768456462428406, "learning_rate": 1.1255474452554746e-06, "loss": 0.6794, "step": 27312 }, { "epoch": 0.7974365711950016, "grad_norm": 0.7184841723082609, "learning_rate": 1.1253852392538526e-06, "loss": 0.6463, "step": 27313 }, { "epoch": 0.7974657674228489, "grad_norm": 0.6736962039425537, "learning_rate": 1.1252230332522304e-06, "loss": 0.5795, "step": 27314 }, { "epoch": 0.7974949636506963, "grad_norm": 0.7196357538616768, "learning_rate": 1.1250608272506084e-06, "loss": 0.6219, "step": 27315 }, { "epoch": 0.7975241598785436, "grad_norm": 0.7326215950317747, "learning_rate": 1.1248986212489862e-06, "loss": 0.6437, "step": 27316 }, { "epoch": 0.7975533561063911, "grad_norm": 0.7093423524453562, "learning_rate": 1.1247364152473644e-06, "loss": 0.6377, "step": 27317 }, { "epoch": 0.7975825523342385, "grad_norm": 0.7374920778371193, "learning_rate": 1.1245742092457422e-06, "loss": 0.6214, "step": 27318 }, { "epoch": 0.7976117485620858, "grad_norm": 0.7375433787362955, "learning_rate": 1.1244120032441202e-06, "loss": 0.6313, "step": 27319 }, { "epoch": 0.7976409447899332, "grad_norm": 0.6912741728800633, "learning_rate": 1.124249797242498e-06, "loss": 0.6212, "step": 27320 }, { "epoch": 0.7976701410177806, "grad_norm": 0.703494258237684, "learning_rate": 1.124087591240876e-06, "loss": 0.635, "step": 27321 }, { "epoch": 0.7976993372456279, "grad_norm": 0.7021378690748515, "learning_rate": 1.123925385239254e-06, "loss": 0.6253, "step": 27322 }, { "epoch": 0.7977285334734753, "grad_norm": 0.7277826467571464, "learning_rate": 1.1237631792376318e-06, "loss": 0.6917, "step": 27323 }, { "epoch": 0.7977577297013226, "grad_norm": 0.8051619417239888, "learning_rate": 1.1236009732360098e-06, "loss": 0.7017, "step": 27324 }, { "epoch": 0.79778692592917, "grad_norm": 0.6926886502050961, "learning_rate": 1.1234387672343878e-06, "loss": 0.5865, "step": 27325 }, { "epoch": 0.7978161221570174, "grad_norm": 0.7334329814278774, "learning_rate": 1.1232765612327658e-06, "loss": 0.6686, "step": 27326 }, { "epoch": 0.7978453183848647, "grad_norm": 0.7235197504247304, "learning_rate": 1.1231143552311436e-06, "loss": 0.6503, "step": 27327 }, { "epoch": 0.7978745146127121, "grad_norm": 0.6688459719773909, "learning_rate": 1.1229521492295216e-06, "loss": 0.5409, "step": 27328 }, { "epoch": 0.7979037108405594, "grad_norm": 0.6629252707457955, "learning_rate": 1.1227899432278994e-06, "loss": 0.5622, "step": 27329 }, { "epoch": 0.7979329070684068, "grad_norm": 0.8408967859808795, "learning_rate": 1.1226277372262774e-06, "loss": 0.7512, "step": 27330 }, { "epoch": 0.7979621032962542, "grad_norm": 0.6831890153434391, "learning_rate": 1.1224655312246554e-06, "loss": 0.558, "step": 27331 }, { "epoch": 0.7979912995241015, "grad_norm": 0.7214357163331375, "learning_rate": 1.1223033252230334e-06, "loss": 0.6286, "step": 27332 }, { "epoch": 0.7980204957519489, "grad_norm": 0.6802089785478806, "learning_rate": 1.1221411192214112e-06, "loss": 0.5521, "step": 27333 }, { "epoch": 0.7980496919797962, "grad_norm": 0.6606076090636844, "learning_rate": 1.1219789132197892e-06, "loss": 0.5677, "step": 27334 }, { "epoch": 0.7980788882076436, "grad_norm": 0.7033479418960578, "learning_rate": 1.121816707218167e-06, "loss": 0.6382, "step": 27335 }, { "epoch": 0.798108084435491, "grad_norm": 0.7953668106239888, "learning_rate": 1.1216545012165453e-06, "loss": 0.7622, "step": 27336 }, { "epoch": 0.7981372806633383, "grad_norm": 0.6735045244374226, "learning_rate": 1.121492295214923e-06, "loss": 0.5973, "step": 27337 }, { "epoch": 0.7981664768911857, "grad_norm": 0.7206608624334098, "learning_rate": 1.121330089213301e-06, "loss": 0.6131, "step": 27338 }, { "epoch": 0.7981956731190331, "grad_norm": 0.7397432341420588, "learning_rate": 1.1211678832116789e-06, "loss": 0.6487, "step": 27339 }, { "epoch": 0.7982248693468804, "grad_norm": 0.6912682242192911, "learning_rate": 1.1210056772100569e-06, "loss": 0.567, "step": 27340 }, { "epoch": 0.7982540655747278, "grad_norm": 0.7674425094232687, "learning_rate": 1.1208434712084349e-06, "loss": 0.6529, "step": 27341 }, { "epoch": 0.7982832618025751, "grad_norm": 0.7437349024676312, "learning_rate": 1.1206812652068127e-06, "loss": 0.6702, "step": 27342 }, { "epoch": 0.7983124580304225, "grad_norm": 0.7240540927737376, "learning_rate": 1.1205190592051907e-06, "loss": 0.6258, "step": 27343 }, { "epoch": 0.7983416542582699, "grad_norm": 0.7241048295597973, "learning_rate": 1.1203568532035687e-06, "loss": 0.6534, "step": 27344 }, { "epoch": 0.7983708504861172, "grad_norm": 0.7155389213705956, "learning_rate": 1.1201946472019467e-06, "loss": 0.6676, "step": 27345 }, { "epoch": 0.7984000467139646, "grad_norm": 0.7700163381403956, "learning_rate": 1.1200324412003245e-06, "loss": 0.6656, "step": 27346 }, { "epoch": 0.798429242941812, "grad_norm": 0.7409477365817982, "learning_rate": 1.1198702351987025e-06, "loss": 0.7107, "step": 27347 }, { "epoch": 0.7984584391696593, "grad_norm": 0.7019911414994541, "learning_rate": 1.1197080291970803e-06, "loss": 0.6, "step": 27348 }, { "epoch": 0.7984876353975067, "grad_norm": 0.7105090437859733, "learning_rate": 1.1195458231954583e-06, "loss": 0.614, "step": 27349 }, { "epoch": 0.798516831625354, "grad_norm": 0.7295250441253917, "learning_rate": 1.1193836171938363e-06, "loss": 0.7132, "step": 27350 }, { "epoch": 0.7985460278532014, "grad_norm": 0.8231261346709644, "learning_rate": 1.1192214111922143e-06, "loss": 0.6219, "step": 27351 }, { "epoch": 0.7985752240810488, "grad_norm": 0.6418191772743767, "learning_rate": 1.119059205190592e-06, "loss": 0.5217, "step": 27352 }, { "epoch": 0.7986044203088961, "grad_norm": 0.6978689240250527, "learning_rate": 1.11889699918897e-06, "loss": 0.6128, "step": 27353 }, { "epoch": 0.7986336165367435, "grad_norm": 0.7091571064528739, "learning_rate": 1.118734793187348e-06, "loss": 0.6141, "step": 27354 }, { "epoch": 0.7986628127645908, "grad_norm": 0.6725134269003471, "learning_rate": 1.118572587185726e-06, "loss": 0.5496, "step": 27355 }, { "epoch": 0.7986920089924382, "grad_norm": 0.6662076084847038, "learning_rate": 1.118410381184104e-06, "loss": 0.589, "step": 27356 }, { "epoch": 0.7987212052202856, "grad_norm": 0.7180368826691182, "learning_rate": 1.118248175182482e-06, "loss": 0.5991, "step": 27357 }, { "epoch": 0.7987504014481329, "grad_norm": 0.6763948954543366, "learning_rate": 1.1180859691808597e-06, "loss": 0.5481, "step": 27358 }, { "epoch": 0.7987795976759803, "grad_norm": 0.7189950461460191, "learning_rate": 1.1179237631792377e-06, "loss": 0.6123, "step": 27359 }, { "epoch": 0.7988087939038276, "grad_norm": 0.752062447853425, "learning_rate": 1.1177615571776157e-06, "loss": 0.71, "step": 27360 }, { "epoch": 0.798837990131675, "grad_norm": 0.6445188076344432, "learning_rate": 1.1175993511759935e-06, "loss": 0.5101, "step": 27361 }, { "epoch": 0.7988671863595224, "grad_norm": 0.7397563430774906, "learning_rate": 1.1174371451743715e-06, "loss": 0.636, "step": 27362 }, { "epoch": 0.7988963825873697, "grad_norm": 0.8628597453449229, "learning_rate": 1.1172749391727495e-06, "loss": 0.6296, "step": 27363 }, { "epoch": 0.7989255788152171, "grad_norm": 0.7503817413641045, "learning_rate": 1.1171127331711275e-06, "loss": 0.7052, "step": 27364 }, { "epoch": 0.7989547750430644, "grad_norm": 0.6937687861250258, "learning_rate": 1.1169505271695053e-06, "loss": 0.623, "step": 27365 }, { "epoch": 0.7989839712709118, "grad_norm": 0.7213831737107967, "learning_rate": 1.1167883211678833e-06, "loss": 0.7115, "step": 27366 }, { "epoch": 0.7990131674987592, "grad_norm": 0.6978688086385566, "learning_rate": 1.1166261151662611e-06, "loss": 0.6437, "step": 27367 }, { "epoch": 0.7990423637266065, "grad_norm": 0.7263562583079891, "learning_rate": 1.1164639091646391e-06, "loss": 0.6352, "step": 27368 }, { "epoch": 0.7990715599544539, "grad_norm": 0.7492833662367544, "learning_rate": 1.1163017031630171e-06, "loss": 0.6944, "step": 27369 }, { "epoch": 0.7991007561823013, "grad_norm": 0.7210610243223919, "learning_rate": 1.1161394971613952e-06, "loss": 0.6651, "step": 27370 }, { "epoch": 0.7991299524101486, "grad_norm": 0.7014613450947437, "learning_rate": 1.115977291159773e-06, "loss": 0.6186, "step": 27371 }, { "epoch": 0.799159148637996, "grad_norm": 0.7181037991044888, "learning_rate": 1.115815085158151e-06, "loss": 0.7001, "step": 27372 }, { "epoch": 0.7991883448658433, "grad_norm": 0.7221571027918638, "learning_rate": 1.115652879156529e-06, "loss": 0.6364, "step": 27373 }, { "epoch": 0.7992175410936907, "grad_norm": 0.7391374664497813, "learning_rate": 1.1154906731549068e-06, "loss": 0.6775, "step": 27374 }, { "epoch": 0.7992467373215381, "grad_norm": 0.6776401572070944, "learning_rate": 1.1153284671532848e-06, "loss": 0.5668, "step": 27375 }, { "epoch": 0.7992759335493854, "grad_norm": 0.761697475575618, "learning_rate": 1.1151662611516628e-06, "loss": 0.7251, "step": 27376 }, { "epoch": 0.7993051297772328, "grad_norm": 0.7462440000669062, "learning_rate": 1.1150040551500406e-06, "loss": 0.6513, "step": 27377 }, { "epoch": 0.7993343260050801, "grad_norm": 0.7018341251861607, "learning_rate": 1.1148418491484186e-06, "loss": 0.5987, "step": 27378 }, { "epoch": 0.7993635222329275, "grad_norm": 0.7490194511803695, "learning_rate": 1.1146796431467966e-06, "loss": 0.6877, "step": 27379 }, { "epoch": 0.7993927184607749, "grad_norm": 0.7111356069086016, "learning_rate": 1.1145174371451744e-06, "loss": 0.6267, "step": 27380 }, { "epoch": 0.7994219146886222, "grad_norm": 0.7058942839340854, "learning_rate": 1.1143552311435524e-06, "loss": 0.6212, "step": 27381 }, { "epoch": 0.7994511109164696, "grad_norm": 0.6485883521081903, "learning_rate": 1.1141930251419302e-06, "loss": 0.4875, "step": 27382 }, { "epoch": 0.799480307144317, "grad_norm": 0.7120241267748675, "learning_rate": 1.1140308191403084e-06, "loss": 0.5132, "step": 27383 }, { "epoch": 0.7995095033721643, "grad_norm": 0.7415727215653941, "learning_rate": 1.1138686131386862e-06, "loss": 0.6545, "step": 27384 }, { "epoch": 0.7995386996000117, "grad_norm": 0.6838939999055389, "learning_rate": 1.1137064071370642e-06, "loss": 0.6025, "step": 27385 }, { "epoch": 0.799567895827859, "grad_norm": 0.78420652397381, "learning_rate": 1.113544201135442e-06, "loss": 0.747, "step": 27386 }, { "epoch": 0.7995970920557064, "grad_norm": 0.7129438676111158, "learning_rate": 1.11338199513382e-06, "loss": 0.5642, "step": 27387 }, { "epoch": 0.7996262882835538, "grad_norm": 0.7258824160466824, "learning_rate": 1.113219789132198e-06, "loss": 0.6207, "step": 27388 }, { "epoch": 0.7996554845114011, "grad_norm": 0.7008083470851488, "learning_rate": 1.113057583130576e-06, "loss": 0.615, "step": 27389 }, { "epoch": 0.7996846807392485, "grad_norm": 0.7216337390206978, "learning_rate": 1.1128953771289538e-06, "loss": 0.6342, "step": 27390 }, { "epoch": 0.7997138769670958, "grad_norm": 0.7075544977179204, "learning_rate": 1.1127331711273318e-06, "loss": 0.6109, "step": 27391 }, { "epoch": 0.7997430731949432, "grad_norm": 0.672332067298668, "learning_rate": 1.1125709651257098e-06, "loss": 0.5458, "step": 27392 }, { "epoch": 0.7997722694227906, "grad_norm": 0.7154307025530334, "learning_rate": 1.1124087591240876e-06, "loss": 0.6196, "step": 27393 }, { "epoch": 0.7998014656506379, "grad_norm": 0.7494192804448616, "learning_rate": 1.1122465531224656e-06, "loss": 0.6198, "step": 27394 }, { "epoch": 0.7998306618784853, "grad_norm": 0.7384773308641887, "learning_rate": 1.1120843471208436e-06, "loss": 0.6191, "step": 27395 }, { "epoch": 0.7998598581063326, "grad_norm": 0.6897515291236529, "learning_rate": 1.1119221411192214e-06, "loss": 0.6044, "step": 27396 }, { "epoch": 0.79988905433418, "grad_norm": 0.7118201634634067, "learning_rate": 1.1117599351175994e-06, "loss": 0.6737, "step": 27397 }, { "epoch": 0.7999182505620274, "grad_norm": 0.7308100192841787, "learning_rate": 1.1115977291159774e-06, "loss": 0.6749, "step": 27398 }, { "epoch": 0.7999474467898747, "grad_norm": 0.7470009795131695, "learning_rate": 1.1114355231143552e-06, "loss": 0.6643, "step": 27399 }, { "epoch": 0.7999766430177221, "grad_norm": 0.7134228928793064, "learning_rate": 1.1112733171127332e-06, "loss": 0.5642, "step": 27400 }, { "epoch": 0.8000058392455695, "grad_norm": 0.7067420730823896, "learning_rate": 1.111111111111111e-06, "loss": 0.6648, "step": 27401 }, { "epoch": 0.8000350354734168, "grad_norm": 0.7222293831913931, "learning_rate": 1.1109489051094893e-06, "loss": 0.6325, "step": 27402 }, { "epoch": 0.8000642317012642, "grad_norm": 0.7424979669552814, "learning_rate": 1.110786699107867e-06, "loss": 0.622, "step": 27403 }, { "epoch": 0.8000934279291115, "grad_norm": 0.742990310140679, "learning_rate": 1.110624493106245e-06, "loss": 0.629, "step": 27404 }, { "epoch": 0.8001226241569589, "grad_norm": 0.6668362691418138, "learning_rate": 1.1104622871046229e-06, "loss": 0.5107, "step": 27405 }, { "epoch": 0.8001518203848063, "grad_norm": 0.7114850965644663, "learning_rate": 1.110300081103001e-06, "loss": 0.629, "step": 27406 }, { "epoch": 0.8001810166126536, "grad_norm": 0.7651293684989351, "learning_rate": 1.1101378751013789e-06, "loss": 0.6655, "step": 27407 }, { "epoch": 0.800210212840501, "grad_norm": 0.7114744571496397, "learning_rate": 1.1099756690997569e-06, "loss": 0.6086, "step": 27408 }, { "epoch": 0.8002394090683483, "grad_norm": 0.746817917714022, "learning_rate": 1.1098134630981347e-06, "loss": 0.6186, "step": 27409 }, { "epoch": 0.8002686052961957, "grad_norm": 0.70888560388579, "learning_rate": 1.1096512570965127e-06, "loss": 0.6195, "step": 27410 }, { "epoch": 0.8002978015240431, "grad_norm": 0.6852026489585448, "learning_rate": 1.1094890510948907e-06, "loss": 0.5715, "step": 27411 }, { "epoch": 0.8003269977518904, "grad_norm": 0.7329766691997524, "learning_rate": 1.1093268450932685e-06, "loss": 0.6175, "step": 27412 }, { "epoch": 0.8003561939797378, "grad_norm": 0.7363720711317507, "learning_rate": 1.1091646390916465e-06, "loss": 0.6268, "step": 27413 }, { "epoch": 0.8003853902075851, "grad_norm": 0.8183825642090771, "learning_rate": 1.1090024330900245e-06, "loss": 0.6737, "step": 27414 }, { "epoch": 0.8004145864354325, "grad_norm": 0.7539925567478606, "learning_rate": 1.1088402270884023e-06, "loss": 0.7042, "step": 27415 }, { "epoch": 0.8004437826632799, "grad_norm": 0.746713330064286, "learning_rate": 1.1086780210867803e-06, "loss": 0.6887, "step": 27416 }, { "epoch": 0.8004729788911272, "grad_norm": 0.83040499752822, "learning_rate": 1.1085158150851583e-06, "loss": 0.7965, "step": 27417 }, { "epoch": 0.8005021751189746, "grad_norm": 0.7328896420912646, "learning_rate": 1.108353609083536e-06, "loss": 0.6105, "step": 27418 }, { "epoch": 0.800531371346822, "grad_norm": 0.7071236935366696, "learning_rate": 1.108191403081914e-06, "loss": 0.5582, "step": 27419 }, { "epoch": 0.8005605675746693, "grad_norm": 0.6948744480508647, "learning_rate": 1.108029197080292e-06, "loss": 0.6088, "step": 27420 }, { "epoch": 0.8005897638025167, "grad_norm": 0.7627851997340879, "learning_rate": 1.1078669910786701e-06, "loss": 0.6569, "step": 27421 }, { "epoch": 0.800618960030364, "grad_norm": 0.6772663167830204, "learning_rate": 1.107704785077048e-06, "loss": 0.5647, "step": 27422 }, { "epoch": 0.8006481562582114, "grad_norm": 0.6535891693414296, "learning_rate": 1.107542579075426e-06, "loss": 0.5496, "step": 27423 }, { "epoch": 0.8006773524860588, "grad_norm": 0.7457948061095935, "learning_rate": 1.1073803730738037e-06, "loss": 0.5595, "step": 27424 }, { "epoch": 0.8007065487139061, "grad_norm": 0.6894777440559046, "learning_rate": 1.107218167072182e-06, "loss": 0.5603, "step": 27425 }, { "epoch": 0.8007357449417535, "grad_norm": 0.645678620867065, "learning_rate": 1.1070559610705597e-06, "loss": 0.5237, "step": 27426 }, { "epoch": 0.8007649411696008, "grad_norm": 0.6592215569442782, "learning_rate": 1.1068937550689377e-06, "loss": 0.557, "step": 27427 }, { "epoch": 0.8007941373974482, "grad_norm": 0.7418525749189386, "learning_rate": 1.1067315490673155e-06, "loss": 0.6257, "step": 27428 }, { "epoch": 0.8008233336252956, "grad_norm": 0.7285458996820949, "learning_rate": 1.1065693430656935e-06, "loss": 0.6221, "step": 27429 }, { "epoch": 0.8008525298531429, "grad_norm": 0.7595276979200299, "learning_rate": 1.1064071370640715e-06, "loss": 0.7296, "step": 27430 }, { "epoch": 0.8008817260809903, "grad_norm": 0.7508062364076635, "learning_rate": 1.1062449310624493e-06, "loss": 0.6828, "step": 27431 }, { "epoch": 0.8009109223088376, "grad_norm": 0.7606767602709825, "learning_rate": 1.1060827250608273e-06, "loss": 0.5963, "step": 27432 }, { "epoch": 0.800940118536685, "grad_norm": 0.7897465595478823, "learning_rate": 1.1059205190592053e-06, "loss": 0.7438, "step": 27433 }, { "epoch": 0.8009693147645324, "grad_norm": 0.8485214022526305, "learning_rate": 1.1057583130575831e-06, "loss": 0.6133, "step": 27434 }, { "epoch": 0.8009985109923797, "grad_norm": 0.7104281758987656, "learning_rate": 1.1055961070559612e-06, "loss": 0.62, "step": 27435 }, { "epoch": 0.8010277072202271, "grad_norm": 0.6769039137512983, "learning_rate": 1.1054339010543392e-06, "loss": 0.6018, "step": 27436 }, { "epoch": 0.8010569034480746, "grad_norm": 0.6895594510185755, "learning_rate": 1.105271695052717e-06, "loss": 0.5746, "step": 27437 }, { "epoch": 0.8010860996759219, "grad_norm": 0.7394762566356309, "learning_rate": 1.105109489051095e-06, "loss": 0.6394, "step": 27438 }, { "epoch": 0.8011152959037693, "grad_norm": 0.7767711249720342, "learning_rate": 1.1049472830494728e-06, "loss": 0.6487, "step": 27439 }, { "epoch": 0.8011444921316166, "grad_norm": 0.7400179527094111, "learning_rate": 1.104785077047851e-06, "loss": 0.6898, "step": 27440 }, { "epoch": 0.801173688359464, "grad_norm": 0.7498581520732267, "learning_rate": 1.1046228710462288e-06, "loss": 0.6915, "step": 27441 }, { "epoch": 0.8012028845873114, "grad_norm": 0.7267474866183116, "learning_rate": 1.1044606650446068e-06, "loss": 0.6564, "step": 27442 }, { "epoch": 0.8012320808151587, "grad_norm": 0.642402499976296, "learning_rate": 1.1042984590429846e-06, "loss": 0.5492, "step": 27443 }, { "epoch": 0.8012612770430061, "grad_norm": 0.7793907203412886, "learning_rate": 1.1041362530413626e-06, "loss": 0.6778, "step": 27444 }, { "epoch": 0.8012904732708535, "grad_norm": 0.6822417150713241, "learning_rate": 1.1039740470397406e-06, "loss": 0.6071, "step": 27445 }, { "epoch": 0.8013196694987008, "grad_norm": 0.7392861846158777, "learning_rate": 1.1038118410381186e-06, "loss": 0.7055, "step": 27446 }, { "epoch": 0.8013488657265482, "grad_norm": 0.7001727423198804, "learning_rate": 1.1036496350364964e-06, "loss": 0.575, "step": 27447 }, { "epoch": 0.8013780619543955, "grad_norm": 0.6513055980199739, "learning_rate": 1.1034874290348744e-06, "loss": 0.5124, "step": 27448 }, { "epoch": 0.8014072581822429, "grad_norm": 0.7510248353983588, "learning_rate": 1.1033252230332524e-06, "loss": 0.66, "step": 27449 }, { "epoch": 0.8014364544100903, "grad_norm": 0.7147941898287509, "learning_rate": 1.1031630170316302e-06, "loss": 0.5641, "step": 27450 }, { "epoch": 0.8014656506379376, "grad_norm": 0.8264552776666753, "learning_rate": 1.1030008110300082e-06, "loss": 0.7147, "step": 27451 }, { "epoch": 0.801494846865785, "grad_norm": 0.7598504286536315, "learning_rate": 1.1028386050283862e-06, "loss": 0.6517, "step": 27452 }, { "epoch": 0.8015240430936323, "grad_norm": 0.6680177174359053, "learning_rate": 1.102676399026764e-06, "loss": 0.5684, "step": 27453 }, { "epoch": 0.8015532393214797, "grad_norm": 0.6651464598477822, "learning_rate": 1.102514193025142e-06, "loss": 0.53, "step": 27454 }, { "epoch": 0.8015824355493271, "grad_norm": 0.6738162677433777, "learning_rate": 1.10235198702352e-06, "loss": 0.5585, "step": 27455 }, { "epoch": 0.8016116317771744, "grad_norm": 0.6808058548559524, "learning_rate": 1.1021897810218978e-06, "loss": 0.5879, "step": 27456 }, { "epoch": 0.8016408280050218, "grad_norm": 0.7212109760402181, "learning_rate": 1.1020275750202758e-06, "loss": 0.6521, "step": 27457 }, { "epoch": 0.8016700242328691, "grad_norm": 0.6758895796716864, "learning_rate": 1.1018653690186538e-06, "loss": 0.5681, "step": 27458 }, { "epoch": 0.8016992204607165, "grad_norm": 0.7275506590346726, "learning_rate": 1.1017031630170318e-06, "loss": 0.6526, "step": 27459 }, { "epoch": 0.8017284166885639, "grad_norm": 0.7668735501503743, "learning_rate": 1.1015409570154096e-06, "loss": 0.7164, "step": 27460 }, { "epoch": 0.8017576129164112, "grad_norm": 0.7707203547034568, "learning_rate": 1.1013787510137876e-06, "loss": 0.6965, "step": 27461 }, { "epoch": 0.8017868091442586, "grad_norm": 0.6794332977783821, "learning_rate": 1.1012165450121654e-06, "loss": 0.5676, "step": 27462 }, { "epoch": 0.801816005372106, "grad_norm": 0.6908025548077369, "learning_rate": 1.1010543390105434e-06, "loss": 0.6049, "step": 27463 }, { "epoch": 0.8018452015999533, "grad_norm": 0.7055016962726827, "learning_rate": 1.1008921330089214e-06, "loss": 0.5977, "step": 27464 }, { "epoch": 0.8018743978278007, "grad_norm": 0.6960824810938986, "learning_rate": 1.1007299270072994e-06, "loss": 0.6299, "step": 27465 }, { "epoch": 0.801903594055648, "grad_norm": 0.7633656157621919, "learning_rate": 1.1005677210056772e-06, "loss": 0.6618, "step": 27466 }, { "epoch": 0.8019327902834954, "grad_norm": 0.7136215190631611, "learning_rate": 1.1004055150040553e-06, "loss": 0.5787, "step": 27467 }, { "epoch": 0.8019619865113428, "grad_norm": 0.9294178964625684, "learning_rate": 1.1002433090024333e-06, "loss": 0.6709, "step": 27468 }, { "epoch": 0.8019911827391901, "grad_norm": 0.8127097350088985, "learning_rate": 1.100081103000811e-06, "loss": 0.705, "step": 27469 }, { "epoch": 0.8020203789670375, "grad_norm": 0.7597628187777999, "learning_rate": 1.099918896999189e-06, "loss": 0.6925, "step": 27470 }, { "epoch": 0.8020495751948848, "grad_norm": 0.7793604723052326, "learning_rate": 1.099756690997567e-06, "loss": 0.7431, "step": 27471 }, { "epoch": 0.8020787714227322, "grad_norm": 0.7769155677722266, "learning_rate": 1.0995944849959449e-06, "loss": 0.7056, "step": 27472 }, { "epoch": 0.8021079676505796, "grad_norm": 0.6958426151141763, "learning_rate": 1.0994322789943229e-06, "loss": 0.5866, "step": 27473 }, { "epoch": 0.8021371638784269, "grad_norm": 0.7204088098323242, "learning_rate": 1.0992700729927009e-06, "loss": 0.6274, "step": 27474 }, { "epoch": 0.8021663601062743, "grad_norm": 0.7198961711592211, "learning_rate": 1.0991078669910787e-06, "loss": 0.5807, "step": 27475 }, { "epoch": 0.8021955563341217, "grad_norm": 0.7188034672998412, "learning_rate": 1.0989456609894567e-06, "loss": 0.6094, "step": 27476 }, { "epoch": 0.802224752561969, "grad_norm": 0.6962746633333523, "learning_rate": 1.0987834549878347e-06, "loss": 0.6119, "step": 27477 }, { "epoch": 0.8022539487898164, "grad_norm": 0.6824988389683101, "learning_rate": 1.0986212489862127e-06, "loss": 0.567, "step": 27478 }, { "epoch": 0.8022831450176637, "grad_norm": 0.7187741887634111, "learning_rate": 1.0984590429845905e-06, "loss": 0.6202, "step": 27479 }, { "epoch": 0.8023123412455111, "grad_norm": 0.7004534992382596, "learning_rate": 1.0982968369829685e-06, "loss": 0.6187, "step": 27480 }, { "epoch": 0.8023415374733585, "grad_norm": 0.7206446965566958, "learning_rate": 1.0981346309813463e-06, "loss": 0.6135, "step": 27481 }, { "epoch": 0.8023707337012058, "grad_norm": 0.7805756816411356, "learning_rate": 1.0979724249797243e-06, "loss": 0.7216, "step": 27482 }, { "epoch": 0.8023999299290532, "grad_norm": 0.6624654223983435, "learning_rate": 1.0978102189781023e-06, "loss": 0.5956, "step": 27483 }, { "epoch": 0.8024291261569005, "grad_norm": 0.6811808443577149, "learning_rate": 1.0976480129764803e-06, "loss": 0.5999, "step": 27484 }, { "epoch": 0.8024583223847479, "grad_norm": 0.7269623552649839, "learning_rate": 1.097485806974858e-06, "loss": 0.6625, "step": 27485 }, { "epoch": 0.8024875186125953, "grad_norm": 0.6968823958212472, "learning_rate": 1.0973236009732361e-06, "loss": 0.5522, "step": 27486 }, { "epoch": 0.8025167148404426, "grad_norm": 0.7455294619108259, "learning_rate": 1.0971613949716141e-06, "loss": 0.6716, "step": 27487 }, { "epoch": 0.80254591106829, "grad_norm": 0.7322143271615656, "learning_rate": 1.096999188969992e-06, "loss": 0.6808, "step": 27488 }, { "epoch": 0.8025751072961373, "grad_norm": 1.0024540055544156, "learning_rate": 1.09683698296837e-06, "loss": 0.8035, "step": 27489 }, { "epoch": 0.8026043035239847, "grad_norm": 0.7466935288292351, "learning_rate": 1.0966747769667477e-06, "loss": 0.6629, "step": 27490 }, { "epoch": 0.8026334997518321, "grad_norm": 0.7255233116578327, "learning_rate": 1.096512570965126e-06, "loss": 0.6464, "step": 27491 }, { "epoch": 0.8026626959796794, "grad_norm": 0.6823850785302709, "learning_rate": 1.0963503649635037e-06, "loss": 0.5931, "step": 27492 }, { "epoch": 0.8026918922075268, "grad_norm": 0.663182229732164, "learning_rate": 1.0961881589618817e-06, "loss": 0.5524, "step": 27493 }, { "epoch": 0.8027210884353742, "grad_norm": 0.6858286910593245, "learning_rate": 1.0960259529602595e-06, "loss": 0.5933, "step": 27494 }, { "epoch": 0.8027502846632215, "grad_norm": 0.7795780797273426, "learning_rate": 1.0958637469586375e-06, "loss": 0.6631, "step": 27495 }, { "epoch": 0.8027794808910689, "grad_norm": 0.848985689676326, "learning_rate": 1.0957015409570155e-06, "loss": 0.7023, "step": 27496 }, { "epoch": 0.8028086771189162, "grad_norm": 0.743925202596541, "learning_rate": 1.0955393349553935e-06, "loss": 0.6661, "step": 27497 }, { "epoch": 0.8028378733467636, "grad_norm": 0.7537073235205108, "learning_rate": 1.0953771289537713e-06, "loss": 0.7143, "step": 27498 }, { "epoch": 0.802867069574611, "grad_norm": 0.6833444304128525, "learning_rate": 1.0952149229521494e-06, "loss": 0.5694, "step": 27499 }, { "epoch": 0.8028962658024583, "grad_norm": 0.7076102416334733, "learning_rate": 1.0950527169505271e-06, "loss": 0.611, "step": 27500 }, { "epoch": 0.8029254620303057, "grad_norm": 0.7375378268996994, "learning_rate": 1.0948905109489052e-06, "loss": 0.6052, "step": 27501 }, { "epoch": 0.802954658258153, "grad_norm": 0.7095727827302438, "learning_rate": 1.0947283049472832e-06, "loss": 0.6408, "step": 27502 }, { "epoch": 0.8029838544860004, "grad_norm": 0.7429374592737323, "learning_rate": 1.0945660989456612e-06, "loss": 0.6217, "step": 27503 }, { "epoch": 0.8030130507138478, "grad_norm": 0.7585088308815022, "learning_rate": 1.094403892944039e-06, "loss": 0.6699, "step": 27504 }, { "epoch": 0.8030422469416951, "grad_norm": 0.7122132021951504, "learning_rate": 1.094241686942417e-06, "loss": 0.5831, "step": 27505 }, { "epoch": 0.8030714431695425, "grad_norm": 0.7135231777591816, "learning_rate": 1.094079480940795e-06, "loss": 0.604, "step": 27506 }, { "epoch": 0.8031006393973898, "grad_norm": 0.7025199466584269, "learning_rate": 1.0939172749391728e-06, "loss": 0.6666, "step": 27507 }, { "epoch": 0.8031298356252372, "grad_norm": 0.7460601684700053, "learning_rate": 1.0937550689375508e-06, "loss": 0.6114, "step": 27508 }, { "epoch": 0.8031590318530846, "grad_norm": 0.7121034905087089, "learning_rate": 1.0935928629359286e-06, "loss": 0.6549, "step": 27509 }, { "epoch": 0.8031882280809319, "grad_norm": 0.7683933945240867, "learning_rate": 1.0934306569343068e-06, "loss": 0.6492, "step": 27510 }, { "epoch": 0.8032174243087793, "grad_norm": 0.7311098637761477, "learning_rate": 1.0932684509326846e-06, "loss": 0.6156, "step": 27511 }, { "epoch": 0.8032466205366267, "grad_norm": 0.8123245440445453, "learning_rate": 1.0931062449310626e-06, "loss": 0.7123, "step": 27512 }, { "epoch": 0.803275816764474, "grad_norm": 0.7412589044018256, "learning_rate": 1.0929440389294404e-06, "loss": 0.6742, "step": 27513 }, { "epoch": 0.8033050129923214, "grad_norm": 0.7082836889126685, "learning_rate": 1.0927818329278184e-06, "loss": 0.6832, "step": 27514 }, { "epoch": 0.8033342092201687, "grad_norm": 0.7208213100266807, "learning_rate": 1.0926196269261964e-06, "loss": 0.6698, "step": 27515 }, { "epoch": 0.8033634054480161, "grad_norm": 0.7440335611125977, "learning_rate": 1.0924574209245744e-06, "loss": 0.6637, "step": 27516 }, { "epoch": 0.8033926016758635, "grad_norm": 0.743770750424854, "learning_rate": 1.0922952149229522e-06, "loss": 0.7341, "step": 27517 }, { "epoch": 0.8034217979037108, "grad_norm": 0.7349631642876479, "learning_rate": 1.0921330089213302e-06, "loss": 0.6538, "step": 27518 }, { "epoch": 0.8034509941315582, "grad_norm": 0.7574742031171399, "learning_rate": 1.091970802919708e-06, "loss": 0.7184, "step": 27519 }, { "epoch": 0.8034801903594055, "grad_norm": 0.6988317184643339, "learning_rate": 1.091808596918086e-06, "loss": 0.6313, "step": 27520 }, { "epoch": 0.8035093865872529, "grad_norm": 0.734252235043925, "learning_rate": 1.091646390916464e-06, "loss": 0.6698, "step": 27521 }, { "epoch": 0.8035385828151003, "grad_norm": 0.6976205910053737, "learning_rate": 1.091484184914842e-06, "loss": 0.5992, "step": 27522 }, { "epoch": 0.8035677790429476, "grad_norm": 0.7031337551804604, "learning_rate": 1.0913219789132198e-06, "loss": 0.6536, "step": 27523 }, { "epoch": 0.803596975270795, "grad_norm": 0.7322035963963631, "learning_rate": 1.0911597729115978e-06, "loss": 0.6479, "step": 27524 }, { "epoch": 0.8036261714986423, "grad_norm": 0.7527164980312286, "learning_rate": 1.0909975669099758e-06, "loss": 0.6683, "step": 27525 }, { "epoch": 0.8036553677264897, "grad_norm": 0.6866523124827701, "learning_rate": 1.0908353609083536e-06, "loss": 0.6164, "step": 27526 }, { "epoch": 0.8036845639543371, "grad_norm": 0.7272434957822664, "learning_rate": 1.0906731549067316e-06, "loss": 0.6841, "step": 27527 }, { "epoch": 0.8037137601821844, "grad_norm": 0.7552770408721063, "learning_rate": 1.0905109489051094e-06, "loss": 0.6637, "step": 27528 }, { "epoch": 0.8037429564100318, "grad_norm": 0.7231147257332103, "learning_rate": 1.0903487429034876e-06, "loss": 0.6069, "step": 27529 }, { "epoch": 0.8037721526378792, "grad_norm": 0.6770901012300787, "learning_rate": 1.0901865369018654e-06, "loss": 0.5789, "step": 27530 }, { "epoch": 0.8038013488657265, "grad_norm": 0.7549537297799209, "learning_rate": 1.0900243309002435e-06, "loss": 0.6982, "step": 27531 }, { "epoch": 0.8038305450935739, "grad_norm": 0.7397410753597796, "learning_rate": 1.0898621248986212e-06, "loss": 0.6577, "step": 27532 }, { "epoch": 0.8038597413214212, "grad_norm": 0.7027023941362792, "learning_rate": 1.0896999188969993e-06, "loss": 0.5782, "step": 27533 }, { "epoch": 0.8038889375492686, "grad_norm": 0.7304705146731666, "learning_rate": 1.0895377128953773e-06, "loss": 0.6567, "step": 27534 }, { "epoch": 0.803918133777116, "grad_norm": 0.7734084076576921, "learning_rate": 1.0893755068937553e-06, "loss": 0.7761, "step": 27535 }, { "epoch": 0.8039473300049633, "grad_norm": 0.7377257564878074, "learning_rate": 1.089213300892133e-06, "loss": 0.6803, "step": 27536 }, { "epoch": 0.8039765262328107, "grad_norm": 0.6891571409021545, "learning_rate": 1.089051094890511e-06, "loss": 0.5608, "step": 27537 }, { "epoch": 0.804005722460658, "grad_norm": 0.675510828910182, "learning_rate": 1.0888888888888889e-06, "loss": 0.595, "step": 27538 }, { "epoch": 0.8040349186885054, "grad_norm": 0.7019474334236963, "learning_rate": 1.0887266828872669e-06, "loss": 0.5927, "step": 27539 }, { "epoch": 0.8040641149163528, "grad_norm": 0.7545730912602211, "learning_rate": 1.0885644768856449e-06, "loss": 0.7421, "step": 27540 }, { "epoch": 0.8040933111442001, "grad_norm": 0.7895113273878678, "learning_rate": 1.0884022708840229e-06, "loss": 0.6613, "step": 27541 }, { "epoch": 0.8041225073720475, "grad_norm": 0.7617551356496567, "learning_rate": 1.0882400648824007e-06, "loss": 0.6733, "step": 27542 }, { "epoch": 0.8041517035998949, "grad_norm": 0.7348482319305982, "learning_rate": 1.0880778588807787e-06, "loss": 0.6223, "step": 27543 }, { "epoch": 0.8041808998277422, "grad_norm": 0.6945766934053096, "learning_rate": 1.0879156528791567e-06, "loss": 0.6199, "step": 27544 }, { "epoch": 0.8042100960555896, "grad_norm": 0.7509561942526354, "learning_rate": 1.0877534468775345e-06, "loss": 0.6493, "step": 27545 }, { "epoch": 0.8042392922834369, "grad_norm": 0.7101519994913968, "learning_rate": 1.0875912408759125e-06, "loss": 0.6587, "step": 27546 }, { "epoch": 0.8042684885112843, "grad_norm": 0.7268275299096462, "learning_rate": 1.0874290348742903e-06, "loss": 0.5897, "step": 27547 }, { "epoch": 0.8042976847391317, "grad_norm": 0.7422823921780622, "learning_rate": 1.0872668288726685e-06, "loss": 0.6342, "step": 27548 }, { "epoch": 0.804326880966979, "grad_norm": 0.757166761841529, "learning_rate": 1.0871046228710463e-06, "loss": 0.6946, "step": 27549 }, { "epoch": 0.8043560771948264, "grad_norm": 0.7644697191460067, "learning_rate": 1.0869424168694243e-06, "loss": 0.6765, "step": 27550 }, { "epoch": 0.8043852734226737, "grad_norm": 0.7277632228004334, "learning_rate": 1.086780210867802e-06, "loss": 0.6368, "step": 27551 }, { "epoch": 0.8044144696505211, "grad_norm": 0.7311299631861505, "learning_rate": 1.0866180048661801e-06, "loss": 0.6789, "step": 27552 }, { "epoch": 0.8044436658783685, "grad_norm": 0.7051939570792923, "learning_rate": 1.0864557988645581e-06, "loss": 0.611, "step": 27553 }, { "epoch": 0.8044728621062158, "grad_norm": 0.6546655914585499, "learning_rate": 1.0862935928629361e-06, "loss": 0.5863, "step": 27554 }, { "epoch": 0.8045020583340632, "grad_norm": 0.7306966778889153, "learning_rate": 1.086131386861314e-06, "loss": 0.6259, "step": 27555 }, { "epoch": 0.8045312545619105, "grad_norm": 0.7707362415106884, "learning_rate": 1.085969180859692e-06, "loss": 0.6831, "step": 27556 }, { "epoch": 0.8045604507897579, "grad_norm": 0.8107088009489709, "learning_rate": 1.08580697485807e-06, "loss": 0.7201, "step": 27557 }, { "epoch": 0.8045896470176054, "grad_norm": 0.7116580162545456, "learning_rate": 1.0856447688564477e-06, "loss": 0.6358, "step": 27558 }, { "epoch": 0.8046188432454527, "grad_norm": 0.7593967234148703, "learning_rate": 1.0854825628548257e-06, "loss": 0.6685, "step": 27559 }, { "epoch": 0.8046480394733001, "grad_norm": 0.7257281920357938, "learning_rate": 1.0853203568532037e-06, "loss": 0.632, "step": 27560 }, { "epoch": 0.8046772357011475, "grad_norm": 0.7642021815994156, "learning_rate": 1.0851581508515815e-06, "loss": 0.666, "step": 27561 }, { "epoch": 0.8047064319289948, "grad_norm": 0.7365960841065498, "learning_rate": 1.0849959448499595e-06, "loss": 0.673, "step": 27562 }, { "epoch": 0.8047356281568422, "grad_norm": 0.7114955414982224, "learning_rate": 1.0848337388483376e-06, "loss": 0.6092, "step": 27563 }, { "epoch": 0.8047648243846895, "grad_norm": 0.7265640151202742, "learning_rate": 1.0846715328467153e-06, "loss": 0.6343, "step": 27564 }, { "epoch": 0.8047940206125369, "grad_norm": 0.6839177671346692, "learning_rate": 1.0845093268450934e-06, "loss": 0.5975, "step": 27565 }, { "epoch": 0.8048232168403843, "grad_norm": 0.7268049564513099, "learning_rate": 1.0843471208434711e-06, "loss": 0.6327, "step": 27566 }, { "epoch": 0.8048524130682316, "grad_norm": 0.7415493370906971, "learning_rate": 1.0841849148418494e-06, "loss": 0.6432, "step": 27567 }, { "epoch": 0.804881609296079, "grad_norm": 0.6940755957891128, "learning_rate": 1.0840227088402272e-06, "loss": 0.6169, "step": 27568 }, { "epoch": 0.8049108055239264, "grad_norm": 0.7532902706974277, "learning_rate": 1.0838605028386052e-06, "loss": 0.6636, "step": 27569 }, { "epoch": 0.8049400017517737, "grad_norm": 0.7073717900753331, "learning_rate": 1.083698296836983e-06, "loss": 0.6082, "step": 27570 }, { "epoch": 0.8049691979796211, "grad_norm": 0.7375677541861155, "learning_rate": 1.083536090835361e-06, "loss": 0.6513, "step": 27571 }, { "epoch": 0.8049983942074684, "grad_norm": 0.7150528249471038, "learning_rate": 1.083373884833739e-06, "loss": 0.6366, "step": 27572 }, { "epoch": 0.8050275904353158, "grad_norm": 0.716161645734034, "learning_rate": 1.083211678832117e-06, "loss": 0.5975, "step": 27573 }, { "epoch": 0.8050567866631632, "grad_norm": 0.6843149744300034, "learning_rate": 1.0830494728304948e-06, "loss": 0.5739, "step": 27574 }, { "epoch": 0.8050859828910105, "grad_norm": 0.7242367421800953, "learning_rate": 1.0828872668288728e-06, "loss": 0.6462, "step": 27575 }, { "epoch": 0.8051151791188579, "grad_norm": 0.7512814248462496, "learning_rate": 1.0827250608272508e-06, "loss": 0.7062, "step": 27576 }, { "epoch": 0.8051443753467052, "grad_norm": 0.7404438696620089, "learning_rate": 1.0825628548256286e-06, "loss": 0.6399, "step": 27577 }, { "epoch": 0.8051735715745526, "grad_norm": 0.7449097161545617, "learning_rate": 1.0824006488240066e-06, "loss": 0.6769, "step": 27578 }, { "epoch": 0.8052027678024, "grad_norm": 0.6921151647465824, "learning_rate": 1.0822384428223844e-06, "loss": 0.5923, "step": 27579 }, { "epoch": 0.8052319640302473, "grad_norm": 0.7502502161544812, "learning_rate": 1.0820762368207624e-06, "loss": 0.7056, "step": 27580 }, { "epoch": 0.8052611602580947, "grad_norm": 0.8561801855369685, "learning_rate": 1.0819140308191404e-06, "loss": 0.8026, "step": 27581 }, { "epoch": 0.805290356485942, "grad_norm": 0.7217458708168015, "learning_rate": 1.0817518248175184e-06, "loss": 0.6354, "step": 27582 }, { "epoch": 0.8053195527137894, "grad_norm": 0.7139117347718009, "learning_rate": 1.0815896188158962e-06, "loss": 0.658, "step": 27583 }, { "epoch": 0.8053487489416368, "grad_norm": 0.6883331148564813, "learning_rate": 1.0814274128142742e-06, "loss": 0.6371, "step": 27584 }, { "epoch": 0.8053779451694841, "grad_norm": 0.9322144189390198, "learning_rate": 1.081265206812652e-06, "loss": 0.624, "step": 27585 }, { "epoch": 0.8054071413973315, "grad_norm": 0.6615084960962099, "learning_rate": 1.0811030008110302e-06, "loss": 0.5547, "step": 27586 }, { "epoch": 0.8054363376251789, "grad_norm": 0.7199660928358395, "learning_rate": 1.080940794809408e-06, "loss": 0.6059, "step": 27587 }, { "epoch": 0.8054655338530262, "grad_norm": 0.7754880177868768, "learning_rate": 1.080778588807786e-06, "loss": 0.6543, "step": 27588 }, { "epoch": 0.8054947300808736, "grad_norm": 0.7129536637810258, "learning_rate": 1.0806163828061638e-06, "loss": 0.6339, "step": 27589 }, { "epoch": 0.8055239263087209, "grad_norm": 0.6946730075350387, "learning_rate": 1.0804541768045418e-06, "loss": 0.5709, "step": 27590 }, { "epoch": 0.8055531225365683, "grad_norm": 0.7100283094920161, "learning_rate": 1.0802919708029198e-06, "loss": 0.6264, "step": 27591 }, { "epoch": 0.8055823187644157, "grad_norm": 0.7227807548248641, "learning_rate": 1.0801297648012978e-06, "loss": 0.6633, "step": 27592 }, { "epoch": 0.805611514992263, "grad_norm": 0.7134164201755065, "learning_rate": 1.0799675587996756e-06, "loss": 0.6579, "step": 27593 }, { "epoch": 0.8056407112201104, "grad_norm": 0.7020180491813917, "learning_rate": 1.0798053527980536e-06, "loss": 0.563, "step": 27594 }, { "epoch": 0.8056699074479577, "grad_norm": 0.6896208888413959, "learning_rate": 1.0796431467964317e-06, "loss": 0.5894, "step": 27595 }, { "epoch": 0.8056991036758051, "grad_norm": 0.7657569493870053, "learning_rate": 1.0794809407948094e-06, "loss": 0.7054, "step": 27596 }, { "epoch": 0.8057282999036525, "grad_norm": 0.7089913111130245, "learning_rate": 1.0793187347931875e-06, "loss": 0.6147, "step": 27597 }, { "epoch": 0.8057574961314998, "grad_norm": 0.6907001199553241, "learning_rate": 1.0791565287915652e-06, "loss": 0.6081, "step": 27598 }, { "epoch": 0.8057866923593472, "grad_norm": 0.7508769581566245, "learning_rate": 1.0789943227899433e-06, "loss": 0.6386, "step": 27599 }, { "epoch": 0.8058158885871946, "grad_norm": 0.6811103579861248, "learning_rate": 1.0788321167883213e-06, "loss": 0.5898, "step": 27600 }, { "epoch": 0.8058450848150419, "grad_norm": 0.7099578176572718, "learning_rate": 1.0786699107866993e-06, "loss": 0.5905, "step": 27601 }, { "epoch": 0.8058742810428893, "grad_norm": 0.7181945838565071, "learning_rate": 1.078507704785077e-06, "loss": 0.5835, "step": 27602 }, { "epoch": 0.8059034772707366, "grad_norm": 0.7326284515859475, "learning_rate": 1.078345498783455e-06, "loss": 0.707, "step": 27603 }, { "epoch": 0.805932673498584, "grad_norm": 0.6960035475817729, "learning_rate": 1.0781832927818329e-06, "loss": 0.6088, "step": 27604 }, { "epoch": 0.8059618697264314, "grad_norm": 0.6343940372303499, "learning_rate": 1.078021086780211e-06, "loss": 0.5092, "step": 27605 }, { "epoch": 0.8059910659542787, "grad_norm": 0.7775124418695042, "learning_rate": 1.0778588807785889e-06, "loss": 0.7467, "step": 27606 }, { "epoch": 0.8060202621821261, "grad_norm": 0.6969096436693298, "learning_rate": 1.0776966747769669e-06, "loss": 0.634, "step": 27607 }, { "epoch": 0.8060494584099734, "grad_norm": 0.6892958308464768, "learning_rate": 1.0775344687753447e-06, "loss": 0.5885, "step": 27608 }, { "epoch": 0.8060786546378208, "grad_norm": 0.7321043893024716, "learning_rate": 1.0773722627737227e-06, "loss": 0.6628, "step": 27609 }, { "epoch": 0.8061078508656682, "grad_norm": 0.8018920933964659, "learning_rate": 1.0772100567721007e-06, "loss": 0.7394, "step": 27610 }, { "epoch": 0.8061370470935155, "grad_norm": 0.7244288830150082, "learning_rate": 1.0770478507704787e-06, "loss": 0.6552, "step": 27611 }, { "epoch": 0.8061662433213629, "grad_norm": 0.6854049602838364, "learning_rate": 1.0768856447688565e-06, "loss": 0.6016, "step": 27612 }, { "epoch": 0.8061954395492102, "grad_norm": 0.7321131439716386, "learning_rate": 1.0767234387672345e-06, "loss": 0.6619, "step": 27613 }, { "epoch": 0.8062246357770576, "grad_norm": 0.6889241683108738, "learning_rate": 1.0765612327656125e-06, "loss": 0.5888, "step": 27614 }, { "epoch": 0.806253832004905, "grad_norm": 0.7302356813984011, "learning_rate": 1.0763990267639903e-06, "loss": 0.6246, "step": 27615 }, { "epoch": 0.8062830282327523, "grad_norm": 0.6803191024480627, "learning_rate": 1.0762368207623683e-06, "loss": 0.6299, "step": 27616 }, { "epoch": 0.8063122244605997, "grad_norm": 0.7447191043969594, "learning_rate": 1.0760746147607461e-06, "loss": 0.6674, "step": 27617 }, { "epoch": 0.806341420688447, "grad_norm": 0.773659200753665, "learning_rate": 1.0759124087591241e-06, "loss": 0.7443, "step": 27618 }, { "epoch": 0.8063706169162944, "grad_norm": 0.7234351743806313, "learning_rate": 1.0757502027575021e-06, "loss": 0.6461, "step": 27619 }, { "epoch": 0.8063998131441418, "grad_norm": 0.7143583754800457, "learning_rate": 1.0755879967558801e-06, "loss": 0.6366, "step": 27620 }, { "epoch": 0.8064290093719891, "grad_norm": 0.710371434258882, "learning_rate": 1.075425790754258e-06, "loss": 0.6196, "step": 27621 }, { "epoch": 0.8064582055998365, "grad_norm": 0.6583823517023987, "learning_rate": 1.075263584752636e-06, "loss": 0.5647, "step": 27622 }, { "epoch": 0.8064874018276839, "grad_norm": 0.837066526517391, "learning_rate": 1.0751013787510137e-06, "loss": 0.7454, "step": 27623 }, { "epoch": 0.8065165980555312, "grad_norm": 0.7062495347502467, "learning_rate": 1.074939172749392e-06, "loss": 0.5862, "step": 27624 }, { "epoch": 0.8065457942833786, "grad_norm": 0.7007951187297297, "learning_rate": 1.0747769667477697e-06, "loss": 0.5926, "step": 27625 }, { "epoch": 0.8065749905112259, "grad_norm": 0.6989251078236258, "learning_rate": 1.0746147607461477e-06, "loss": 0.6334, "step": 27626 }, { "epoch": 0.8066041867390733, "grad_norm": 0.7024297801494325, "learning_rate": 1.0744525547445255e-06, "loss": 0.6218, "step": 27627 }, { "epoch": 0.8066333829669207, "grad_norm": 0.7460555919432421, "learning_rate": 1.0742903487429035e-06, "loss": 0.6883, "step": 27628 }, { "epoch": 0.806662579194768, "grad_norm": 0.7383631106489943, "learning_rate": 1.0741281427412816e-06, "loss": 0.6577, "step": 27629 }, { "epoch": 0.8066917754226154, "grad_norm": 0.7795210728769285, "learning_rate": 1.0739659367396596e-06, "loss": 0.6996, "step": 27630 }, { "epoch": 0.8067209716504627, "grad_norm": 0.6645763010713193, "learning_rate": 1.0738037307380374e-06, "loss": 0.4989, "step": 27631 }, { "epoch": 0.8067501678783101, "grad_norm": 0.7184950960821778, "learning_rate": 1.0736415247364154e-06, "loss": 0.6208, "step": 27632 }, { "epoch": 0.8067793641061575, "grad_norm": 0.749370640208021, "learning_rate": 1.0734793187347934e-06, "loss": 0.6296, "step": 27633 }, { "epoch": 0.8068085603340048, "grad_norm": 0.7005559889424964, "learning_rate": 1.0733171127331712e-06, "loss": 0.6583, "step": 27634 }, { "epoch": 0.8068377565618522, "grad_norm": 0.7062584488630086, "learning_rate": 1.0731549067315492e-06, "loss": 0.6152, "step": 27635 }, { "epoch": 0.8068669527896996, "grad_norm": 0.7403832137923532, "learning_rate": 1.072992700729927e-06, "loss": 0.6758, "step": 27636 }, { "epoch": 0.8068961490175469, "grad_norm": 0.7192698578713145, "learning_rate": 1.072830494728305e-06, "loss": 0.6276, "step": 27637 }, { "epoch": 0.8069253452453943, "grad_norm": 0.7362984855141019, "learning_rate": 1.072668288726683e-06, "loss": 0.5749, "step": 27638 }, { "epoch": 0.8069545414732416, "grad_norm": 0.7354004243950506, "learning_rate": 1.072506082725061e-06, "loss": 0.6779, "step": 27639 }, { "epoch": 0.806983737701089, "grad_norm": 0.7950383103583603, "learning_rate": 1.0723438767234388e-06, "loss": 0.7148, "step": 27640 }, { "epoch": 0.8070129339289364, "grad_norm": 0.70192859584479, "learning_rate": 1.0721816707218168e-06, "loss": 0.5884, "step": 27641 }, { "epoch": 0.8070421301567837, "grad_norm": 0.6894408703795876, "learning_rate": 1.0720194647201948e-06, "loss": 0.6071, "step": 27642 }, { "epoch": 0.8070713263846311, "grad_norm": 0.7194525939602314, "learning_rate": 1.0718572587185728e-06, "loss": 0.5994, "step": 27643 }, { "epoch": 0.8071005226124784, "grad_norm": 0.6733828188580753, "learning_rate": 1.0716950527169506e-06, "loss": 0.6015, "step": 27644 }, { "epoch": 0.8071297188403258, "grad_norm": 0.6916495258894966, "learning_rate": 1.0715328467153286e-06, "loss": 0.5665, "step": 27645 }, { "epoch": 0.8071589150681732, "grad_norm": 0.7338330984693425, "learning_rate": 1.0713706407137064e-06, "loss": 0.6622, "step": 27646 }, { "epoch": 0.8071881112960205, "grad_norm": 0.7321289211504377, "learning_rate": 1.0712084347120844e-06, "loss": 0.6437, "step": 27647 }, { "epoch": 0.8072173075238679, "grad_norm": 0.6964760904794651, "learning_rate": 1.0710462287104624e-06, "loss": 0.5849, "step": 27648 }, { "epoch": 0.8072465037517152, "grad_norm": 0.7739658510989718, "learning_rate": 1.0708840227088404e-06, "loss": 0.6605, "step": 27649 }, { "epoch": 0.8072756999795626, "grad_norm": 0.7249376143969362, "learning_rate": 1.0707218167072182e-06, "loss": 0.6416, "step": 27650 }, { "epoch": 0.80730489620741, "grad_norm": 0.6877338996645364, "learning_rate": 1.0705596107055962e-06, "loss": 0.555, "step": 27651 }, { "epoch": 0.8073340924352573, "grad_norm": 0.7288599782336967, "learning_rate": 1.0703974047039742e-06, "loss": 0.6424, "step": 27652 }, { "epoch": 0.8073632886631047, "grad_norm": 0.6932392963438304, "learning_rate": 1.070235198702352e-06, "loss": 0.5995, "step": 27653 }, { "epoch": 0.8073924848909521, "grad_norm": 0.7314744838485442, "learning_rate": 1.07007299270073e-06, "loss": 0.6602, "step": 27654 }, { "epoch": 0.8074216811187994, "grad_norm": 0.7490929261295163, "learning_rate": 1.0699107866991078e-06, "loss": 0.6229, "step": 27655 }, { "epoch": 0.8074508773466468, "grad_norm": 0.750220933703607, "learning_rate": 1.0697485806974858e-06, "loss": 0.6668, "step": 27656 }, { "epoch": 0.8074800735744941, "grad_norm": 0.7288893071207273, "learning_rate": 1.0695863746958638e-06, "loss": 0.6724, "step": 27657 }, { "epoch": 0.8075092698023415, "grad_norm": 0.6887045577216653, "learning_rate": 1.0694241686942418e-06, "loss": 0.5657, "step": 27658 }, { "epoch": 0.8075384660301889, "grad_norm": 0.7057030082485252, "learning_rate": 1.0692619626926196e-06, "loss": 0.6322, "step": 27659 }, { "epoch": 0.8075676622580362, "grad_norm": 0.7443759660000512, "learning_rate": 1.0690997566909976e-06, "loss": 0.5722, "step": 27660 }, { "epoch": 0.8075968584858836, "grad_norm": 0.764492962059907, "learning_rate": 1.0689375506893757e-06, "loss": 0.7217, "step": 27661 }, { "epoch": 0.807626054713731, "grad_norm": 0.7190955251488254, "learning_rate": 1.0687753446877537e-06, "loss": 0.6316, "step": 27662 }, { "epoch": 0.8076552509415783, "grad_norm": 0.6457245679076418, "learning_rate": 1.0686131386861315e-06, "loss": 0.5135, "step": 27663 }, { "epoch": 0.8076844471694257, "grad_norm": 0.7073825356414555, "learning_rate": 1.0684509326845095e-06, "loss": 0.586, "step": 27664 }, { "epoch": 0.807713643397273, "grad_norm": 0.7402784157246708, "learning_rate": 1.0682887266828873e-06, "loss": 0.6547, "step": 27665 }, { "epoch": 0.8077428396251204, "grad_norm": 0.7507307840025951, "learning_rate": 1.0681265206812653e-06, "loss": 0.6937, "step": 27666 }, { "epoch": 0.8077720358529678, "grad_norm": 0.7594171524748454, "learning_rate": 1.0679643146796433e-06, "loss": 0.6355, "step": 27667 }, { "epoch": 0.8078012320808151, "grad_norm": 0.7891669397954901, "learning_rate": 1.0678021086780213e-06, "loss": 0.6977, "step": 27668 }, { "epoch": 0.8078304283086625, "grad_norm": 0.7175616265747328, "learning_rate": 1.067639902676399e-06, "loss": 0.6046, "step": 27669 }, { "epoch": 0.8078596245365098, "grad_norm": 0.6889773162890207, "learning_rate": 1.067477696674777e-06, "loss": 0.5881, "step": 27670 }, { "epoch": 0.8078888207643572, "grad_norm": 0.671833053576012, "learning_rate": 1.067315490673155e-06, "loss": 0.572, "step": 27671 }, { "epoch": 0.8079180169922046, "grad_norm": 0.7264294659987257, "learning_rate": 1.0671532846715329e-06, "loss": 0.6654, "step": 27672 }, { "epoch": 0.8079472132200519, "grad_norm": 0.7655730566918346, "learning_rate": 1.0669910786699109e-06, "loss": 0.6609, "step": 27673 }, { "epoch": 0.8079764094478993, "grad_norm": 0.6739189591938456, "learning_rate": 1.0668288726682887e-06, "loss": 0.5757, "step": 27674 }, { "epoch": 0.8080056056757466, "grad_norm": 0.6981023759514798, "learning_rate": 1.066666666666667e-06, "loss": 0.6359, "step": 27675 }, { "epoch": 0.808034801903594, "grad_norm": 0.6518323515357631, "learning_rate": 1.0665044606650447e-06, "loss": 0.499, "step": 27676 }, { "epoch": 0.8080639981314414, "grad_norm": 0.7248408061758486, "learning_rate": 1.0663422546634227e-06, "loss": 0.6587, "step": 27677 }, { "epoch": 0.8080931943592887, "grad_norm": 0.7528800054192677, "learning_rate": 1.0661800486618005e-06, "loss": 0.6443, "step": 27678 }, { "epoch": 0.8081223905871362, "grad_norm": 0.7251557390745655, "learning_rate": 1.0660178426601785e-06, "loss": 0.653, "step": 27679 }, { "epoch": 0.8081515868149836, "grad_norm": 0.7202322385992754, "learning_rate": 1.0658556366585565e-06, "loss": 0.6154, "step": 27680 }, { "epoch": 0.8081807830428309, "grad_norm": 0.7943069395613773, "learning_rate": 1.0656934306569345e-06, "loss": 0.6765, "step": 27681 }, { "epoch": 0.8082099792706783, "grad_norm": 0.7798841678908914, "learning_rate": 1.0655312246553123e-06, "loss": 0.6386, "step": 27682 }, { "epoch": 0.8082391754985256, "grad_norm": 0.7253655308439998, "learning_rate": 1.0653690186536903e-06, "loss": 0.6449, "step": 27683 }, { "epoch": 0.808268371726373, "grad_norm": 0.6923397173938467, "learning_rate": 1.0652068126520681e-06, "loss": 0.5916, "step": 27684 }, { "epoch": 0.8082975679542204, "grad_norm": 0.7751440295927868, "learning_rate": 1.0650446066504461e-06, "loss": 0.7393, "step": 27685 }, { "epoch": 0.8083267641820677, "grad_norm": 0.7123384235190996, "learning_rate": 1.0648824006488241e-06, "loss": 0.6594, "step": 27686 }, { "epoch": 0.8083559604099151, "grad_norm": 0.7730124822238389, "learning_rate": 1.064720194647202e-06, "loss": 0.6733, "step": 27687 }, { "epoch": 0.8083851566377624, "grad_norm": 0.7120612332090058, "learning_rate": 1.06455798864558e-06, "loss": 0.5842, "step": 27688 }, { "epoch": 0.8084143528656098, "grad_norm": 0.6731823831783529, "learning_rate": 1.064395782643958e-06, "loss": 0.5304, "step": 27689 }, { "epoch": 0.8084435490934572, "grad_norm": 0.7218036858799866, "learning_rate": 1.064233576642336e-06, "loss": 0.6716, "step": 27690 }, { "epoch": 0.8084727453213045, "grad_norm": 0.7563880953856631, "learning_rate": 1.0640713706407137e-06, "loss": 0.7011, "step": 27691 }, { "epoch": 0.8085019415491519, "grad_norm": 0.7350499496280756, "learning_rate": 1.0639091646390917e-06, "loss": 0.6583, "step": 27692 }, { "epoch": 0.8085311377769993, "grad_norm": 0.7216350385636419, "learning_rate": 1.0637469586374695e-06, "loss": 0.6204, "step": 27693 }, { "epoch": 0.8085603340048466, "grad_norm": 0.6836943904546177, "learning_rate": 1.0635847526358478e-06, "loss": 0.5337, "step": 27694 }, { "epoch": 0.808589530232694, "grad_norm": 0.7624926786189083, "learning_rate": 1.0634225466342256e-06, "loss": 0.6742, "step": 27695 }, { "epoch": 0.8086187264605413, "grad_norm": 0.7638031921256114, "learning_rate": 1.0632603406326036e-06, "loss": 0.6903, "step": 27696 }, { "epoch": 0.8086479226883887, "grad_norm": 0.6621008252851565, "learning_rate": 1.0630981346309814e-06, "loss": 0.5671, "step": 27697 }, { "epoch": 0.8086771189162361, "grad_norm": 0.8531057147555853, "learning_rate": 1.0629359286293594e-06, "loss": 0.7025, "step": 27698 }, { "epoch": 0.8087063151440834, "grad_norm": 0.7813411115715445, "learning_rate": 1.0627737226277374e-06, "loss": 0.6792, "step": 27699 }, { "epoch": 0.8087355113719308, "grad_norm": 0.7218639957511058, "learning_rate": 1.0626115166261154e-06, "loss": 0.6145, "step": 27700 }, { "epoch": 0.8087647075997781, "grad_norm": 0.7649715768021025, "learning_rate": 1.0624493106244932e-06, "loss": 0.6909, "step": 27701 }, { "epoch": 0.8087939038276255, "grad_norm": 0.6950519761602524, "learning_rate": 1.0622871046228712e-06, "loss": 0.5809, "step": 27702 }, { "epoch": 0.8088231000554729, "grad_norm": 0.7237466738238651, "learning_rate": 1.062124898621249e-06, "loss": 0.5856, "step": 27703 }, { "epoch": 0.8088522962833202, "grad_norm": 0.6970934044392659, "learning_rate": 1.061962692619627e-06, "loss": 0.641, "step": 27704 }, { "epoch": 0.8088814925111676, "grad_norm": 0.7231420710617376, "learning_rate": 1.061800486618005e-06, "loss": 0.6323, "step": 27705 }, { "epoch": 0.808910688739015, "grad_norm": 0.6896424849222352, "learning_rate": 1.0616382806163828e-06, "loss": 0.5582, "step": 27706 }, { "epoch": 0.8089398849668623, "grad_norm": 0.6988588048480892, "learning_rate": 1.0614760746147608e-06, "loss": 0.5771, "step": 27707 }, { "epoch": 0.8089690811947097, "grad_norm": 0.6926479380212717, "learning_rate": 1.0613138686131388e-06, "loss": 0.5683, "step": 27708 }, { "epoch": 0.808998277422557, "grad_norm": 0.7028012146327092, "learning_rate": 1.0611516626115168e-06, "loss": 0.5999, "step": 27709 }, { "epoch": 0.8090274736504044, "grad_norm": 0.7313756932800993, "learning_rate": 1.0609894566098946e-06, "loss": 0.6413, "step": 27710 }, { "epoch": 0.8090566698782518, "grad_norm": 0.6663836688757487, "learning_rate": 1.0608272506082726e-06, "loss": 0.5943, "step": 27711 }, { "epoch": 0.8090858661060991, "grad_norm": 0.6990295085798024, "learning_rate": 1.0606650446066504e-06, "loss": 0.6138, "step": 27712 }, { "epoch": 0.8091150623339465, "grad_norm": 0.6969041178543175, "learning_rate": 1.0605028386050286e-06, "loss": 0.6236, "step": 27713 }, { "epoch": 0.8091442585617938, "grad_norm": 0.7604977338772921, "learning_rate": 1.0603406326034064e-06, "loss": 0.6878, "step": 27714 }, { "epoch": 0.8091734547896412, "grad_norm": 0.7380568411762268, "learning_rate": 1.0601784266017844e-06, "loss": 0.7088, "step": 27715 }, { "epoch": 0.8092026510174886, "grad_norm": 0.7238591912135373, "learning_rate": 1.0600162206001622e-06, "loss": 0.6134, "step": 27716 }, { "epoch": 0.8092318472453359, "grad_norm": 0.711936665283564, "learning_rate": 1.0598540145985402e-06, "loss": 0.6284, "step": 27717 }, { "epoch": 0.8092610434731833, "grad_norm": 0.6967357591818195, "learning_rate": 1.0596918085969182e-06, "loss": 0.6611, "step": 27718 }, { "epoch": 0.8092902397010306, "grad_norm": 0.6701670941584207, "learning_rate": 1.0595296025952962e-06, "loss": 0.5351, "step": 27719 }, { "epoch": 0.809319435928878, "grad_norm": 0.6908398337718236, "learning_rate": 1.059367396593674e-06, "loss": 0.5949, "step": 27720 }, { "epoch": 0.8093486321567254, "grad_norm": 0.6737040980117843, "learning_rate": 1.059205190592052e-06, "loss": 0.5405, "step": 27721 }, { "epoch": 0.8093778283845727, "grad_norm": 0.7041360318767528, "learning_rate": 1.0590429845904298e-06, "loss": 0.5683, "step": 27722 }, { "epoch": 0.8094070246124201, "grad_norm": 0.7535861627203487, "learning_rate": 1.0588807785888078e-06, "loss": 0.686, "step": 27723 }, { "epoch": 0.8094362208402675, "grad_norm": 0.6937393656501466, "learning_rate": 1.0587185725871858e-06, "loss": 0.5944, "step": 27724 }, { "epoch": 0.8094654170681148, "grad_norm": 0.7731243972138648, "learning_rate": 1.0585563665855636e-06, "loss": 0.7132, "step": 27725 }, { "epoch": 0.8094946132959622, "grad_norm": 0.8212754091321146, "learning_rate": 1.0583941605839416e-06, "loss": 0.7069, "step": 27726 }, { "epoch": 0.8095238095238095, "grad_norm": 0.7826363686327119, "learning_rate": 1.0582319545823197e-06, "loss": 0.6499, "step": 27727 }, { "epoch": 0.8095530057516569, "grad_norm": 0.7088590096539287, "learning_rate": 1.0580697485806977e-06, "loss": 0.6057, "step": 27728 }, { "epoch": 0.8095822019795043, "grad_norm": 0.6806121009029491, "learning_rate": 1.0579075425790755e-06, "loss": 0.5743, "step": 27729 }, { "epoch": 0.8096113982073516, "grad_norm": 0.7681377742862672, "learning_rate": 1.0577453365774535e-06, "loss": 0.6368, "step": 27730 }, { "epoch": 0.809640594435199, "grad_norm": 0.7614845465162967, "learning_rate": 1.0575831305758313e-06, "loss": 0.6781, "step": 27731 }, { "epoch": 0.8096697906630463, "grad_norm": 0.800780529500228, "learning_rate": 1.0574209245742095e-06, "loss": 0.7578, "step": 27732 }, { "epoch": 0.8096989868908937, "grad_norm": 0.7802646406863523, "learning_rate": 1.0572587185725873e-06, "loss": 0.6193, "step": 27733 }, { "epoch": 0.8097281831187411, "grad_norm": 0.7754458310906563, "learning_rate": 1.0570965125709653e-06, "loss": 0.7419, "step": 27734 }, { "epoch": 0.8097573793465884, "grad_norm": 0.7205971157486918, "learning_rate": 1.056934306569343e-06, "loss": 0.6506, "step": 27735 }, { "epoch": 0.8097865755744358, "grad_norm": 0.7538151085592184, "learning_rate": 1.056772100567721e-06, "loss": 0.6243, "step": 27736 }, { "epoch": 0.8098157718022831, "grad_norm": 0.7002273328523652, "learning_rate": 1.056609894566099e-06, "loss": 0.6149, "step": 27737 }, { "epoch": 0.8098449680301305, "grad_norm": 0.6850383523479616, "learning_rate": 1.056447688564477e-06, "loss": 0.6104, "step": 27738 }, { "epoch": 0.8098741642579779, "grad_norm": 0.7080216204638711, "learning_rate": 1.0562854825628549e-06, "loss": 0.6531, "step": 27739 }, { "epoch": 0.8099033604858252, "grad_norm": 0.744186095369097, "learning_rate": 1.056123276561233e-06, "loss": 0.6345, "step": 27740 }, { "epoch": 0.8099325567136726, "grad_norm": 0.7531547274163142, "learning_rate": 1.0559610705596107e-06, "loss": 0.6843, "step": 27741 }, { "epoch": 0.80996175294152, "grad_norm": 0.7431643951278328, "learning_rate": 1.0557988645579887e-06, "loss": 0.7243, "step": 27742 }, { "epoch": 0.8099909491693673, "grad_norm": 0.7301095705490553, "learning_rate": 1.0556366585563667e-06, "loss": 0.6714, "step": 27743 }, { "epoch": 0.8100201453972147, "grad_norm": 0.7646742846298005, "learning_rate": 1.0554744525547445e-06, "loss": 0.6739, "step": 27744 }, { "epoch": 0.810049341625062, "grad_norm": 0.7511881919015795, "learning_rate": 1.0553122465531225e-06, "loss": 0.6772, "step": 27745 }, { "epoch": 0.8100785378529094, "grad_norm": 0.6969264953762141, "learning_rate": 1.0551500405515005e-06, "loss": 0.6288, "step": 27746 }, { "epoch": 0.8101077340807568, "grad_norm": 0.7066527608110313, "learning_rate": 1.0549878345498785e-06, "loss": 0.5746, "step": 27747 }, { "epoch": 0.8101369303086041, "grad_norm": 0.6988159989100231, "learning_rate": 1.0548256285482563e-06, "loss": 0.579, "step": 27748 }, { "epoch": 0.8101661265364515, "grad_norm": 0.729314081556785, "learning_rate": 1.0546634225466343e-06, "loss": 0.6728, "step": 27749 }, { "epoch": 0.8101953227642988, "grad_norm": 0.6913741391103675, "learning_rate": 1.0545012165450121e-06, "loss": 0.5707, "step": 27750 }, { "epoch": 0.8102245189921462, "grad_norm": 0.6721554108362309, "learning_rate": 1.0543390105433903e-06, "loss": 0.5349, "step": 27751 }, { "epoch": 0.8102537152199936, "grad_norm": 0.7551668549341037, "learning_rate": 1.0541768045417681e-06, "loss": 0.6962, "step": 27752 }, { "epoch": 0.8102829114478409, "grad_norm": 0.7745585653980575, "learning_rate": 1.0540145985401461e-06, "loss": 0.6299, "step": 27753 }, { "epoch": 0.8103121076756883, "grad_norm": 0.699050776345375, "learning_rate": 1.053852392538524e-06, "loss": 0.5611, "step": 27754 }, { "epoch": 0.8103413039035356, "grad_norm": 0.7175255810733612, "learning_rate": 1.053690186536902e-06, "loss": 0.6631, "step": 27755 }, { "epoch": 0.810370500131383, "grad_norm": 0.7038482283359508, "learning_rate": 1.05352798053528e-06, "loss": 0.5978, "step": 27756 }, { "epoch": 0.8103996963592304, "grad_norm": 0.7344987048552895, "learning_rate": 1.053365774533658e-06, "loss": 0.702, "step": 27757 }, { "epoch": 0.8104288925870777, "grad_norm": 0.7377720409854557, "learning_rate": 1.0532035685320357e-06, "loss": 0.6248, "step": 27758 }, { "epoch": 0.8104580888149251, "grad_norm": 0.7619003282384388, "learning_rate": 1.0530413625304138e-06, "loss": 0.671, "step": 27759 }, { "epoch": 0.8104872850427725, "grad_norm": 0.6773799110617297, "learning_rate": 1.0528791565287918e-06, "loss": 0.5487, "step": 27760 }, { "epoch": 0.8105164812706198, "grad_norm": 0.7952145683014026, "learning_rate": 1.0527169505271696e-06, "loss": 0.8018, "step": 27761 }, { "epoch": 0.8105456774984672, "grad_norm": 0.7354588718349365, "learning_rate": 1.0525547445255476e-06, "loss": 0.6737, "step": 27762 }, { "epoch": 0.8105748737263145, "grad_norm": 0.7521326919627948, "learning_rate": 1.0523925385239254e-06, "loss": 0.6824, "step": 27763 }, { "epoch": 0.8106040699541619, "grad_norm": 0.7550264116316547, "learning_rate": 1.0522303325223034e-06, "loss": 0.6949, "step": 27764 }, { "epoch": 0.8106332661820093, "grad_norm": 0.7314136446774296, "learning_rate": 1.0520681265206814e-06, "loss": 0.6437, "step": 27765 }, { "epoch": 0.8106624624098566, "grad_norm": 0.7039139184191134, "learning_rate": 1.0519059205190594e-06, "loss": 0.6723, "step": 27766 }, { "epoch": 0.810691658637704, "grad_norm": 0.760400437659608, "learning_rate": 1.0517437145174372e-06, "loss": 0.6615, "step": 27767 }, { "epoch": 0.8107208548655513, "grad_norm": 0.7582160855911417, "learning_rate": 1.0515815085158152e-06, "loss": 0.7011, "step": 27768 }, { "epoch": 0.8107500510933987, "grad_norm": 0.7771173601668, "learning_rate": 1.051419302514193e-06, "loss": 0.6781, "step": 27769 }, { "epoch": 0.8107792473212461, "grad_norm": 0.7138081727561796, "learning_rate": 1.0512570965125712e-06, "loss": 0.6576, "step": 27770 }, { "epoch": 0.8108084435490934, "grad_norm": 0.733333615681301, "learning_rate": 1.051094890510949e-06, "loss": 0.6188, "step": 27771 }, { "epoch": 0.8108376397769408, "grad_norm": 0.7375811438848541, "learning_rate": 1.050932684509327e-06, "loss": 0.6796, "step": 27772 }, { "epoch": 0.8108668360047881, "grad_norm": 0.7205162511846516, "learning_rate": 1.0507704785077048e-06, "loss": 0.6215, "step": 27773 }, { "epoch": 0.8108960322326355, "grad_norm": 0.7241955236572492, "learning_rate": 1.0506082725060828e-06, "loss": 0.6986, "step": 27774 }, { "epoch": 0.8109252284604829, "grad_norm": 0.7173607481354414, "learning_rate": 1.0504460665044608e-06, "loss": 0.6496, "step": 27775 }, { "epoch": 0.8109544246883302, "grad_norm": 0.7089870505662321, "learning_rate": 1.0502838605028386e-06, "loss": 0.6358, "step": 27776 }, { "epoch": 0.8109836209161776, "grad_norm": 0.6914018957212024, "learning_rate": 1.0501216545012166e-06, "loss": 0.5735, "step": 27777 }, { "epoch": 0.811012817144025, "grad_norm": 0.8621931916536087, "learning_rate": 1.0499594484995946e-06, "loss": 0.7366, "step": 27778 }, { "epoch": 0.8110420133718723, "grad_norm": 0.6587776128382171, "learning_rate": 1.0497972424979726e-06, "loss": 0.5597, "step": 27779 }, { "epoch": 0.8110712095997197, "grad_norm": 0.8867736775077613, "learning_rate": 1.0496350364963504e-06, "loss": 0.7156, "step": 27780 }, { "epoch": 0.811100405827567, "grad_norm": 0.8302458932210356, "learning_rate": 1.0494728304947284e-06, "loss": 0.7287, "step": 27781 }, { "epoch": 0.8111296020554144, "grad_norm": 0.6945330829139614, "learning_rate": 1.0493106244931062e-06, "loss": 0.5627, "step": 27782 }, { "epoch": 0.8111587982832618, "grad_norm": 0.6555331746852585, "learning_rate": 1.0491484184914842e-06, "loss": 0.5533, "step": 27783 }, { "epoch": 0.8111879945111091, "grad_norm": 0.6748948640353768, "learning_rate": 1.0489862124898622e-06, "loss": 0.5877, "step": 27784 }, { "epoch": 0.8112171907389565, "grad_norm": 0.7862332383795111, "learning_rate": 1.0488240064882402e-06, "loss": 0.6523, "step": 27785 }, { "epoch": 0.8112463869668038, "grad_norm": 0.8967712148608409, "learning_rate": 1.048661800486618e-06, "loss": 0.6792, "step": 27786 }, { "epoch": 0.8112755831946512, "grad_norm": 0.7669142849540633, "learning_rate": 1.048499594484996e-06, "loss": 0.6991, "step": 27787 }, { "epoch": 0.8113047794224986, "grad_norm": 0.6930453575956417, "learning_rate": 1.0483373884833738e-06, "loss": 0.6227, "step": 27788 }, { "epoch": 0.8113339756503459, "grad_norm": 0.7413146828407268, "learning_rate": 1.048175182481752e-06, "loss": 0.7128, "step": 27789 }, { "epoch": 0.8113631718781933, "grad_norm": 0.7893861537720663, "learning_rate": 1.0480129764801298e-06, "loss": 0.673, "step": 27790 }, { "epoch": 0.8113923681060407, "grad_norm": 0.7585962889345349, "learning_rate": 1.0478507704785079e-06, "loss": 0.5877, "step": 27791 }, { "epoch": 0.811421564333888, "grad_norm": 0.773419228011643, "learning_rate": 1.0476885644768856e-06, "loss": 0.6689, "step": 27792 }, { "epoch": 0.8114507605617354, "grad_norm": 0.7099537860179016, "learning_rate": 1.0475263584752637e-06, "loss": 0.6176, "step": 27793 }, { "epoch": 0.8114799567895827, "grad_norm": 0.711482673807413, "learning_rate": 1.0473641524736417e-06, "loss": 0.5952, "step": 27794 }, { "epoch": 0.8115091530174301, "grad_norm": 0.7138514340145492, "learning_rate": 1.0472019464720195e-06, "loss": 0.6452, "step": 27795 }, { "epoch": 0.8115383492452775, "grad_norm": 0.7163940392112844, "learning_rate": 1.0470397404703975e-06, "loss": 0.6275, "step": 27796 }, { "epoch": 0.8115675454731248, "grad_norm": 0.7168349307532087, "learning_rate": 1.0468775344687755e-06, "loss": 0.6898, "step": 27797 }, { "epoch": 0.8115967417009722, "grad_norm": 0.7442032180666587, "learning_rate": 1.0467153284671535e-06, "loss": 0.747, "step": 27798 }, { "epoch": 0.8116259379288197, "grad_norm": 0.7532551467016807, "learning_rate": 1.0465531224655313e-06, "loss": 0.6977, "step": 27799 }, { "epoch": 0.811655134156667, "grad_norm": 0.7505308075710172, "learning_rate": 1.0463909164639093e-06, "loss": 0.5949, "step": 27800 }, { "epoch": 0.8116843303845144, "grad_norm": 0.7531074726111274, "learning_rate": 1.046228710462287e-06, "loss": 0.7, "step": 27801 }, { "epoch": 0.8117135266123617, "grad_norm": 1.1812770303426448, "learning_rate": 1.046066504460665e-06, "loss": 0.6832, "step": 27802 }, { "epoch": 0.8117427228402091, "grad_norm": 0.7943456845636051, "learning_rate": 1.045904298459043e-06, "loss": 0.7358, "step": 27803 }, { "epoch": 0.8117719190680565, "grad_norm": 0.7061651934206475, "learning_rate": 1.045742092457421e-06, "loss": 0.604, "step": 27804 }, { "epoch": 0.8118011152959038, "grad_norm": 0.7579010729288014, "learning_rate": 1.0455798864557989e-06, "loss": 0.6607, "step": 27805 }, { "epoch": 0.8118303115237512, "grad_norm": 0.6763113907120308, "learning_rate": 1.045417680454177e-06, "loss": 0.5657, "step": 27806 }, { "epoch": 0.8118595077515985, "grad_norm": 0.8267043355391992, "learning_rate": 1.0452554744525547e-06, "loss": 0.5743, "step": 27807 }, { "epoch": 0.8118887039794459, "grad_norm": 0.7415152753652853, "learning_rate": 1.045093268450933e-06, "loss": 0.5634, "step": 27808 }, { "epoch": 0.8119179002072933, "grad_norm": 0.7334198200160349, "learning_rate": 1.0449310624493107e-06, "loss": 0.6678, "step": 27809 }, { "epoch": 0.8119470964351406, "grad_norm": 0.7179367178748358, "learning_rate": 1.0447688564476887e-06, "loss": 0.6239, "step": 27810 }, { "epoch": 0.811976292662988, "grad_norm": 0.6934120843233363, "learning_rate": 1.0446066504460665e-06, "loss": 0.5896, "step": 27811 }, { "epoch": 0.8120054888908353, "grad_norm": 0.800113910712324, "learning_rate": 1.0444444444444445e-06, "loss": 0.7027, "step": 27812 }, { "epoch": 0.8120346851186827, "grad_norm": 0.7237820807955484, "learning_rate": 1.0442822384428225e-06, "loss": 0.5857, "step": 27813 }, { "epoch": 0.8120638813465301, "grad_norm": 0.7702371708358876, "learning_rate": 1.0441200324412003e-06, "loss": 0.7154, "step": 27814 }, { "epoch": 0.8120930775743774, "grad_norm": 0.6867272784423043, "learning_rate": 1.0439578264395783e-06, "loss": 0.5778, "step": 27815 }, { "epoch": 0.8121222738022248, "grad_norm": 0.7484760922582439, "learning_rate": 1.0437956204379563e-06, "loss": 0.6118, "step": 27816 }, { "epoch": 0.8121514700300722, "grad_norm": 0.7075362291434413, "learning_rate": 1.0436334144363343e-06, "loss": 0.6166, "step": 27817 }, { "epoch": 0.8121806662579195, "grad_norm": 0.7096108942348918, "learning_rate": 1.0434712084347121e-06, "loss": 0.6453, "step": 27818 }, { "epoch": 0.8122098624857669, "grad_norm": 0.7285871544692967, "learning_rate": 1.0433090024330901e-06, "loss": 0.6467, "step": 27819 }, { "epoch": 0.8122390587136142, "grad_norm": 0.6978403649184605, "learning_rate": 1.043146796431468e-06, "loss": 0.618, "step": 27820 }, { "epoch": 0.8122682549414616, "grad_norm": 0.7062033108069319, "learning_rate": 1.042984590429846e-06, "loss": 0.595, "step": 27821 }, { "epoch": 0.812297451169309, "grad_norm": 0.6649147567614696, "learning_rate": 1.042822384428224e-06, "loss": 0.5687, "step": 27822 }, { "epoch": 0.8123266473971563, "grad_norm": 0.7522304777296493, "learning_rate": 1.042660178426602e-06, "loss": 0.7011, "step": 27823 }, { "epoch": 0.8123558436250037, "grad_norm": 0.76597179674088, "learning_rate": 1.0424979724249797e-06, "loss": 0.724, "step": 27824 }, { "epoch": 0.812385039852851, "grad_norm": 1.0101856933667006, "learning_rate": 1.0423357664233578e-06, "loss": 0.5686, "step": 27825 }, { "epoch": 0.8124142360806984, "grad_norm": 0.7046719767896785, "learning_rate": 1.0421735604217358e-06, "loss": 0.6083, "step": 27826 }, { "epoch": 0.8124434323085458, "grad_norm": 0.847750752886819, "learning_rate": 1.0420113544201138e-06, "loss": 0.7122, "step": 27827 }, { "epoch": 0.8124726285363931, "grad_norm": 0.6960722253591285, "learning_rate": 1.0418491484184916e-06, "loss": 0.59, "step": 27828 }, { "epoch": 0.8125018247642405, "grad_norm": 0.7561503923114444, "learning_rate": 1.0416869424168696e-06, "loss": 0.6394, "step": 27829 }, { "epoch": 0.8125310209920878, "grad_norm": 0.7094185766815488, "learning_rate": 1.0415247364152474e-06, "loss": 0.5966, "step": 27830 }, { "epoch": 0.8125602172199352, "grad_norm": 0.7150891641863901, "learning_rate": 1.0413625304136254e-06, "loss": 0.6246, "step": 27831 }, { "epoch": 0.8125894134477826, "grad_norm": 0.6656172176733575, "learning_rate": 1.0412003244120034e-06, "loss": 0.5803, "step": 27832 }, { "epoch": 0.8126186096756299, "grad_norm": 0.6640014584352768, "learning_rate": 1.0410381184103812e-06, "loss": 0.5529, "step": 27833 }, { "epoch": 0.8126478059034773, "grad_norm": 0.7531400909698175, "learning_rate": 1.0408759124087592e-06, "loss": 0.6327, "step": 27834 }, { "epoch": 0.8126770021313247, "grad_norm": 0.8050026793687941, "learning_rate": 1.0407137064071372e-06, "loss": 0.6732, "step": 27835 }, { "epoch": 0.812706198359172, "grad_norm": 0.7441582950541591, "learning_rate": 1.0405515004055152e-06, "loss": 0.6555, "step": 27836 }, { "epoch": 0.8127353945870194, "grad_norm": 0.7250952187893802, "learning_rate": 1.040389294403893e-06, "loss": 0.6406, "step": 27837 }, { "epoch": 0.8127645908148667, "grad_norm": 0.7034741076314734, "learning_rate": 1.040227088402271e-06, "loss": 0.6382, "step": 27838 }, { "epoch": 0.8127937870427141, "grad_norm": 0.7084249185326771, "learning_rate": 1.0400648824006488e-06, "loss": 0.5965, "step": 27839 }, { "epoch": 0.8128229832705615, "grad_norm": 0.7460273153131614, "learning_rate": 1.0399026763990268e-06, "loss": 0.6934, "step": 27840 }, { "epoch": 0.8128521794984088, "grad_norm": 0.7629735272998761, "learning_rate": 1.0397404703974048e-06, "loss": 0.712, "step": 27841 }, { "epoch": 0.8128813757262562, "grad_norm": 0.7024973841072539, "learning_rate": 1.0395782643957828e-06, "loss": 0.5908, "step": 27842 }, { "epoch": 0.8129105719541035, "grad_norm": 0.7549532188197712, "learning_rate": 1.0394160583941606e-06, "loss": 0.6882, "step": 27843 }, { "epoch": 0.8129397681819509, "grad_norm": 0.7229130691202164, "learning_rate": 1.0392538523925386e-06, "loss": 0.6496, "step": 27844 }, { "epoch": 0.8129689644097983, "grad_norm": 0.6947812207521535, "learning_rate": 1.0390916463909166e-06, "loss": 0.5862, "step": 27845 }, { "epoch": 0.8129981606376456, "grad_norm": 0.7253470468865972, "learning_rate": 1.0389294403892946e-06, "loss": 0.6672, "step": 27846 }, { "epoch": 0.813027356865493, "grad_norm": 0.7265615041579615, "learning_rate": 1.0387672343876724e-06, "loss": 0.6341, "step": 27847 }, { "epoch": 0.8130565530933404, "grad_norm": 0.6587220646975064, "learning_rate": 1.0386050283860504e-06, "loss": 0.5337, "step": 27848 }, { "epoch": 0.8130857493211877, "grad_norm": 0.7296823063684276, "learning_rate": 1.0384428223844282e-06, "loss": 0.6825, "step": 27849 }, { "epoch": 0.8131149455490351, "grad_norm": 0.740077416678087, "learning_rate": 1.0382806163828062e-06, "loss": 0.6449, "step": 27850 }, { "epoch": 0.8131441417768824, "grad_norm": 0.7612928754279781, "learning_rate": 1.0381184103811842e-06, "loss": 0.637, "step": 27851 }, { "epoch": 0.8131733380047298, "grad_norm": 0.7454186009023308, "learning_rate": 1.037956204379562e-06, "loss": 0.6108, "step": 27852 }, { "epoch": 0.8132025342325772, "grad_norm": 0.7097125977983091, "learning_rate": 1.03779399837794e-06, "loss": 0.644, "step": 27853 }, { "epoch": 0.8132317304604245, "grad_norm": 0.7631296988824813, "learning_rate": 1.037631792376318e-06, "loss": 0.7008, "step": 27854 }, { "epoch": 0.8132609266882719, "grad_norm": 0.7668319962972867, "learning_rate": 1.037469586374696e-06, "loss": 0.6592, "step": 27855 }, { "epoch": 0.8132901229161192, "grad_norm": 0.734809269689494, "learning_rate": 1.0373073803730738e-06, "loss": 0.6834, "step": 27856 }, { "epoch": 0.8133193191439666, "grad_norm": 1.0577002706608811, "learning_rate": 1.0371451743714519e-06, "loss": 0.6484, "step": 27857 }, { "epoch": 0.813348515371814, "grad_norm": 0.7507498519117863, "learning_rate": 1.0369829683698297e-06, "loss": 0.613, "step": 27858 }, { "epoch": 0.8133777115996613, "grad_norm": 0.7393965333003503, "learning_rate": 1.0368207623682077e-06, "loss": 0.7147, "step": 27859 }, { "epoch": 0.8134069078275087, "grad_norm": 0.7021619366697446, "learning_rate": 1.0366585563665857e-06, "loss": 0.6077, "step": 27860 }, { "epoch": 0.813436104055356, "grad_norm": 0.7934698023103359, "learning_rate": 1.0364963503649637e-06, "loss": 0.7211, "step": 27861 }, { "epoch": 0.8134653002832034, "grad_norm": 0.6670077403947723, "learning_rate": 1.0363341443633415e-06, "loss": 0.5846, "step": 27862 }, { "epoch": 0.8134944965110508, "grad_norm": 0.8435649299900351, "learning_rate": 1.0361719383617195e-06, "loss": 0.589, "step": 27863 }, { "epoch": 0.8135236927388981, "grad_norm": 0.7315253332757582, "learning_rate": 1.0360097323600975e-06, "loss": 0.6356, "step": 27864 }, { "epoch": 0.8135528889667455, "grad_norm": 0.7535390873677841, "learning_rate": 1.0358475263584753e-06, "loss": 0.6861, "step": 27865 }, { "epoch": 0.8135820851945929, "grad_norm": 0.7389946245055341, "learning_rate": 1.0356853203568533e-06, "loss": 0.5959, "step": 27866 }, { "epoch": 0.8136112814224402, "grad_norm": 0.7540529121722354, "learning_rate": 1.0355231143552313e-06, "loss": 0.6667, "step": 27867 }, { "epoch": 0.8136404776502876, "grad_norm": 0.6909557523432202, "learning_rate": 1.035360908353609e-06, "loss": 0.5477, "step": 27868 }, { "epoch": 0.8136696738781349, "grad_norm": 0.8410778087041358, "learning_rate": 1.035198702351987e-06, "loss": 0.8082, "step": 27869 }, { "epoch": 0.8136988701059823, "grad_norm": 0.6837740323551497, "learning_rate": 1.035036496350365e-06, "loss": 0.5922, "step": 27870 }, { "epoch": 0.8137280663338297, "grad_norm": 0.7458288311088744, "learning_rate": 1.0348742903487429e-06, "loss": 0.6797, "step": 27871 }, { "epoch": 0.813757262561677, "grad_norm": 0.7226259060385614, "learning_rate": 1.034712084347121e-06, "loss": 0.6133, "step": 27872 }, { "epoch": 0.8137864587895244, "grad_norm": 0.6480389089573395, "learning_rate": 1.034549878345499e-06, "loss": 0.5432, "step": 27873 }, { "epoch": 0.8138156550173717, "grad_norm": 0.7398744259690527, "learning_rate": 1.034387672343877e-06, "loss": 0.6807, "step": 27874 }, { "epoch": 0.8138448512452191, "grad_norm": 0.7687182940320512, "learning_rate": 1.0342254663422547e-06, "loss": 0.7215, "step": 27875 }, { "epoch": 0.8138740474730665, "grad_norm": 0.7037817741408902, "learning_rate": 1.0340632603406327e-06, "loss": 0.5974, "step": 27876 }, { "epoch": 0.8139032437009138, "grad_norm": 0.6880417059436239, "learning_rate": 1.0339010543390105e-06, "loss": 0.5768, "step": 27877 }, { "epoch": 0.8139324399287612, "grad_norm": 0.6965650835098305, "learning_rate": 1.0337388483373887e-06, "loss": 0.6534, "step": 27878 }, { "epoch": 0.8139616361566085, "grad_norm": 0.6906005576882528, "learning_rate": 1.0335766423357665e-06, "loss": 0.6116, "step": 27879 }, { "epoch": 0.8139908323844559, "grad_norm": 0.8171472485080629, "learning_rate": 1.0334144363341445e-06, "loss": 0.7277, "step": 27880 }, { "epoch": 0.8140200286123033, "grad_norm": 0.6867766960578275, "learning_rate": 1.0332522303325223e-06, "loss": 0.6012, "step": 27881 }, { "epoch": 0.8140492248401506, "grad_norm": 0.6864676476192397, "learning_rate": 1.0330900243309003e-06, "loss": 0.5809, "step": 27882 }, { "epoch": 0.814078421067998, "grad_norm": 0.7051075132732001, "learning_rate": 1.0329278183292783e-06, "loss": 0.6128, "step": 27883 }, { "epoch": 0.8141076172958454, "grad_norm": 0.699177797363343, "learning_rate": 1.0327656123276561e-06, "loss": 0.5828, "step": 27884 }, { "epoch": 0.8141368135236927, "grad_norm": 0.6906339480084059, "learning_rate": 1.0326034063260341e-06, "loss": 0.6048, "step": 27885 }, { "epoch": 0.8141660097515401, "grad_norm": 0.6674654665120261, "learning_rate": 1.0324412003244121e-06, "loss": 0.5915, "step": 27886 }, { "epoch": 0.8141952059793874, "grad_norm": 0.8455497871585479, "learning_rate": 1.03227899432279e-06, "loss": 0.7927, "step": 27887 }, { "epoch": 0.8142244022072348, "grad_norm": 0.7957050657119019, "learning_rate": 1.032116788321168e-06, "loss": 0.7496, "step": 27888 }, { "epoch": 0.8142535984350822, "grad_norm": 0.740048426297731, "learning_rate": 1.031954582319546e-06, "loss": 0.6472, "step": 27889 }, { "epoch": 0.8142827946629295, "grad_norm": 0.6907923897438588, "learning_rate": 1.0317923763179238e-06, "loss": 0.5953, "step": 27890 }, { "epoch": 0.8143119908907769, "grad_norm": 0.73443994648811, "learning_rate": 1.0316301703163018e-06, "loss": 0.6167, "step": 27891 }, { "epoch": 0.8143411871186242, "grad_norm": 0.7003882846263417, "learning_rate": 1.0314679643146798e-06, "loss": 0.5642, "step": 27892 }, { "epoch": 0.8143703833464716, "grad_norm": 0.7295768161687047, "learning_rate": 1.0313057583130578e-06, "loss": 0.5858, "step": 27893 }, { "epoch": 0.814399579574319, "grad_norm": 0.6684724526166866, "learning_rate": 1.0311435523114356e-06, "loss": 0.6076, "step": 27894 }, { "epoch": 0.8144287758021663, "grad_norm": 0.7251062400373055, "learning_rate": 1.0309813463098136e-06, "loss": 0.6701, "step": 27895 }, { "epoch": 0.8144579720300137, "grad_norm": 0.7339127471491979, "learning_rate": 1.0308191403081914e-06, "loss": 0.6761, "step": 27896 }, { "epoch": 0.814487168257861, "grad_norm": 0.6787185088632035, "learning_rate": 1.0306569343065696e-06, "loss": 0.5404, "step": 27897 }, { "epoch": 0.8145163644857084, "grad_norm": 0.8488342245647947, "learning_rate": 1.0304947283049474e-06, "loss": 0.6893, "step": 27898 }, { "epoch": 0.8145455607135558, "grad_norm": 0.7667107730991671, "learning_rate": 1.0303325223033254e-06, "loss": 0.7247, "step": 27899 }, { "epoch": 0.8145747569414031, "grad_norm": 0.7344478848592008, "learning_rate": 1.0301703163017032e-06, "loss": 0.6256, "step": 27900 }, { "epoch": 0.8146039531692505, "grad_norm": 0.7647470286660963, "learning_rate": 1.0300081103000812e-06, "loss": 0.7133, "step": 27901 }, { "epoch": 0.8146331493970979, "grad_norm": 0.7698949637927978, "learning_rate": 1.0298459042984592e-06, "loss": 0.7727, "step": 27902 }, { "epoch": 0.8146623456249452, "grad_norm": 0.660472056098786, "learning_rate": 1.029683698296837e-06, "loss": 0.5649, "step": 27903 }, { "epoch": 0.8146915418527926, "grad_norm": 0.8005366404057459, "learning_rate": 1.029521492295215e-06, "loss": 0.7412, "step": 27904 }, { "epoch": 0.8147207380806399, "grad_norm": 0.776167657188506, "learning_rate": 1.029359286293593e-06, "loss": 0.6877, "step": 27905 }, { "epoch": 0.8147499343084873, "grad_norm": 0.7185415156053726, "learning_rate": 1.0291970802919708e-06, "loss": 0.6519, "step": 27906 }, { "epoch": 0.8147791305363347, "grad_norm": 0.7319001914428724, "learning_rate": 1.0290348742903488e-06, "loss": 0.6788, "step": 27907 }, { "epoch": 0.814808326764182, "grad_norm": 0.6922456250451146, "learning_rate": 1.0288726682887268e-06, "loss": 0.6418, "step": 27908 }, { "epoch": 0.8148375229920294, "grad_norm": 0.7214552328399364, "learning_rate": 1.0287104622871046e-06, "loss": 0.6561, "step": 27909 }, { "epoch": 0.8148667192198767, "grad_norm": 0.705599503422257, "learning_rate": 1.0285482562854826e-06, "loss": 0.6429, "step": 27910 }, { "epoch": 0.8148959154477241, "grad_norm": 0.7190938029815185, "learning_rate": 1.0283860502838606e-06, "loss": 0.6541, "step": 27911 }, { "epoch": 0.8149251116755715, "grad_norm": 0.8497784727548964, "learning_rate": 1.0282238442822386e-06, "loss": 0.6601, "step": 27912 }, { "epoch": 0.8149543079034188, "grad_norm": 0.7500931819790323, "learning_rate": 1.0280616382806164e-06, "loss": 0.6993, "step": 27913 }, { "epoch": 0.8149835041312662, "grad_norm": 0.7660455360666933, "learning_rate": 1.0278994322789944e-06, "loss": 0.6954, "step": 27914 }, { "epoch": 0.8150127003591136, "grad_norm": 0.730701190758222, "learning_rate": 1.0277372262773722e-06, "loss": 0.6233, "step": 27915 }, { "epoch": 0.8150418965869609, "grad_norm": 0.6865087909431374, "learning_rate": 1.0275750202757504e-06, "loss": 0.5996, "step": 27916 }, { "epoch": 0.8150710928148083, "grad_norm": 0.6721984217227934, "learning_rate": 1.0274128142741282e-06, "loss": 0.5306, "step": 27917 }, { "epoch": 0.8151002890426556, "grad_norm": 0.6977901906578323, "learning_rate": 1.0272506082725062e-06, "loss": 0.5899, "step": 27918 }, { "epoch": 0.815129485270503, "grad_norm": 0.7199254009926792, "learning_rate": 1.027088402270884e-06, "loss": 0.62, "step": 27919 }, { "epoch": 0.8151586814983505, "grad_norm": 0.7026269827502273, "learning_rate": 1.026926196269262e-06, "loss": 0.6362, "step": 27920 }, { "epoch": 0.8151878777261978, "grad_norm": 0.7225323104279715, "learning_rate": 1.02676399026764e-06, "loss": 0.6832, "step": 27921 }, { "epoch": 0.8152170739540452, "grad_norm": 0.7694593246831464, "learning_rate": 1.0266017842660179e-06, "loss": 0.6729, "step": 27922 }, { "epoch": 0.8152462701818926, "grad_norm": 0.6385298835257933, "learning_rate": 1.0264395782643959e-06, "loss": 0.5253, "step": 27923 }, { "epoch": 0.8152754664097399, "grad_norm": 0.7294153995706542, "learning_rate": 1.0262773722627739e-06, "loss": 0.6806, "step": 27924 }, { "epoch": 0.8153046626375873, "grad_norm": 0.6963087088396374, "learning_rate": 1.0261151662611517e-06, "loss": 0.5535, "step": 27925 }, { "epoch": 0.8153338588654346, "grad_norm": 0.8407849234465447, "learning_rate": 1.0259529602595297e-06, "loss": 0.62, "step": 27926 }, { "epoch": 0.815363055093282, "grad_norm": 0.7265307955007637, "learning_rate": 1.0257907542579077e-06, "loss": 0.6406, "step": 27927 }, { "epoch": 0.8153922513211294, "grad_norm": 0.7199234564157222, "learning_rate": 1.0256285482562855e-06, "loss": 0.6289, "step": 27928 }, { "epoch": 0.8154214475489767, "grad_norm": 0.7369940731741155, "learning_rate": 1.0254663422546635e-06, "loss": 0.6342, "step": 27929 }, { "epoch": 0.8154506437768241, "grad_norm": 0.7317427781591219, "learning_rate": 1.0253041362530415e-06, "loss": 0.6473, "step": 27930 }, { "epoch": 0.8154798400046714, "grad_norm": 0.7234858251962961, "learning_rate": 1.0251419302514195e-06, "loss": 0.6244, "step": 27931 }, { "epoch": 0.8155090362325188, "grad_norm": 0.7016921765034473, "learning_rate": 1.0249797242497973e-06, "loss": 0.6089, "step": 27932 }, { "epoch": 0.8155382324603662, "grad_norm": 0.7467330084220597, "learning_rate": 1.0248175182481753e-06, "loss": 0.6925, "step": 27933 }, { "epoch": 0.8155674286882135, "grad_norm": 0.6865383685096912, "learning_rate": 1.024655312246553e-06, "loss": 0.6092, "step": 27934 }, { "epoch": 0.8155966249160609, "grad_norm": 0.7121248273806764, "learning_rate": 1.0244931062449313e-06, "loss": 0.6078, "step": 27935 }, { "epoch": 0.8156258211439082, "grad_norm": 0.6997664446293232, "learning_rate": 1.024330900243309e-06, "loss": 0.643, "step": 27936 }, { "epoch": 0.8156550173717556, "grad_norm": 0.6910212055104337, "learning_rate": 1.0241686942416871e-06, "loss": 0.6298, "step": 27937 }, { "epoch": 0.815684213599603, "grad_norm": 0.7644323197397919, "learning_rate": 1.024006488240065e-06, "loss": 0.708, "step": 27938 }, { "epoch": 0.8157134098274503, "grad_norm": 0.7158355452490778, "learning_rate": 1.023844282238443e-06, "loss": 0.5849, "step": 27939 }, { "epoch": 0.8157426060552977, "grad_norm": 0.6963290500866952, "learning_rate": 1.023682076236821e-06, "loss": 0.5466, "step": 27940 }, { "epoch": 0.815771802283145, "grad_norm": 0.7624993340302397, "learning_rate": 1.0235198702351987e-06, "loss": 0.6143, "step": 27941 }, { "epoch": 0.8158009985109924, "grad_norm": 0.6859098209930353, "learning_rate": 1.0233576642335767e-06, "loss": 0.581, "step": 27942 }, { "epoch": 0.8158301947388398, "grad_norm": 0.72029816238912, "learning_rate": 1.0231954582319547e-06, "loss": 0.6442, "step": 27943 }, { "epoch": 0.8158593909666871, "grad_norm": 0.7295373001435762, "learning_rate": 1.0230332522303327e-06, "loss": 0.7117, "step": 27944 }, { "epoch": 0.8158885871945345, "grad_norm": 0.6716786803150173, "learning_rate": 1.0228710462287105e-06, "loss": 0.5884, "step": 27945 }, { "epoch": 0.8159177834223819, "grad_norm": 0.7358767112883711, "learning_rate": 1.0227088402270885e-06, "loss": 0.6484, "step": 27946 }, { "epoch": 0.8159469796502292, "grad_norm": 0.8754488382855626, "learning_rate": 1.0225466342254663e-06, "loss": 0.7149, "step": 27947 }, { "epoch": 0.8159761758780766, "grad_norm": 0.7533949843234643, "learning_rate": 1.0223844282238443e-06, "loss": 0.6896, "step": 27948 }, { "epoch": 0.8160053721059239, "grad_norm": 0.7745054447533711, "learning_rate": 1.0222222222222223e-06, "loss": 0.7695, "step": 27949 }, { "epoch": 0.8160345683337713, "grad_norm": 0.696943575385754, "learning_rate": 1.0220600162206003e-06, "loss": 0.5937, "step": 27950 }, { "epoch": 0.8160637645616187, "grad_norm": 0.703796771620157, "learning_rate": 1.0218978102189781e-06, "loss": 0.6255, "step": 27951 }, { "epoch": 0.816092960789466, "grad_norm": 0.7185950946356411, "learning_rate": 1.0217356042173561e-06, "loss": 0.6429, "step": 27952 }, { "epoch": 0.8161221570173134, "grad_norm": 0.7223441689851671, "learning_rate": 1.021573398215734e-06, "loss": 0.674, "step": 27953 }, { "epoch": 0.8161513532451607, "grad_norm": 0.7482793398693763, "learning_rate": 1.0214111922141122e-06, "loss": 0.6221, "step": 27954 }, { "epoch": 0.8161805494730081, "grad_norm": 0.7882353461963242, "learning_rate": 1.02124898621249e-06, "loss": 0.7149, "step": 27955 }, { "epoch": 0.8162097457008555, "grad_norm": 0.7363025331222737, "learning_rate": 1.021086780210868e-06, "loss": 0.6934, "step": 27956 }, { "epoch": 0.8162389419287028, "grad_norm": 0.7160094566813489, "learning_rate": 1.0209245742092458e-06, "loss": 0.6363, "step": 27957 }, { "epoch": 0.8162681381565502, "grad_norm": 0.7344997066234785, "learning_rate": 1.0207623682076238e-06, "loss": 0.629, "step": 27958 }, { "epoch": 0.8162973343843976, "grad_norm": 0.7127992010894104, "learning_rate": 1.0206001622060018e-06, "loss": 0.6346, "step": 27959 }, { "epoch": 0.8163265306122449, "grad_norm": 0.7832688254963985, "learning_rate": 1.0204379562043796e-06, "loss": 0.6174, "step": 27960 }, { "epoch": 0.8163557268400923, "grad_norm": 0.6795182940325509, "learning_rate": 1.0202757502027576e-06, "loss": 0.582, "step": 27961 }, { "epoch": 0.8163849230679396, "grad_norm": 0.6950360532815321, "learning_rate": 1.0201135442011356e-06, "loss": 0.6246, "step": 27962 }, { "epoch": 0.816414119295787, "grad_norm": 0.7640991947456619, "learning_rate": 1.0199513381995136e-06, "loss": 0.63, "step": 27963 }, { "epoch": 0.8164433155236344, "grad_norm": 0.729878667561149, "learning_rate": 1.0197891321978914e-06, "loss": 0.6637, "step": 27964 }, { "epoch": 0.8164725117514817, "grad_norm": 0.7017856965730537, "learning_rate": 1.0196269261962694e-06, "loss": 0.6119, "step": 27965 }, { "epoch": 0.8165017079793291, "grad_norm": 0.7166375906787894, "learning_rate": 1.0194647201946472e-06, "loss": 0.6446, "step": 27966 }, { "epoch": 0.8165309042071764, "grad_norm": 0.7648376694918059, "learning_rate": 1.0193025141930252e-06, "loss": 0.6508, "step": 27967 }, { "epoch": 0.8165601004350238, "grad_norm": 0.7143117581895011, "learning_rate": 1.0191403081914032e-06, "loss": 0.5683, "step": 27968 }, { "epoch": 0.8165892966628712, "grad_norm": 0.7408548424952948, "learning_rate": 1.0189781021897812e-06, "loss": 0.659, "step": 27969 }, { "epoch": 0.8166184928907185, "grad_norm": 0.7479307509727313, "learning_rate": 1.018815896188159e-06, "loss": 0.6359, "step": 27970 }, { "epoch": 0.8166476891185659, "grad_norm": 0.7541524920554239, "learning_rate": 1.018653690186537e-06, "loss": 0.6901, "step": 27971 }, { "epoch": 0.8166768853464133, "grad_norm": 0.7162589541555294, "learning_rate": 1.0184914841849148e-06, "loss": 0.6643, "step": 27972 }, { "epoch": 0.8167060815742606, "grad_norm": 0.6859860207917103, "learning_rate": 1.0183292781832928e-06, "loss": 0.6027, "step": 27973 }, { "epoch": 0.816735277802108, "grad_norm": 0.7046607496475076, "learning_rate": 1.0181670721816708e-06, "loss": 0.6531, "step": 27974 }, { "epoch": 0.8167644740299553, "grad_norm": 0.7416544217893145, "learning_rate": 1.0180048661800488e-06, "loss": 0.6491, "step": 27975 }, { "epoch": 0.8167936702578027, "grad_norm": 0.7065499669122881, "learning_rate": 1.0178426601784266e-06, "loss": 0.6103, "step": 27976 }, { "epoch": 0.8168228664856501, "grad_norm": 0.7180989702881785, "learning_rate": 1.0176804541768046e-06, "loss": 0.6338, "step": 27977 }, { "epoch": 0.8168520627134974, "grad_norm": 0.789167941623185, "learning_rate": 1.0175182481751826e-06, "loss": 0.6742, "step": 27978 }, { "epoch": 0.8168812589413448, "grad_norm": 0.6831630504523574, "learning_rate": 1.0173560421735604e-06, "loss": 0.5597, "step": 27979 }, { "epoch": 0.8169104551691921, "grad_norm": 0.7192687786929813, "learning_rate": 1.0171938361719384e-06, "loss": 0.6465, "step": 27980 }, { "epoch": 0.8169396513970395, "grad_norm": 0.7408875615660521, "learning_rate": 1.0170316301703164e-06, "loss": 0.6153, "step": 27981 }, { "epoch": 0.8169688476248869, "grad_norm": 0.7753987750973639, "learning_rate": 1.0168694241686944e-06, "loss": 0.6941, "step": 27982 }, { "epoch": 0.8169980438527342, "grad_norm": 0.7095508203369056, "learning_rate": 1.0167072181670722e-06, "loss": 0.607, "step": 27983 }, { "epoch": 0.8170272400805816, "grad_norm": 0.736971606799698, "learning_rate": 1.0165450121654502e-06, "loss": 0.5868, "step": 27984 }, { "epoch": 0.817056436308429, "grad_norm": 0.715062538021348, "learning_rate": 1.016382806163828e-06, "loss": 0.6395, "step": 27985 }, { "epoch": 0.8170856325362763, "grad_norm": 0.7792699006009073, "learning_rate": 1.016220600162206e-06, "loss": 0.7441, "step": 27986 }, { "epoch": 0.8171148287641237, "grad_norm": 0.7222441040763742, "learning_rate": 1.016058394160584e-06, "loss": 0.6577, "step": 27987 }, { "epoch": 0.817144024991971, "grad_norm": 0.7491529886468191, "learning_rate": 1.015896188158962e-06, "loss": 0.6296, "step": 27988 }, { "epoch": 0.8171732212198184, "grad_norm": 0.770777866025426, "learning_rate": 1.0157339821573399e-06, "loss": 0.6367, "step": 27989 }, { "epoch": 0.8172024174476658, "grad_norm": 0.7159571075326694, "learning_rate": 1.0155717761557179e-06, "loss": 0.6263, "step": 27990 }, { "epoch": 0.8172316136755131, "grad_norm": 0.6752873456823516, "learning_rate": 1.0154095701540957e-06, "loss": 0.5592, "step": 27991 }, { "epoch": 0.8172608099033605, "grad_norm": 0.7143332064313604, "learning_rate": 1.0152473641524737e-06, "loss": 0.5971, "step": 27992 }, { "epoch": 0.8172900061312078, "grad_norm": 0.7507338635771144, "learning_rate": 1.0150851581508517e-06, "loss": 0.6621, "step": 27993 }, { "epoch": 0.8173192023590552, "grad_norm": 0.6394322764224851, "learning_rate": 1.0149229521492297e-06, "loss": 0.5102, "step": 27994 }, { "epoch": 0.8173483985869026, "grad_norm": 0.7039545632920822, "learning_rate": 1.0147607461476075e-06, "loss": 0.6098, "step": 27995 }, { "epoch": 0.8173775948147499, "grad_norm": 0.7246116679226897, "learning_rate": 1.0145985401459855e-06, "loss": 0.6545, "step": 27996 }, { "epoch": 0.8174067910425973, "grad_norm": 0.7230387923395929, "learning_rate": 1.0144363341443635e-06, "loss": 0.6401, "step": 27997 }, { "epoch": 0.8174359872704446, "grad_norm": 0.7528533807674843, "learning_rate": 1.0142741281427413e-06, "loss": 0.672, "step": 27998 }, { "epoch": 0.817465183498292, "grad_norm": 0.7119954960389775, "learning_rate": 1.0141119221411193e-06, "loss": 0.6155, "step": 27999 }, { "epoch": 0.8174943797261394, "grad_norm": 0.7019706315646635, "learning_rate": 1.013949716139497e-06, "loss": 0.6054, "step": 28000 }, { "epoch": 0.8175235759539867, "grad_norm": 0.7375373072743668, "learning_rate": 1.0137875101378753e-06, "loss": 0.5443, "step": 28001 }, { "epoch": 0.8175527721818341, "grad_norm": 0.7791019094979292, "learning_rate": 1.013625304136253e-06, "loss": 0.7185, "step": 28002 }, { "epoch": 0.8175819684096814, "grad_norm": 0.7107963342761003, "learning_rate": 1.0134630981346311e-06, "loss": 0.6555, "step": 28003 }, { "epoch": 0.8176111646375288, "grad_norm": 0.7638000053998986, "learning_rate": 1.013300892133009e-06, "loss": 0.7205, "step": 28004 }, { "epoch": 0.8176403608653762, "grad_norm": 0.7236715211567489, "learning_rate": 1.013138686131387e-06, "loss": 0.6343, "step": 28005 }, { "epoch": 0.8176695570932235, "grad_norm": 0.7280252635613528, "learning_rate": 1.012976480129765e-06, "loss": 0.6786, "step": 28006 }, { "epoch": 0.8176987533210709, "grad_norm": 0.6773328009064049, "learning_rate": 1.012814274128143e-06, "loss": 0.5982, "step": 28007 }, { "epoch": 0.8177279495489183, "grad_norm": 0.7323212437709907, "learning_rate": 1.0126520681265207e-06, "loss": 0.6325, "step": 28008 }, { "epoch": 0.8177571457767656, "grad_norm": 0.7195740016471277, "learning_rate": 1.0124898621248987e-06, "loss": 0.6705, "step": 28009 }, { "epoch": 0.817786342004613, "grad_norm": 0.727201648389971, "learning_rate": 1.0123276561232765e-06, "loss": 0.6402, "step": 28010 }, { "epoch": 0.8178155382324603, "grad_norm": 0.8696273368726328, "learning_rate": 1.0121654501216545e-06, "loss": 0.686, "step": 28011 }, { "epoch": 0.8178447344603077, "grad_norm": 0.7131880697322869, "learning_rate": 1.0120032441200325e-06, "loss": 0.6224, "step": 28012 }, { "epoch": 0.8178739306881551, "grad_norm": 0.758311288082684, "learning_rate": 1.0118410381184105e-06, "loss": 0.71, "step": 28013 }, { "epoch": 0.8179031269160024, "grad_norm": 0.7076810553895986, "learning_rate": 1.0116788321167883e-06, "loss": 0.634, "step": 28014 }, { "epoch": 0.8179323231438498, "grad_norm": 0.7191010818301973, "learning_rate": 1.0115166261151663e-06, "loss": 0.6246, "step": 28015 }, { "epoch": 0.8179615193716971, "grad_norm": 0.7169262445824127, "learning_rate": 1.0113544201135443e-06, "loss": 0.6455, "step": 28016 }, { "epoch": 0.8179907155995445, "grad_norm": 0.6680192429074604, "learning_rate": 1.0111922141119221e-06, "loss": 0.5905, "step": 28017 }, { "epoch": 0.8180199118273919, "grad_norm": 0.7141245767437769, "learning_rate": 1.0110300081103002e-06, "loss": 0.5862, "step": 28018 }, { "epoch": 0.8180491080552392, "grad_norm": 0.6634428114269525, "learning_rate": 1.010867802108678e-06, "loss": 0.5336, "step": 28019 }, { "epoch": 0.8180783042830866, "grad_norm": 0.6834236404901229, "learning_rate": 1.0107055961070562e-06, "loss": 0.61, "step": 28020 }, { "epoch": 0.818107500510934, "grad_norm": 0.6620729729598139, "learning_rate": 1.010543390105434e-06, "loss": 0.5411, "step": 28021 }, { "epoch": 0.8181366967387813, "grad_norm": 0.6884797302142476, "learning_rate": 1.010381184103812e-06, "loss": 0.5927, "step": 28022 }, { "epoch": 0.8181658929666287, "grad_norm": 0.6609676030461287, "learning_rate": 1.0102189781021898e-06, "loss": 0.522, "step": 28023 }, { "epoch": 0.818195089194476, "grad_norm": 0.7525105442440367, "learning_rate": 1.0100567721005678e-06, "loss": 0.6174, "step": 28024 }, { "epoch": 0.8182242854223234, "grad_norm": 0.7286752639609207, "learning_rate": 1.0098945660989458e-06, "loss": 0.6271, "step": 28025 }, { "epoch": 0.8182534816501708, "grad_norm": 0.6786517352569129, "learning_rate": 1.0097323600973238e-06, "loss": 0.5957, "step": 28026 }, { "epoch": 0.8182826778780181, "grad_norm": 0.729929635718964, "learning_rate": 1.0095701540957016e-06, "loss": 0.6834, "step": 28027 }, { "epoch": 0.8183118741058655, "grad_norm": 0.702971786275804, "learning_rate": 1.0094079480940796e-06, "loss": 0.6193, "step": 28028 }, { "epoch": 0.8183410703337128, "grad_norm": 0.7702978376458746, "learning_rate": 1.0092457420924576e-06, "loss": 0.7293, "step": 28029 }, { "epoch": 0.8183702665615602, "grad_norm": 0.754417402618515, "learning_rate": 1.0090835360908354e-06, "loss": 0.6983, "step": 28030 }, { "epoch": 0.8183994627894076, "grad_norm": 0.7830821840631825, "learning_rate": 1.0089213300892134e-06, "loss": 0.7141, "step": 28031 }, { "epoch": 0.8184286590172549, "grad_norm": 0.7333935662363378, "learning_rate": 1.0087591240875914e-06, "loss": 0.648, "step": 28032 }, { "epoch": 0.8184578552451023, "grad_norm": 0.775349129356682, "learning_rate": 1.0085969180859692e-06, "loss": 0.6997, "step": 28033 }, { "epoch": 0.8184870514729496, "grad_norm": 0.7595658490820428, "learning_rate": 1.0084347120843472e-06, "loss": 0.6638, "step": 28034 }, { "epoch": 0.818516247700797, "grad_norm": 0.7357274874223153, "learning_rate": 1.0082725060827252e-06, "loss": 0.6559, "step": 28035 }, { "epoch": 0.8185454439286444, "grad_norm": 0.7350591432105406, "learning_rate": 1.008110300081103e-06, "loss": 0.6704, "step": 28036 }, { "epoch": 0.8185746401564917, "grad_norm": 0.7370878347766807, "learning_rate": 1.007948094079481e-06, "loss": 0.6471, "step": 28037 }, { "epoch": 0.8186038363843391, "grad_norm": 0.7208897498757616, "learning_rate": 1.0077858880778588e-06, "loss": 0.649, "step": 28038 }, { "epoch": 0.8186330326121865, "grad_norm": 0.7307184150799332, "learning_rate": 1.007623682076237e-06, "loss": 0.6116, "step": 28039 }, { "epoch": 0.8186622288400339, "grad_norm": 0.7195844665899551, "learning_rate": 1.0074614760746148e-06, "loss": 0.6129, "step": 28040 }, { "epoch": 0.8186914250678813, "grad_norm": 0.7554142829101227, "learning_rate": 1.0072992700729928e-06, "loss": 0.7004, "step": 28041 }, { "epoch": 0.8187206212957286, "grad_norm": 0.7336366810468886, "learning_rate": 1.0071370640713706e-06, "loss": 0.6724, "step": 28042 }, { "epoch": 0.818749817523576, "grad_norm": 0.6999942840143125, "learning_rate": 1.0069748580697486e-06, "loss": 0.6457, "step": 28043 }, { "epoch": 0.8187790137514234, "grad_norm": 0.7294943129064203, "learning_rate": 1.0068126520681266e-06, "loss": 0.6428, "step": 28044 }, { "epoch": 0.8188082099792707, "grad_norm": 0.726783852889524, "learning_rate": 1.0066504460665046e-06, "loss": 0.6527, "step": 28045 }, { "epoch": 0.8188374062071181, "grad_norm": 0.7601867237443758, "learning_rate": 1.0064882400648824e-06, "loss": 0.6948, "step": 28046 }, { "epoch": 0.8188666024349655, "grad_norm": 0.6940235662996319, "learning_rate": 1.0063260340632604e-06, "loss": 0.6203, "step": 28047 }, { "epoch": 0.8188957986628128, "grad_norm": 0.7300176915078419, "learning_rate": 1.0061638280616384e-06, "loss": 0.6666, "step": 28048 }, { "epoch": 0.8189249948906602, "grad_norm": 0.7428411258526293, "learning_rate": 1.0060016220600162e-06, "loss": 0.7103, "step": 28049 }, { "epoch": 0.8189541911185075, "grad_norm": 0.7411735875400711, "learning_rate": 1.0058394160583943e-06, "loss": 0.655, "step": 28050 }, { "epoch": 0.8189833873463549, "grad_norm": 0.7172329024196182, "learning_rate": 1.0056772100567723e-06, "loss": 0.6386, "step": 28051 }, { "epoch": 0.8190125835742023, "grad_norm": 0.7338861829957062, "learning_rate": 1.00551500405515e-06, "loss": 0.6759, "step": 28052 }, { "epoch": 0.8190417798020496, "grad_norm": 0.7028985739719353, "learning_rate": 1.005352798053528e-06, "loss": 0.6324, "step": 28053 }, { "epoch": 0.819070976029897, "grad_norm": 0.7681906712505098, "learning_rate": 1.005190592051906e-06, "loss": 0.7518, "step": 28054 }, { "epoch": 0.8191001722577443, "grad_norm": 0.7000247243914852, "learning_rate": 1.0050283860502839e-06, "loss": 0.624, "step": 28055 }, { "epoch": 0.8191293684855917, "grad_norm": 0.7564220719176199, "learning_rate": 1.0048661800486619e-06, "loss": 0.6972, "step": 28056 }, { "epoch": 0.8191585647134391, "grad_norm": 0.8074129563244967, "learning_rate": 1.0047039740470397e-06, "loss": 0.7309, "step": 28057 }, { "epoch": 0.8191877609412864, "grad_norm": 0.68593986155172, "learning_rate": 1.0045417680454179e-06, "loss": 0.5845, "step": 28058 }, { "epoch": 0.8192169571691338, "grad_norm": 0.7931180589633356, "learning_rate": 1.0043795620437957e-06, "loss": 0.6915, "step": 28059 }, { "epoch": 0.8192461533969811, "grad_norm": 0.7167347460999417, "learning_rate": 1.0042173560421737e-06, "loss": 0.6516, "step": 28060 }, { "epoch": 0.8192753496248285, "grad_norm": 0.7382808350203233, "learning_rate": 1.0040551500405515e-06, "loss": 0.6536, "step": 28061 }, { "epoch": 0.8193045458526759, "grad_norm": 0.7150867116298252, "learning_rate": 1.0038929440389295e-06, "loss": 0.5921, "step": 28062 }, { "epoch": 0.8193337420805232, "grad_norm": 0.7170478114294433, "learning_rate": 1.0037307380373075e-06, "loss": 0.6098, "step": 28063 }, { "epoch": 0.8193629383083706, "grad_norm": 0.6786410068819937, "learning_rate": 1.0035685320356855e-06, "loss": 0.53, "step": 28064 }, { "epoch": 0.819392134536218, "grad_norm": 0.7309409370990168, "learning_rate": 1.0034063260340633e-06, "loss": 0.5864, "step": 28065 }, { "epoch": 0.8194213307640653, "grad_norm": 0.7243422657771662, "learning_rate": 1.0032441200324413e-06, "loss": 0.6245, "step": 28066 }, { "epoch": 0.8194505269919127, "grad_norm": 0.7624742229708102, "learning_rate": 1.0030819140308193e-06, "loss": 0.686, "step": 28067 }, { "epoch": 0.81947972321976, "grad_norm": 0.7574613893730061, "learning_rate": 1.002919708029197e-06, "loss": 0.64, "step": 28068 }, { "epoch": 0.8195089194476074, "grad_norm": 0.6364445607942723, "learning_rate": 1.0027575020275751e-06, "loss": 0.5256, "step": 28069 }, { "epoch": 0.8195381156754548, "grad_norm": 0.6979416989974587, "learning_rate": 1.0025952960259531e-06, "loss": 0.5829, "step": 28070 }, { "epoch": 0.8195673119033021, "grad_norm": 0.681815695313006, "learning_rate": 1.002433090024331e-06, "loss": 0.5557, "step": 28071 }, { "epoch": 0.8195965081311495, "grad_norm": 0.7980023876081328, "learning_rate": 1.002270884022709e-06, "loss": 0.6647, "step": 28072 }, { "epoch": 0.8196257043589968, "grad_norm": 0.7174446863831766, "learning_rate": 1.002108678021087e-06, "loss": 0.5956, "step": 28073 }, { "epoch": 0.8196549005868442, "grad_norm": 0.745505330158313, "learning_rate": 1.0019464720194647e-06, "loss": 0.6873, "step": 28074 }, { "epoch": 0.8196840968146916, "grad_norm": 0.7624494336751936, "learning_rate": 1.0017842660178427e-06, "loss": 0.6977, "step": 28075 }, { "epoch": 0.8197132930425389, "grad_norm": 0.6899057135966709, "learning_rate": 1.0016220600162205e-06, "loss": 0.5933, "step": 28076 }, { "epoch": 0.8197424892703863, "grad_norm": 0.8952062130161842, "learning_rate": 1.0014598540145987e-06, "loss": 0.7601, "step": 28077 }, { "epoch": 0.8197716854982336, "grad_norm": 0.9897856472717952, "learning_rate": 1.0012976480129765e-06, "loss": 0.6482, "step": 28078 }, { "epoch": 0.819800881726081, "grad_norm": 0.7278679317481019, "learning_rate": 1.0011354420113545e-06, "loss": 0.6744, "step": 28079 }, { "epoch": 0.8198300779539284, "grad_norm": 0.7116973586933901, "learning_rate": 1.0009732360097323e-06, "loss": 0.6509, "step": 28080 }, { "epoch": 0.8198592741817757, "grad_norm": 0.7945846861475586, "learning_rate": 1.0008110300081103e-06, "loss": 0.6724, "step": 28081 }, { "epoch": 0.8198884704096231, "grad_norm": 0.6876902123479136, "learning_rate": 1.0006488240064884e-06, "loss": 0.6072, "step": 28082 }, { "epoch": 0.8199176666374705, "grad_norm": 0.6874331139262233, "learning_rate": 1.0004866180048664e-06, "loss": 0.5781, "step": 28083 }, { "epoch": 0.8199468628653178, "grad_norm": 0.7244102188445803, "learning_rate": 1.0003244120032442e-06, "loss": 0.6258, "step": 28084 }, { "epoch": 0.8199760590931652, "grad_norm": 0.7209848697270982, "learning_rate": 1.0001622060016222e-06, "loss": 0.6001, "step": 28085 }, { "epoch": 0.8200052553210125, "grad_norm": 0.7686879988969741, "learning_rate": 1.0000000000000002e-06, "loss": 0.6936, "step": 28086 }, { "epoch": 0.8200344515488599, "grad_norm": 0.7598886676882318, "learning_rate": 9.99837793998378e-07, "loss": 0.6535, "step": 28087 }, { "epoch": 0.8200636477767073, "grad_norm": 0.741045133842165, "learning_rate": 9.99675587996756e-07, "loss": 0.6745, "step": 28088 }, { "epoch": 0.8200928440045546, "grad_norm": 0.7658740049918589, "learning_rate": 9.995133819951338e-07, "loss": 0.7085, "step": 28089 }, { "epoch": 0.820122040232402, "grad_norm": 0.7639056723293736, "learning_rate": 9.993511759935118e-07, "loss": 0.662, "step": 28090 }, { "epoch": 0.8201512364602493, "grad_norm": 0.7592262777348199, "learning_rate": 9.991889699918898e-07, "loss": 0.7008, "step": 28091 }, { "epoch": 0.8201804326880967, "grad_norm": 0.7227496042300923, "learning_rate": 9.990267639902678e-07, "loss": 0.6218, "step": 28092 }, { "epoch": 0.8202096289159441, "grad_norm": 0.7164984263880516, "learning_rate": 9.988645579886456e-07, "loss": 0.6421, "step": 28093 }, { "epoch": 0.8202388251437914, "grad_norm": 0.7508304255374136, "learning_rate": 9.987023519870236e-07, "loss": 0.6691, "step": 28094 }, { "epoch": 0.8202680213716388, "grad_norm": 0.6668254055468528, "learning_rate": 9.985401459854016e-07, "loss": 0.582, "step": 28095 }, { "epoch": 0.8202972175994862, "grad_norm": 0.6444371897383282, "learning_rate": 9.983779399837796e-07, "loss": 0.4842, "step": 28096 }, { "epoch": 0.8203264138273335, "grad_norm": 0.7034017198727389, "learning_rate": 9.982157339821574e-07, "loss": 0.5992, "step": 28097 }, { "epoch": 0.8203556100551809, "grad_norm": 0.6671215929847184, "learning_rate": 9.980535279805354e-07, "loss": 0.5496, "step": 28098 }, { "epoch": 0.8203848062830282, "grad_norm": 0.7451608170251046, "learning_rate": 9.978913219789132e-07, "loss": 0.6541, "step": 28099 }, { "epoch": 0.8204140025108756, "grad_norm": 0.7498947640303693, "learning_rate": 9.977291159772912e-07, "loss": 0.6495, "step": 28100 }, { "epoch": 0.820443198738723, "grad_norm": 0.7061213605520913, "learning_rate": 9.975669099756692e-07, "loss": 0.616, "step": 28101 }, { "epoch": 0.8204723949665703, "grad_norm": 0.6926881634983472, "learning_rate": 9.974047039740472e-07, "loss": 0.5657, "step": 28102 }, { "epoch": 0.8205015911944177, "grad_norm": 0.6800675489641616, "learning_rate": 9.97242497972425e-07, "loss": 0.5574, "step": 28103 }, { "epoch": 0.820530787422265, "grad_norm": 0.7095759831750817, "learning_rate": 9.97080291970803e-07, "loss": 0.6082, "step": 28104 }, { "epoch": 0.8205599836501124, "grad_norm": 0.7041959560486784, "learning_rate": 9.96918085969181e-07, "loss": 0.63, "step": 28105 }, { "epoch": 0.8205891798779598, "grad_norm": 0.7566065685932043, "learning_rate": 9.967558799675588e-07, "loss": 0.7086, "step": 28106 }, { "epoch": 0.8206183761058071, "grad_norm": 0.7364697756959531, "learning_rate": 9.965936739659368e-07, "loss": 0.675, "step": 28107 }, { "epoch": 0.8206475723336545, "grad_norm": 0.6939929130771877, "learning_rate": 9.964314679643146e-07, "loss": 0.5753, "step": 28108 }, { "epoch": 0.8206767685615018, "grad_norm": 0.7010931294536434, "learning_rate": 9.962692619626926e-07, "loss": 0.6104, "step": 28109 }, { "epoch": 0.8207059647893492, "grad_norm": 0.7437854384842251, "learning_rate": 9.961070559610706e-07, "loss": 0.6706, "step": 28110 }, { "epoch": 0.8207351610171966, "grad_norm": 0.7074064961679154, "learning_rate": 9.959448499594486e-07, "loss": 0.6482, "step": 28111 }, { "epoch": 0.8207643572450439, "grad_norm": 0.8200010199791867, "learning_rate": 9.957826439578264e-07, "loss": 0.6994, "step": 28112 }, { "epoch": 0.8207935534728913, "grad_norm": 0.6804186941684371, "learning_rate": 9.956204379562044e-07, "loss": 0.5729, "step": 28113 }, { "epoch": 0.8208227497007387, "grad_norm": 0.6957433571207459, "learning_rate": 9.954582319545825e-07, "loss": 0.6319, "step": 28114 }, { "epoch": 0.820851945928586, "grad_norm": 0.8001717090384128, "learning_rate": 9.952960259529605e-07, "loss": 0.7087, "step": 28115 }, { "epoch": 0.8208811421564334, "grad_norm": 0.7259511662835492, "learning_rate": 9.951338199513383e-07, "loss": 0.6156, "step": 28116 }, { "epoch": 0.8209103383842807, "grad_norm": 0.7564246151430781, "learning_rate": 9.949716139497163e-07, "loss": 0.6352, "step": 28117 }, { "epoch": 0.8209395346121281, "grad_norm": 0.9239041748862262, "learning_rate": 9.94809407948094e-07, "loss": 0.6387, "step": 28118 }, { "epoch": 0.8209687308399755, "grad_norm": 0.7395262963286184, "learning_rate": 9.94647201946472e-07, "loss": 0.6649, "step": 28119 }, { "epoch": 0.8209979270678228, "grad_norm": 0.6845365317624247, "learning_rate": 9.9448499594485e-07, "loss": 0.5704, "step": 28120 }, { "epoch": 0.8210271232956702, "grad_norm": 0.7116630920711736, "learning_rate": 9.94322789943228e-07, "loss": 0.6582, "step": 28121 }, { "epoch": 0.8210563195235175, "grad_norm": 0.7868218594491049, "learning_rate": 9.941605839416059e-07, "loss": 0.6873, "step": 28122 }, { "epoch": 0.8210855157513649, "grad_norm": 0.7584399927983325, "learning_rate": 9.939983779399839e-07, "loss": 0.6242, "step": 28123 }, { "epoch": 0.8211147119792123, "grad_norm": 0.6914836171030329, "learning_rate": 9.938361719383619e-07, "loss": 0.5724, "step": 28124 }, { "epoch": 0.8211439082070596, "grad_norm": 0.7753028289245126, "learning_rate": 9.936739659367397e-07, "loss": 0.6715, "step": 28125 }, { "epoch": 0.821173104434907, "grad_norm": 0.7524571487296029, "learning_rate": 9.935117599351177e-07, "loss": 0.6795, "step": 28126 }, { "epoch": 0.8212023006627543, "grad_norm": 0.6801625155617647, "learning_rate": 9.933495539334955e-07, "loss": 0.5643, "step": 28127 }, { "epoch": 0.8212314968906017, "grad_norm": 0.7207972073460002, "learning_rate": 9.931873479318737e-07, "loss": 0.597, "step": 28128 }, { "epoch": 0.8212606931184491, "grad_norm": 0.7522597367081738, "learning_rate": 9.930251419302515e-07, "loss": 0.6877, "step": 28129 }, { "epoch": 0.8212898893462964, "grad_norm": 0.7801235556207025, "learning_rate": 9.928629359286295e-07, "loss": 0.7453, "step": 28130 }, { "epoch": 0.8213190855741438, "grad_norm": 0.6661581623555263, "learning_rate": 9.927007299270073e-07, "loss": 0.5949, "step": 28131 }, { "epoch": 0.8213482818019912, "grad_norm": 0.7328317976576777, "learning_rate": 9.925385239253853e-07, "loss": 0.6497, "step": 28132 }, { "epoch": 0.8213774780298385, "grad_norm": 0.691865590702921, "learning_rate": 9.923763179237633e-07, "loss": 0.6014, "step": 28133 }, { "epoch": 0.8214066742576859, "grad_norm": 0.7667957941250345, "learning_rate": 9.922141119221413e-07, "loss": 0.6444, "step": 28134 }, { "epoch": 0.8214358704855332, "grad_norm": 0.7327893452231405, "learning_rate": 9.920519059205191e-07, "loss": 0.663, "step": 28135 }, { "epoch": 0.8214650667133806, "grad_norm": 0.7359976898279194, "learning_rate": 9.918896999188971e-07, "loss": 0.6397, "step": 28136 }, { "epoch": 0.821494262941228, "grad_norm": 0.7335053128977447, "learning_rate": 9.91727493917275e-07, "loss": 0.6427, "step": 28137 }, { "epoch": 0.8215234591690753, "grad_norm": 0.6626437596773177, "learning_rate": 9.91565287915653e-07, "loss": 0.5365, "step": 28138 }, { "epoch": 0.8215526553969227, "grad_norm": 0.7863344120832247, "learning_rate": 9.91403081914031e-07, "loss": 0.6814, "step": 28139 }, { "epoch": 0.82158185162477, "grad_norm": 0.7252458760202516, "learning_rate": 9.91240875912409e-07, "loss": 0.636, "step": 28140 }, { "epoch": 0.8216110478526174, "grad_norm": 0.6936403322239062, "learning_rate": 9.910786699107867e-07, "loss": 0.5053, "step": 28141 }, { "epoch": 0.8216402440804648, "grad_norm": 0.7422060206690175, "learning_rate": 9.909164639091647e-07, "loss": 0.661, "step": 28142 }, { "epoch": 0.8216694403083121, "grad_norm": 0.6944634011084662, "learning_rate": 9.907542579075427e-07, "loss": 0.6188, "step": 28143 }, { "epoch": 0.8216986365361595, "grad_norm": 0.6813648396048417, "learning_rate": 9.905920519059205e-07, "loss": 0.5719, "step": 28144 }, { "epoch": 0.8217278327640068, "grad_norm": 0.7057929506005752, "learning_rate": 9.904298459042985e-07, "loss": 0.654, "step": 28145 }, { "epoch": 0.8217570289918542, "grad_norm": 0.7157648737477782, "learning_rate": 9.902676399026763e-07, "loss": 0.6172, "step": 28146 }, { "epoch": 0.8217862252197016, "grad_norm": 0.664459600644576, "learning_rate": 9.901054339010546e-07, "loss": 0.546, "step": 28147 }, { "epoch": 0.8218154214475489, "grad_norm": 0.6844504719692782, "learning_rate": 9.899432278994324e-07, "loss": 0.5823, "step": 28148 }, { "epoch": 0.8218446176753963, "grad_norm": 0.6957157362314352, "learning_rate": 9.897810218978104e-07, "loss": 0.5963, "step": 28149 }, { "epoch": 0.8218738139032437, "grad_norm": 0.7145092557179411, "learning_rate": 9.896188158961882e-07, "loss": 0.6676, "step": 28150 }, { "epoch": 0.821903010131091, "grad_norm": 0.7139400701111744, "learning_rate": 9.894566098945662e-07, "loss": 0.627, "step": 28151 }, { "epoch": 0.8219322063589384, "grad_norm": 0.7329123746736058, "learning_rate": 9.892944038929442e-07, "loss": 0.6591, "step": 28152 }, { "epoch": 0.8219614025867857, "grad_norm": 0.6967712465507458, "learning_rate": 9.891321978913222e-07, "loss": 0.6381, "step": 28153 }, { "epoch": 0.8219905988146331, "grad_norm": 0.6976427878196726, "learning_rate": 9.889699918897e-07, "loss": 0.6211, "step": 28154 }, { "epoch": 0.8220197950424805, "grad_norm": 0.7687712131366797, "learning_rate": 9.88807785888078e-07, "loss": 0.6439, "step": 28155 }, { "epoch": 0.8220489912703278, "grad_norm": 0.7453590076446632, "learning_rate": 9.886455798864558e-07, "loss": 0.6221, "step": 28156 }, { "epoch": 0.8220781874981752, "grad_norm": 0.7021495504946619, "learning_rate": 9.884833738848338e-07, "loss": 0.5665, "step": 28157 }, { "epoch": 0.8221073837260225, "grad_norm": 0.7490337972042924, "learning_rate": 9.883211678832118e-07, "loss": 0.6712, "step": 28158 }, { "epoch": 0.8221365799538699, "grad_norm": 0.7124726326809417, "learning_rate": 9.881589618815898e-07, "loss": 0.6445, "step": 28159 }, { "epoch": 0.8221657761817173, "grad_norm": 0.7010468775116493, "learning_rate": 9.879967558799676e-07, "loss": 0.5937, "step": 28160 }, { "epoch": 0.8221949724095647, "grad_norm": 0.6808636525154147, "learning_rate": 9.878345498783456e-07, "loss": 0.5586, "step": 28161 }, { "epoch": 0.8222241686374121, "grad_norm": 0.7582492754574404, "learning_rate": 9.876723438767236e-07, "loss": 0.6621, "step": 28162 }, { "epoch": 0.8222533648652595, "grad_norm": 0.6790799256934176, "learning_rate": 9.875101378751014e-07, "loss": 0.5679, "step": 28163 }, { "epoch": 0.8222825610931068, "grad_norm": 0.6853662657874345, "learning_rate": 9.873479318734794e-07, "loss": 0.5743, "step": 28164 }, { "epoch": 0.8223117573209542, "grad_norm": 0.6912596181324745, "learning_rate": 9.871857258718572e-07, "loss": 0.5845, "step": 28165 }, { "epoch": 0.8223409535488015, "grad_norm": 0.7339753856103736, "learning_rate": 9.870235198702354e-07, "loss": 0.6678, "step": 28166 }, { "epoch": 0.8223701497766489, "grad_norm": 0.6885154409130683, "learning_rate": 9.868613138686132e-07, "loss": 0.5874, "step": 28167 }, { "epoch": 0.8223993460044963, "grad_norm": 0.6746168668081576, "learning_rate": 9.866991078669912e-07, "loss": 0.5545, "step": 28168 }, { "epoch": 0.8224285422323436, "grad_norm": 0.7602448284358397, "learning_rate": 9.86536901865369e-07, "loss": 0.6704, "step": 28169 }, { "epoch": 0.822457738460191, "grad_norm": 0.7102770203114626, "learning_rate": 9.86374695863747e-07, "loss": 0.5791, "step": 28170 }, { "epoch": 0.8224869346880384, "grad_norm": 0.7045879079925672, "learning_rate": 9.86212489862125e-07, "loss": 0.6529, "step": 28171 }, { "epoch": 0.8225161309158857, "grad_norm": 0.7660181243724498, "learning_rate": 9.86050283860503e-07, "loss": 0.7526, "step": 28172 }, { "epoch": 0.8225453271437331, "grad_norm": 0.6749278198105122, "learning_rate": 9.858880778588808e-07, "loss": 0.5846, "step": 28173 }, { "epoch": 0.8225745233715804, "grad_norm": 0.7526776403668219, "learning_rate": 9.857258718572588e-07, "loss": 0.6774, "step": 28174 }, { "epoch": 0.8226037195994278, "grad_norm": 0.7826704948929508, "learning_rate": 9.855636658556366e-07, "loss": 0.6635, "step": 28175 }, { "epoch": 0.8226329158272752, "grad_norm": 0.7098574521686848, "learning_rate": 9.854014598540146e-07, "loss": 0.6264, "step": 28176 }, { "epoch": 0.8226621120551225, "grad_norm": 0.7579379248430209, "learning_rate": 9.852392538523926e-07, "loss": 0.6789, "step": 28177 }, { "epoch": 0.8226913082829699, "grad_norm": 0.6996371111386755, "learning_rate": 9.850770478507707e-07, "loss": 0.5858, "step": 28178 }, { "epoch": 0.8227205045108172, "grad_norm": 0.7631671314080745, "learning_rate": 9.849148418491484e-07, "loss": 0.713, "step": 28179 }, { "epoch": 0.8227497007386646, "grad_norm": 0.7778980494645318, "learning_rate": 9.847526358475265e-07, "loss": 0.7162, "step": 28180 }, { "epoch": 0.822778896966512, "grad_norm": 0.7308486151475877, "learning_rate": 9.845904298459045e-07, "loss": 0.6778, "step": 28181 }, { "epoch": 0.8228080931943593, "grad_norm": 0.7229100084193039, "learning_rate": 9.844282238442823e-07, "loss": 0.6171, "step": 28182 }, { "epoch": 0.8228372894222067, "grad_norm": 0.7029946795269217, "learning_rate": 9.842660178426603e-07, "loss": 0.6148, "step": 28183 }, { "epoch": 0.822866485650054, "grad_norm": 0.7078955356491253, "learning_rate": 9.84103811841038e-07, "loss": 0.6459, "step": 28184 }, { "epoch": 0.8228956818779014, "grad_norm": 0.7540386321844241, "learning_rate": 9.839416058394163e-07, "loss": 0.6975, "step": 28185 }, { "epoch": 0.8229248781057488, "grad_norm": 0.7130154991325329, "learning_rate": 9.83779399837794e-07, "loss": 0.6548, "step": 28186 }, { "epoch": 0.8229540743335961, "grad_norm": 0.7107196387769771, "learning_rate": 9.83617193836172e-07, "loss": 0.6207, "step": 28187 }, { "epoch": 0.8229832705614435, "grad_norm": 0.7200869608596867, "learning_rate": 9.834549878345499e-07, "loss": 0.6357, "step": 28188 }, { "epoch": 0.8230124667892909, "grad_norm": 0.7194025921213277, "learning_rate": 9.832927818329279e-07, "loss": 0.6477, "step": 28189 }, { "epoch": 0.8230416630171382, "grad_norm": 0.7380552696287498, "learning_rate": 9.831305758313059e-07, "loss": 0.6926, "step": 28190 }, { "epoch": 0.8230708592449856, "grad_norm": 0.7567097837909773, "learning_rate": 9.829683698296839e-07, "loss": 0.6028, "step": 28191 }, { "epoch": 0.8231000554728329, "grad_norm": 0.7627804468267221, "learning_rate": 9.828061638280617e-07, "loss": 0.6131, "step": 28192 }, { "epoch": 0.8231292517006803, "grad_norm": 0.7021671999894752, "learning_rate": 9.826439578264397e-07, "loss": 0.618, "step": 28193 }, { "epoch": 0.8231584479285277, "grad_norm": 0.7446907340570681, "learning_rate": 9.824817518248175e-07, "loss": 0.607, "step": 28194 }, { "epoch": 0.823187644156375, "grad_norm": 0.7213912885289138, "learning_rate": 9.823195458231955e-07, "loss": 0.6021, "step": 28195 }, { "epoch": 0.8232168403842224, "grad_norm": 0.7244177838745028, "learning_rate": 9.821573398215735e-07, "loss": 0.6465, "step": 28196 }, { "epoch": 0.8232460366120697, "grad_norm": 0.7267455755132622, "learning_rate": 9.819951338199513e-07, "loss": 0.5708, "step": 28197 }, { "epoch": 0.8232752328399171, "grad_norm": 0.6800584589366953, "learning_rate": 9.818329278183293e-07, "loss": 0.5972, "step": 28198 }, { "epoch": 0.8233044290677645, "grad_norm": 0.7758818321216165, "learning_rate": 9.816707218167073e-07, "loss": 0.5406, "step": 28199 }, { "epoch": 0.8233336252956118, "grad_norm": 0.7721306395132711, "learning_rate": 9.815085158150853e-07, "loss": 0.7143, "step": 28200 }, { "epoch": 0.8233628215234592, "grad_norm": 0.6863863493278064, "learning_rate": 9.813463098134631e-07, "loss": 0.5616, "step": 28201 }, { "epoch": 0.8233920177513065, "grad_norm": 0.7296511281574881, "learning_rate": 9.811841038118411e-07, "loss": 0.6547, "step": 28202 }, { "epoch": 0.8234212139791539, "grad_norm": 0.7192402482606741, "learning_rate": 9.81021897810219e-07, "loss": 0.6214, "step": 28203 }, { "epoch": 0.8234504102070013, "grad_norm": 0.7239300389511554, "learning_rate": 9.808596918085971e-07, "loss": 0.6357, "step": 28204 }, { "epoch": 0.8234796064348486, "grad_norm": 0.72012911583215, "learning_rate": 9.80697485806975e-07, "loss": 0.6616, "step": 28205 }, { "epoch": 0.823508802662696, "grad_norm": 0.7050398541745542, "learning_rate": 9.80535279805353e-07, "loss": 0.5907, "step": 28206 }, { "epoch": 0.8235379988905434, "grad_norm": 0.733179186790738, "learning_rate": 9.803730738037307e-07, "loss": 0.6315, "step": 28207 }, { "epoch": 0.8235671951183907, "grad_norm": 0.7616864648180814, "learning_rate": 9.802108678021087e-07, "loss": 0.663, "step": 28208 }, { "epoch": 0.8235963913462381, "grad_norm": 0.6775579967192491, "learning_rate": 9.800486618004867e-07, "loss": 0.5758, "step": 28209 }, { "epoch": 0.8236255875740854, "grad_norm": 0.6983198323918356, "learning_rate": 9.798864557988648e-07, "loss": 0.563, "step": 28210 }, { "epoch": 0.8236547838019328, "grad_norm": 1.0099727801209228, "learning_rate": 9.797242497972425e-07, "loss": 0.7061, "step": 28211 }, { "epoch": 0.8236839800297802, "grad_norm": 0.67098886917929, "learning_rate": 9.795620437956206e-07, "loss": 0.5402, "step": 28212 }, { "epoch": 0.8237131762576275, "grad_norm": 0.7446944262020326, "learning_rate": 9.793998377939986e-07, "loss": 0.6524, "step": 28213 }, { "epoch": 0.8237423724854749, "grad_norm": 0.7404269505550503, "learning_rate": 9.792376317923764e-07, "loss": 0.6442, "step": 28214 }, { "epoch": 0.8237715687133222, "grad_norm": 0.7431430791165238, "learning_rate": 9.790754257907544e-07, "loss": 0.6372, "step": 28215 }, { "epoch": 0.8238007649411696, "grad_norm": 0.7509993378905935, "learning_rate": 9.789132197891322e-07, "loss": 0.6759, "step": 28216 }, { "epoch": 0.823829961169017, "grad_norm": 0.7137126952362062, "learning_rate": 9.787510137875102e-07, "loss": 0.576, "step": 28217 }, { "epoch": 0.8238591573968643, "grad_norm": 0.699821924374216, "learning_rate": 9.785888077858882e-07, "loss": 0.6455, "step": 28218 }, { "epoch": 0.8238883536247117, "grad_norm": 0.7739950748702209, "learning_rate": 9.784266017842662e-07, "loss": 0.6377, "step": 28219 }, { "epoch": 0.823917549852559, "grad_norm": 0.6975633425137794, "learning_rate": 9.78264395782644e-07, "loss": 0.6052, "step": 28220 }, { "epoch": 0.8239467460804064, "grad_norm": 0.68772553776848, "learning_rate": 9.78102189781022e-07, "loss": 0.6328, "step": 28221 }, { "epoch": 0.8239759423082538, "grad_norm": 0.6502541546797851, "learning_rate": 9.779399837793998e-07, "loss": 0.555, "step": 28222 }, { "epoch": 0.8240051385361011, "grad_norm": 0.7133310502992815, "learning_rate": 9.77777777777778e-07, "loss": 0.6012, "step": 28223 }, { "epoch": 0.8240343347639485, "grad_norm": 0.6673973368166742, "learning_rate": 9.776155717761558e-07, "loss": 0.5739, "step": 28224 }, { "epoch": 0.8240635309917959, "grad_norm": 0.6871208765567752, "learning_rate": 9.774533657745338e-07, "loss": 0.6274, "step": 28225 }, { "epoch": 0.8240927272196432, "grad_norm": 0.7073135031356443, "learning_rate": 9.772911597729116e-07, "loss": 0.5942, "step": 28226 }, { "epoch": 0.8241219234474906, "grad_norm": 0.7546545309392928, "learning_rate": 9.771289537712896e-07, "loss": 0.7076, "step": 28227 }, { "epoch": 0.8241511196753379, "grad_norm": 0.6943850679765503, "learning_rate": 9.769667477696676e-07, "loss": 0.6346, "step": 28228 }, { "epoch": 0.8241803159031853, "grad_norm": 0.6836576104855187, "learning_rate": 9.768045417680456e-07, "loss": 0.625, "step": 28229 }, { "epoch": 0.8242095121310327, "grad_norm": 0.7617941011679584, "learning_rate": 9.766423357664234e-07, "loss": 0.7106, "step": 28230 }, { "epoch": 0.82423870835888, "grad_norm": 0.7226303625094556, "learning_rate": 9.764801297648014e-07, "loss": 0.6663, "step": 28231 }, { "epoch": 0.8242679045867274, "grad_norm": 0.6824471636661165, "learning_rate": 9.763179237631794e-07, "loss": 0.6084, "step": 28232 }, { "epoch": 0.8242971008145747, "grad_norm": 0.7102081935666026, "learning_rate": 9.761557177615572e-07, "loss": 0.6307, "step": 28233 }, { "epoch": 0.8243262970424221, "grad_norm": 0.7363122214686548, "learning_rate": 9.759935117599352e-07, "loss": 0.6307, "step": 28234 }, { "epoch": 0.8243554932702695, "grad_norm": 0.7151049395869654, "learning_rate": 9.75831305758313e-07, "loss": 0.6682, "step": 28235 }, { "epoch": 0.8243846894981168, "grad_norm": 0.7481144561786156, "learning_rate": 9.75669099756691e-07, "loss": 0.6669, "step": 28236 }, { "epoch": 0.8244138857259642, "grad_norm": 0.6587870296267578, "learning_rate": 9.75506893755069e-07, "loss": 0.5691, "step": 28237 }, { "epoch": 0.8244430819538116, "grad_norm": 0.7497880306891634, "learning_rate": 9.75344687753447e-07, "loss": 0.6724, "step": 28238 }, { "epoch": 0.8244722781816589, "grad_norm": 0.732457929451686, "learning_rate": 9.751824817518248e-07, "loss": 0.6497, "step": 28239 }, { "epoch": 0.8245014744095063, "grad_norm": 0.7307237933796236, "learning_rate": 9.750202757502028e-07, "loss": 0.6915, "step": 28240 }, { "epoch": 0.8245306706373536, "grad_norm": 0.6819570399038181, "learning_rate": 9.748580697485806e-07, "loss": 0.5845, "step": 28241 }, { "epoch": 0.824559866865201, "grad_norm": 0.7469341126108267, "learning_rate": 9.746958637469589e-07, "loss": 0.6487, "step": 28242 }, { "epoch": 0.8245890630930484, "grad_norm": 0.6833050677966693, "learning_rate": 9.745336577453366e-07, "loss": 0.542, "step": 28243 }, { "epoch": 0.8246182593208957, "grad_norm": 0.6907018252781658, "learning_rate": 9.743714517437147e-07, "loss": 0.5883, "step": 28244 }, { "epoch": 0.8246474555487431, "grad_norm": 0.7432249657402662, "learning_rate": 9.742092457420924e-07, "loss": 0.6482, "step": 28245 }, { "epoch": 0.8246766517765904, "grad_norm": 0.7509105828618994, "learning_rate": 9.740470397404705e-07, "loss": 0.7051, "step": 28246 }, { "epoch": 0.8247058480044378, "grad_norm": 0.7143850372820206, "learning_rate": 9.738848337388485e-07, "loss": 0.556, "step": 28247 }, { "epoch": 0.8247350442322852, "grad_norm": 0.6955906784958377, "learning_rate": 9.737226277372265e-07, "loss": 0.601, "step": 28248 }, { "epoch": 0.8247642404601325, "grad_norm": 0.8250975859218795, "learning_rate": 9.735604217356043e-07, "loss": 0.5851, "step": 28249 }, { "epoch": 0.8247934366879799, "grad_norm": 0.7108742174852557, "learning_rate": 9.733982157339823e-07, "loss": 0.585, "step": 28250 }, { "epoch": 0.8248226329158272, "grad_norm": 0.7116543196013475, "learning_rate": 9.732360097323603e-07, "loss": 0.5698, "step": 28251 }, { "epoch": 0.8248518291436746, "grad_norm": 0.7340414744528946, "learning_rate": 9.73073803730738e-07, "loss": 0.6465, "step": 28252 }, { "epoch": 0.824881025371522, "grad_norm": 0.6845603877801201, "learning_rate": 9.72911597729116e-07, "loss": 0.5844, "step": 28253 }, { "epoch": 0.8249102215993693, "grad_norm": 0.6531785885306189, "learning_rate": 9.727493917274939e-07, "loss": 0.5559, "step": 28254 }, { "epoch": 0.8249394178272167, "grad_norm": 0.7052894738624867, "learning_rate": 9.725871857258719e-07, "loss": 0.6303, "step": 28255 }, { "epoch": 0.824968614055064, "grad_norm": 0.7012702630370925, "learning_rate": 9.724249797242499e-07, "loss": 0.6156, "step": 28256 }, { "epoch": 0.8249978102829114, "grad_norm": 0.7202496972754776, "learning_rate": 9.72262773722628e-07, "loss": 0.585, "step": 28257 }, { "epoch": 0.8250270065107588, "grad_norm": 0.7580742452411188, "learning_rate": 9.721005677210057e-07, "loss": 0.7047, "step": 28258 }, { "epoch": 0.8250562027386061, "grad_norm": 0.7412944225481573, "learning_rate": 9.719383617193837e-07, "loss": 0.5976, "step": 28259 }, { "epoch": 0.8250853989664535, "grad_norm": 0.8043462216941544, "learning_rate": 9.717761557177615e-07, "loss": 0.783, "step": 28260 }, { "epoch": 0.8251145951943009, "grad_norm": 0.7768042655656588, "learning_rate": 9.716139497161397e-07, "loss": 0.6945, "step": 28261 }, { "epoch": 0.8251437914221482, "grad_norm": 0.8231445144551355, "learning_rate": 9.714517437145175e-07, "loss": 0.7285, "step": 28262 }, { "epoch": 0.8251729876499956, "grad_norm": 0.7900360403605262, "learning_rate": 9.712895377128955e-07, "loss": 0.7555, "step": 28263 }, { "epoch": 0.8252021838778429, "grad_norm": 0.6962163457335443, "learning_rate": 9.711273317112733e-07, "loss": 0.6446, "step": 28264 }, { "epoch": 0.8252313801056903, "grad_norm": 0.7298687198679684, "learning_rate": 9.709651257096513e-07, "loss": 0.6286, "step": 28265 }, { "epoch": 0.8252605763335377, "grad_norm": 0.7359098461755517, "learning_rate": 9.708029197080293e-07, "loss": 0.7076, "step": 28266 }, { "epoch": 0.825289772561385, "grad_norm": 0.7299155390110207, "learning_rate": 9.706407137064073e-07, "loss": 0.6653, "step": 28267 }, { "epoch": 0.8253189687892324, "grad_norm": 0.7279178126738516, "learning_rate": 9.704785077047851e-07, "loss": 0.6367, "step": 28268 }, { "epoch": 0.8253481650170797, "grad_norm": 0.7588177342076795, "learning_rate": 9.703163017031631e-07, "loss": 0.6574, "step": 28269 }, { "epoch": 0.8253773612449271, "grad_norm": 0.7561563053031035, "learning_rate": 9.701540957015411e-07, "loss": 0.6838, "step": 28270 }, { "epoch": 0.8254065574727745, "grad_norm": 0.7201638732122955, "learning_rate": 9.69991889699919e-07, "loss": 0.6489, "step": 28271 }, { "epoch": 0.8254357537006218, "grad_norm": 0.7192854523951203, "learning_rate": 9.69829683698297e-07, "loss": 0.6442, "step": 28272 }, { "epoch": 0.8254649499284692, "grad_norm": 0.687766495337282, "learning_rate": 9.696674776966747e-07, "loss": 0.5711, "step": 28273 }, { "epoch": 0.8254941461563166, "grad_norm": 0.7148126161512189, "learning_rate": 9.695052716950527e-07, "loss": 0.6429, "step": 28274 }, { "epoch": 0.8255233423841639, "grad_norm": 0.7345137958733062, "learning_rate": 9.693430656934307e-07, "loss": 0.607, "step": 28275 }, { "epoch": 0.8255525386120113, "grad_norm": 0.7256111800482289, "learning_rate": 9.691808596918088e-07, "loss": 0.6873, "step": 28276 }, { "epoch": 0.8255817348398586, "grad_norm": 0.770789675041391, "learning_rate": 9.690186536901865e-07, "loss": 0.7021, "step": 28277 }, { "epoch": 0.825610931067706, "grad_norm": 0.7389270919088269, "learning_rate": 9.688564476885646e-07, "loss": 0.674, "step": 28278 }, { "epoch": 0.8256401272955534, "grad_norm": 0.7324686558553217, "learning_rate": 9.686942416869423e-07, "loss": 0.6431, "step": 28279 }, { "epoch": 0.8256693235234007, "grad_norm": 0.6983063472822552, "learning_rate": 9.685320356853206e-07, "loss": 0.6022, "step": 28280 }, { "epoch": 0.8256985197512481, "grad_norm": 0.7071312045778477, "learning_rate": 9.683698296836984e-07, "loss": 0.6348, "step": 28281 }, { "epoch": 0.8257277159790956, "grad_norm": 0.6934446221837661, "learning_rate": 9.682076236820764e-07, "loss": 0.5981, "step": 28282 }, { "epoch": 0.8257569122069429, "grad_norm": 0.7072330728890248, "learning_rate": 9.680454176804542e-07, "loss": 0.6201, "step": 28283 }, { "epoch": 0.8257861084347903, "grad_norm": 0.6867711036127552, "learning_rate": 9.678832116788322e-07, "loss": 0.5651, "step": 28284 }, { "epoch": 0.8258153046626376, "grad_norm": 0.748506117168427, "learning_rate": 9.677210056772102e-07, "loss": 0.7027, "step": 28285 }, { "epoch": 0.825844500890485, "grad_norm": 0.7438564537353087, "learning_rate": 9.67558799675588e-07, "loss": 0.6268, "step": 28286 }, { "epoch": 0.8258736971183324, "grad_norm": 0.7068237045755518, "learning_rate": 9.67396593673966e-07, "loss": 0.6397, "step": 28287 }, { "epoch": 0.8259028933461797, "grad_norm": 0.7973504993152043, "learning_rate": 9.67234387672344e-07, "loss": 0.7483, "step": 28288 }, { "epoch": 0.8259320895740271, "grad_norm": 0.7966385110942774, "learning_rate": 9.67072181670722e-07, "loss": 0.6065, "step": 28289 }, { "epoch": 0.8259612858018744, "grad_norm": 0.7586588036368823, "learning_rate": 9.669099756690998e-07, "loss": 0.6801, "step": 28290 }, { "epoch": 0.8259904820297218, "grad_norm": 0.7699792568205737, "learning_rate": 9.667477696674778e-07, "loss": 0.7053, "step": 28291 }, { "epoch": 0.8260196782575692, "grad_norm": 0.7242907766340335, "learning_rate": 9.665855636658556e-07, "loss": 0.6394, "step": 28292 }, { "epoch": 0.8260488744854165, "grad_norm": 0.727078872647145, "learning_rate": 9.664233576642336e-07, "loss": 0.6904, "step": 28293 }, { "epoch": 0.8260780707132639, "grad_norm": 0.6889727009672791, "learning_rate": 9.662611516626116e-07, "loss": 0.5937, "step": 28294 }, { "epoch": 0.8261072669411113, "grad_norm": 0.718088971776877, "learning_rate": 9.660989456609896e-07, "loss": 0.6125, "step": 28295 }, { "epoch": 0.8261364631689586, "grad_norm": 0.7390014559657856, "learning_rate": 9.659367396593674e-07, "loss": 0.7079, "step": 28296 }, { "epoch": 0.826165659396806, "grad_norm": 0.747017106780888, "learning_rate": 9.657745336577454e-07, "loss": 0.6401, "step": 28297 }, { "epoch": 0.8261948556246533, "grad_norm": 0.7199053290298113, "learning_rate": 9.656123276561234e-07, "loss": 0.6534, "step": 28298 }, { "epoch": 0.8262240518525007, "grad_norm": 0.6772306877962175, "learning_rate": 9.654501216545014e-07, "loss": 0.5946, "step": 28299 }, { "epoch": 0.8262532480803481, "grad_norm": 0.8586496344889272, "learning_rate": 9.652879156528792e-07, "loss": 0.6542, "step": 28300 }, { "epoch": 0.8262824443081954, "grad_norm": 0.7456763383702983, "learning_rate": 9.651257096512572e-07, "loss": 0.6716, "step": 28301 }, { "epoch": 0.8263116405360428, "grad_norm": 0.7435864719822957, "learning_rate": 9.64963503649635e-07, "loss": 0.6602, "step": 28302 }, { "epoch": 0.8263408367638901, "grad_norm": 0.7153672034432581, "learning_rate": 9.64801297648013e-07, "loss": 0.5894, "step": 28303 }, { "epoch": 0.8263700329917375, "grad_norm": 0.7163397139477394, "learning_rate": 9.64639091646391e-07, "loss": 0.6108, "step": 28304 }, { "epoch": 0.8263992292195849, "grad_norm": 0.6358327883917121, "learning_rate": 9.644768856447688e-07, "loss": 0.5123, "step": 28305 }, { "epoch": 0.8264284254474322, "grad_norm": 0.6968778124533539, "learning_rate": 9.643146796431468e-07, "loss": 0.6278, "step": 28306 }, { "epoch": 0.8264576216752796, "grad_norm": 0.7118037717583707, "learning_rate": 9.641524736415248e-07, "loss": 0.6131, "step": 28307 }, { "epoch": 0.826486817903127, "grad_norm": 0.7023434605686762, "learning_rate": 9.639902676399029e-07, "loss": 0.6424, "step": 28308 }, { "epoch": 0.8265160141309743, "grad_norm": 0.654575983975357, "learning_rate": 9.638280616382806e-07, "loss": 0.5623, "step": 28309 }, { "epoch": 0.8265452103588217, "grad_norm": 0.7450792252479015, "learning_rate": 9.636658556366587e-07, "loss": 0.6265, "step": 28310 }, { "epoch": 0.826574406586669, "grad_norm": 0.6975178183596942, "learning_rate": 9.635036496350364e-07, "loss": 0.5917, "step": 28311 }, { "epoch": 0.8266036028145164, "grad_norm": 0.7134805724984269, "learning_rate": 9.633414436334145e-07, "loss": 0.5862, "step": 28312 }, { "epoch": 0.8266327990423638, "grad_norm": 0.7161782377873124, "learning_rate": 9.631792376317925e-07, "loss": 0.5763, "step": 28313 }, { "epoch": 0.8266619952702111, "grad_norm": 0.749823352376181, "learning_rate": 9.630170316301705e-07, "loss": 0.6975, "step": 28314 }, { "epoch": 0.8266911914980585, "grad_norm": 0.6731431594632771, "learning_rate": 9.628548256285483e-07, "loss": 0.5538, "step": 28315 }, { "epoch": 0.8267203877259058, "grad_norm": 0.7257628137947677, "learning_rate": 9.626926196269263e-07, "loss": 0.6618, "step": 28316 }, { "epoch": 0.8267495839537532, "grad_norm": 0.7184533746290939, "learning_rate": 9.625304136253043e-07, "loss": 0.6215, "step": 28317 }, { "epoch": 0.8267787801816006, "grad_norm": 0.7346863165821857, "learning_rate": 9.623682076236823e-07, "loss": 0.5782, "step": 28318 }, { "epoch": 0.8268079764094479, "grad_norm": 0.7040747417642738, "learning_rate": 9.6220600162206e-07, "loss": 0.5557, "step": 28319 }, { "epoch": 0.8268371726372953, "grad_norm": 0.7526170583258356, "learning_rate": 9.62043795620438e-07, "loss": 0.6439, "step": 28320 }, { "epoch": 0.8268663688651426, "grad_norm": 0.7142142487023272, "learning_rate": 9.618815896188159e-07, "loss": 0.5902, "step": 28321 }, { "epoch": 0.82689556509299, "grad_norm": 0.7680550369820737, "learning_rate": 9.617193836171939e-07, "loss": 0.6498, "step": 28322 }, { "epoch": 0.8269247613208374, "grad_norm": 0.7835551717320299, "learning_rate": 9.61557177615572e-07, "loss": 0.6941, "step": 28323 }, { "epoch": 0.8269539575486847, "grad_norm": 0.9808357343544164, "learning_rate": 9.613949716139497e-07, "loss": 0.7643, "step": 28324 }, { "epoch": 0.8269831537765321, "grad_norm": 0.6983565121997525, "learning_rate": 9.612327656123277e-07, "loss": 0.5815, "step": 28325 }, { "epoch": 0.8270123500043794, "grad_norm": 0.7677143017158468, "learning_rate": 9.610705596107057e-07, "loss": 0.722, "step": 28326 }, { "epoch": 0.8270415462322268, "grad_norm": 0.6845179082407921, "learning_rate": 9.609083536090837e-07, "loss": 0.5692, "step": 28327 }, { "epoch": 0.8270707424600742, "grad_norm": 0.698952978973864, "learning_rate": 9.607461476074615e-07, "loss": 0.5976, "step": 28328 }, { "epoch": 0.8270999386879215, "grad_norm": 0.7370108852940166, "learning_rate": 9.605839416058395e-07, "loss": 0.6239, "step": 28329 }, { "epoch": 0.8271291349157689, "grad_norm": 0.6911041478784917, "learning_rate": 9.604217356042173e-07, "loss": 0.5658, "step": 28330 }, { "epoch": 0.8271583311436163, "grad_norm": 0.8188970802854585, "learning_rate": 9.602595296025955e-07, "loss": 0.7245, "step": 28331 }, { "epoch": 0.8271875273714636, "grad_norm": 0.7666104001298236, "learning_rate": 9.600973236009733e-07, "loss": 0.6673, "step": 28332 }, { "epoch": 0.827216723599311, "grad_norm": 0.7522745804107692, "learning_rate": 9.599351175993513e-07, "loss": 0.6771, "step": 28333 }, { "epoch": 0.8272459198271583, "grad_norm": 0.7257055928399802, "learning_rate": 9.597729115977291e-07, "loss": 0.6201, "step": 28334 }, { "epoch": 0.8272751160550057, "grad_norm": 0.728540776355369, "learning_rate": 9.596107055961071e-07, "loss": 0.6778, "step": 28335 }, { "epoch": 0.8273043122828531, "grad_norm": 0.7668429766424355, "learning_rate": 9.594484995944851e-07, "loss": 0.7339, "step": 28336 }, { "epoch": 0.8273335085107004, "grad_norm": 0.7317731448664214, "learning_rate": 9.592862935928631e-07, "loss": 0.7145, "step": 28337 }, { "epoch": 0.8273627047385478, "grad_norm": 0.6826769692199945, "learning_rate": 9.59124087591241e-07, "loss": 0.5712, "step": 28338 }, { "epoch": 0.8273919009663951, "grad_norm": 0.7261201987956174, "learning_rate": 9.58961881589619e-07, "loss": 0.6754, "step": 28339 }, { "epoch": 0.8274210971942425, "grad_norm": 0.7403876360380893, "learning_rate": 9.587996755879967e-07, "loss": 0.6483, "step": 28340 }, { "epoch": 0.8274502934220899, "grad_norm": 0.6712629701694041, "learning_rate": 9.586374695863747e-07, "loss": 0.5788, "step": 28341 }, { "epoch": 0.8274794896499372, "grad_norm": 0.7250842129783629, "learning_rate": 9.584752635847528e-07, "loss": 0.6442, "step": 28342 }, { "epoch": 0.8275086858777846, "grad_norm": 0.7230212616566704, "learning_rate": 9.583130575831305e-07, "loss": 0.6575, "step": 28343 }, { "epoch": 0.827537882105632, "grad_norm": 0.6963980167840975, "learning_rate": 9.581508515815086e-07, "loss": 0.6405, "step": 28344 }, { "epoch": 0.8275670783334793, "grad_norm": 0.6920184921142835, "learning_rate": 9.579886455798866e-07, "loss": 0.5856, "step": 28345 }, { "epoch": 0.8275962745613267, "grad_norm": 0.7162251715275011, "learning_rate": 9.578264395782646e-07, "loss": 0.6064, "step": 28346 }, { "epoch": 0.827625470789174, "grad_norm": 0.718063451343144, "learning_rate": 9.576642335766424e-07, "loss": 0.6383, "step": 28347 }, { "epoch": 0.8276546670170214, "grad_norm": 0.6743078558411542, "learning_rate": 9.575020275750204e-07, "loss": 0.5948, "step": 28348 }, { "epoch": 0.8276838632448688, "grad_norm": 0.7289784657445967, "learning_rate": 9.573398215733982e-07, "loss": 0.6472, "step": 28349 }, { "epoch": 0.8277130594727161, "grad_norm": 0.7271039282786109, "learning_rate": 9.571776155717764e-07, "loss": 0.6074, "step": 28350 }, { "epoch": 0.8277422557005635, "grad_norm": 0.712197843198028, "learning_rate": 9.570154095701542e-07, "loss": 0.6129, "step": 28351 }, { "epoch": 0.8277714519284108, "grad_norm": 0.7401979022324574, "learning_rate": 9.568532035685322e-07, "loss": 0.6161, "step": 28352 }, { "epoch": 0.8278006481562582, "grad_norm": 0.7953750287522995, "learning_rate": 9.5669099756691e-07, "loss": 0.7173, "step": 28353 }, { "epoch": 0.8278298443841056, "grad_norm": 0.7308865131763169, "learning_rate": 9.56528791565288e-07, "loss": 0.6845, "step": 28354 }, { "epoch": 0.8278590406119529, "grad_norm": 0.77372472131614, "learning_rate": 9.56366585563666e-07, "loss": 0.6178, "step": 28355 }, { "epoch": 0.8278882368398003, "grad_norm": 0.7245683310620292, "learning_rate": 9.56204379562044e-07, "loss": 0.608, "step": 28356 }, { "epoch": 0.8279174330676476, "grad_norm": 0.8076531882447968, "learning_rate": 9.560421735604218e-07, "loss": 0.7722, "step": 28357 }, { "epoch": 0.827946629295495, "grad_norm": 0.7411459474332337, "learning_rate": 9.558799675587998e-07, "loss": 0.6746, "step": 28358 }, { "epoch": 0.8279758255233424, "grad_norm": 0.6779177447415009, "learning_rate": 9.557177615571776e-07, "loss": 0.5532, "step": 28359 }, { "epoch": 0.8280050217511897, "grad_norm": 0.7043356945768162, "learning_rate": 9.555555555555556e-07, "loss": 0.6405, "step": 28360 }, { "epoch": 0.8280342179790371, "grad_norm": 0.7155765590943923, "learning_rate": 9.553933495539336e-07, "loss": 0.5983, "step": 28361 }, { "epoch": 0.8280634142068845, "grad_norm": 0.7893942195102494, "learning_rate": 9.552311435523114e-07, "loss": 0.7258, "step": 28362 }, { "epoch": 0.8280926104347318, "grad_norm": 0.7474512170379497, "learning_rate": 9.550689375506894e-07, "loss": 0.6249, "step": 28363 }, { "epoch": 0.8281218066625792, "grad_norm": 0.7040569982113306, "learning_rate": 9.549067315490674e-07, "loss": 0.5951, "step": 28364 }, { "epoch": 0.8281510028904265, "grad_norm": 0.712088347297971, "learning_rate": 9.547445255474454e-07, "loss": 0.618, "step": 28365 }, { "epoch": 0.8281801991182739, "grad_norm": 0.693297066874267, "learning_rate": 9.545823195458232e-07, "loss": 0.5932, "step": 28366 }, { "epoch": 0.8282093953461213, "grad_norm": 0.7298052707621232, "learning_rate": 9.544201135442012e-07, "loss": 0.6627, "step": 28367 }, { "epoch": 0.8282385915739686, "grad_norm": 0.6763350249848527, "learning_rate": 9.54257907542579e-07, "loss": 0.5757, "step": 28368 }, { "epoch": 0.828267787801816, "grad_norm": 0.7169880351395221, "learning_rate": 9.540957015409572e-07, "loss": 0.6262, "step": 28369 }, { "epoch": 0.8282969840296633, "grad_norm": 0.7038270666183903, "learning_rate": 9.53933495539335e-07, "loss": 0.6245, "step": 28370 }, { "epoch": 0.8283261802575107, "grad_norm": 0.7054102408380594, "learning_rate": 9.53771289537713e-07, "loss": 0.6151, "step": 28371 }, { "epoch": 0.8283553764853581, "grad_norm": 0.7221604686848077, "learning_rate": 9.536090835360908e-07, "loss": 0.6436, "step": 28372 }, { "epoch": 0.8283845727132054, "grad_norm": 0.7485054779189579, "learning_rate": 9.534468775344688e-07, "loss": 0.6897, "step": 28373 }, { "epoch": 0.8284137689410528, "grad_norm": 0.7338651846531942, "learning_rate": 9.532846715328469e-07, "loss": 0.6954, "step": 28374 }, { "epoch": 0.8284429651689001, "grad_norm": 0.7514103169365887, "learning_rate": 9.531224655312248e-07, "loss": 0.7414, "step": 28375 }, { "epoch": 0.8284721613967475, "grad_norm": 0.7470523729868557, "learning_rate": 9.529602595296027e-07, "loss": 0.6834, "step": 28376 }, { "epoch": 0.8285013576245949, "grad_norm": 0.7656271758512769, "learning_rate": 9.527980535279806e-07, "loss": 0.7302, "step": 28377 }, { "epoch": 0.8285305538524422, "grad_norm": 0.6877100781340227, "learning_rate": 9.526358475263585e-07, "loss": 0.5925, "step": 28378 }, { "epoch": 0.8285597500802896, "grad_norm": 0.7118772641898976, "learning_rate": 9.524736415247366e-07, "loss": 0.6562, "step": 28379 }, { "epoch": 0.828588946308137, "grad_norm": 0.7395205916433375, "learning_rate": 9.523114355231145e-07, "loss": 0.7171, "step": 28380 }, { "epoch": 0.8286181425359843, "grad_norm": 1.017925287142828, "learning_rate": 9.521492295214924e-07, "loss": 0.7592, "step": 28381 }, { "epoch": 0.8286473387638317, "grad_norm": 0.7082961819551796, "learning_rate": 9.519870235198703e-07, "loss": 0.5743, "step": 28382 }, { "epoch": 0.828676534991679, "grad_norm": 0.783291982534589, "learning_rate": 9.518248175182483e-07, "loss": 0.6766, "step": 28383 }, { "epoch": 0.8287057312195264, "grad_norm": 0.7661955801520844, "learning_rate": 9.516626115166262e-07, "loss": 0.6932, "step": 28384 }, { "epoch": 0.8287349274473738, "grad_norm": 0.7707468018954452, "learning_rate": 9.515004055150042e-07, "loss": 0.6593, "step": 28385 }, { "epoch": 0.8287641236752211, "grad_norm": 1.2015944382467454, "learning_rate": 9.513381995133821e-07, "loss": 0.6499, "step": 28386 }, { "epoch": 0.8287933199030685, "grad_norm": 0.7341715382873205, "learning_rate": 9.5117599351176e-07, "loss": 0.6453, "step": 28387 }, { "epoch": 0.8288225161309158, "grad_norm": 0.7426671937783574, "learning_rate": 9.51013787510138e-07, "loss": 0.6905, "step": 28388 }, { "epoch": 0.8288517123587632, "grad_norm": 0.7844876638884246, "learning_rate": 9.508515815085159e-07, "loss": 0.7121, "step": 28389 }, { "epoch": 0.8288809085866106, "grad_norm": 0.7156271054780187, "learning_rate": 9.506893755068938e-07, "loss": 0.5953, "step": 28390 }, { "epoch": 0.8289101048144579, "grad_norm": 0.7317408917504394, "learning_rate": 9.505271695052717e-07, "loss": 0.6784, "step": 28391 }, { "epoch": 0.8289393010423053, "grad_norm": 0.6844641008070023, "learning_rate": 9.503649635036496e-07, "loss": 0.5665, "step": 28392 }, { "epoch": 0.8289684972701526, "grad_norm": 0.7206386960668668, "learning_rate": 9.502027575020277e-07, "loss": 0.6334, "step": 28393 }, { "epoch": 0.828997693498, "grad_norm": 0.7372985371992724, "learning_rate": 9.500405515004056e-07, "loss": 0.6696, "step": 28394 }, { "epoch": 0.8290268897258474, "grad_norm": 0.6956080979459668, "learning_rate": 9.498783454987835e-07, "loss": 0.5981, "step": 28395 }, { "epoch": 0.8290560859536947, "grad_norm": 0.6967700578201204, "learning_rate": 9.497161394971614e-07, "loss": 0.6515, "step": 28396 }, { "epoch": 0.8290852821815421, "grad_norm": 0.7618081583519579, "learning_rate": 9.495539334955395e-07, "loss": 0.6753, "step": 28397 }, { "epoch": 0.8291144784093895, "grad_norm": 0.7061479674221689, "learning_rate": 9.493917274939174e-07, "loss": 0.6095, "step": 28398 }, { "epoch": 0.8291436746372368, "grad_norm": 0.6574539096217556, "learning_rate": 9.492295214922953e-07, "loss": 0.536, "step": 28399 }, { "epoch": 0.8291728708650842, "grad_norm": 0.6949099713756423, "learning_rate": 9.490673154906732e-07, "loss": 0.6127, "step": 28400 }, { "epoch": 0.8292020670929315, "grad_norm": 0.735361483430718, "learning_rate": 9.489051094890511e-07, "loss": 0.6716, "step": 28401 }, { "epoch": 0.829231263320779, "grad_norm": 0.7188036378794769, "learning_rate": 9.487429034874291e-07, "loss": 0.6474, "step": 28402 }, { "epoch": 0.8292604595486264, "grad_norm": 0.6822884787048905, "learning_rate": 9.48580697485807e-07, "loss": 0.5948, "step": 28403 }, { "epoch": 0.8292896557764737, "grad_norm": 0.7620969482726849, "learning_rate": 9.48418491484185e-07, "loss": 0.7179, "step": 28404 }, { "epoch": 0.8293188520043211, "grad_norm": 0.7058134253102746, "learning_rate": 9.48256285482563e-07, "loss": 0.5834, "step": 28405 }, { "epoch": 0.8293480482321685, "grad_norm": 0.7550319121410184, "learning_rate": 9.480940794809408e-07, "loss": 0.6476, "step": 28406 }, { "epoch": 0.8293772444600158, "grad_norm": 0.7214508936089217, "learning_rate": 9.479318734793189e-07, "loss": 0.5679, "step": 28407 }, { "epoch": 0.8294064406878632, "grad_norm": 0.7749938836384986, "learning_rate": 9.477696674776968e-07, "loss": 0.6858, "step": 28408 }, { "epoch": 0.8294356369157105, "grad_norm": 0.7200438580526036, "learning_rate": 9.476074614760747e-07, "loss": 0.6444, "step": 28409 }, { "epoch": 0.8294648331435579, "grad_norm": 0.7336293417471041, "learning_rate": 9.474452554744526e-07, "loss": 0.6503, "step": 28410 }, { "epoch": 0.8294940293714053, "grad_norm": 0.701973410293618, "learning_rate": 9.472830494728305e-07, "loss": 0.6319, "step": 28411 }, { "epoch": 0.8295232255992526, "grad_norm": 0.6997194274869981, "learning_rate": 9.471208434712086e-07, "loss": 0.5888, "step": 28412 }, { "epoch": 0.8295524218271, "grad_norm": 0.6748155823852191, "learning_rate": 9.469586374695865e-07, "loss": 0.5519, "step": 28413 }, { "epoch": 0.8295816180549473, "grad_norm": 0.7012470974426483, "learning_rate": 9.467964314679644e-07, "loss": 0.6353, "step": 28414 }, { "epoch": 0.8296108142827947, "grad_norm": 0.7172025034048268, "learning_rate": 9.466342254663423e-07, "loss": 0.6254, "step": 28415 }, { "epoch": 0.8296400105106421, "grad_norm": 0.7470083175335657, "learning_rate": 9.464720194647204e-07, "loss": 0.7343, "step": 28416 }, { "epoch": 0.8296692067384894, "grad_norm": 0.7456885817504461, "learning_rate": 9.463098134630983e-07, "loss": 0.6753, "step": 28417 }, { "epoch": 0.8296984029663368, "grad_norm": 0.7339557889701958, "learning_rate": 9.461476074614762e-07, "loss": 0.6596, "step": 28418 }, { "epoch": 0.8297275991941842, "grad_norm": 0.6950578475127663, "learning_rate": 9.459854014598541e-07, "loss": 0.6179, "step": 28419 }, { "epoch": 0.8297567954220315, "grad_norm": 0.722135518524046, "learning_rate": 9.45823195458232e-07, "loss": 0.6493, "step": 28420 }, { "epoch": 0.8297859916498789, "grad_norm": 0.7065145417958807, "learning_rate": 9.4566098945661e-07, "loss": 0.6007, "step": 28421 }, { "epoch": 0.8298151878777262, "grad_norm": 0.7706751760460524, "learning_rate": 9.454987834549879e-07, "loss": 0.7264, "step": 28422 }, { "epoch": 0.8298443841055736, "grad_norm": 0.6598582780561966, "learning_rate": 9.453365774533658e-07, "loss": 0.5415, "step": 28423 }, { "epoch": 0.829873580333421, "grad_norm": 0.6795458892032497, "learning_rate": 9.451743714517438e-07, "loss": 0.5693, "step": 28424 }, { "epoch": 0.8299027765612683, "grad_norm": 0.6558936649078425, "learning_rate": 9.450121654501217e-07, "loss": 0.5495, "step": 28425 }, { "epoch": 0.8299319727891157, "grad_norm": 0.7156797060987675, "learning_rate": 9.448499594484997e-07, "loss": 0.6654, "step": 28426 }, { "epoch": 0.829961169016963, "grad_norm": 0.6975791213058269, "learning_rate": 9.446877534468776e-07, "loss": 0.5951, "step": 28427 }, { "epoch": 0.8299903652448104, "grad_norm": 0.8012365314795964, "learning_rate": 9.445255474452555e-07, "loss": 0.6953, "step": 28428 }, { "epoch": 0.8300195614726578, "grad_norm": 0.8407863681845852, "learning_rate": 9.443633414436334e-07, "loss": 0.7294, "step": 28429 }, { "epoch": 0.8300487577005051, "grad_norm": 0.7192149810413839, "learning_rate": 9.442011354420113e-07, "loss": 0.6162, "step": 28430 }, { "epoch": 0.8300779539283525, "grad_norm": 0.7116314250556768, "learning_rate": 9.440389294403894e-07, "loss": 0.6641, "step": 28431 }, { "epoch": 0.8301071501561998, "grad_norm": 0.7155261914660122, "learning_rate": 9.438767234387673e-07, "loss": 0.5298, "step": 28432 }, { "epoch": 0.8301363463840472, "grad_norm": 0.7073162005956353, "learning_rate": 9.437145174371452e-07, "loss": 0.5979, "step": 28433 }, { "epoch": 0.8301655426118946, "grad_norm": 0.7927425049084474, "learning_rate": 9.435523114355231e-07, "loss": 0.7438, "step": 28434 }, { "epoch": 0.8301947388397419, "grad_norm": 0.7717498871892216, "learning_rate": 9.433901054339012e-07, "loss": 0.7477, "step": 28435 }, { "epoch": 0.8302239350675893, "grad_norm": 0.7482315193369541, "learning_rate": 9.432278994322791e-07, "loss": 0.6073, "step": 28436 }, { "epoch": 0.8302531312954367, "grad_norm": 0.7056857330069052, "learning_rate": 9.43065693430657e-07, "loss": 0.667, "step": 28437 }, { "epoch": 0.830282327523284, "grad_norm": 0.6985398098227544, "learning_rate": 9.42903487429035e-07, "loss": 0.5695, "step": 28438 }, { "epoch": 0.8303115237511314, "grad_norm": 0.7232963941186, "learning_rate": 9.427412814274128e-07, "loss": 0.5871, "step": 28439 }, { "epoch": 0.8303407199789787, "grad_norm": 0.700392536306568, "learning_rate": 9.425790754257909e-07, "loss": 0.5708, "step": 28440 }, { "epoch": 0.8303699162068261, "grad_norm": 0.6328463935325686, "learning_rate": 9.424168694241688e-07, "loss": 0.5138, "step": 28441 }, { "epoch": 0.8303991124346735, "grad_norm": 0.7518837150839821, "learning_rate": 9.422546634225467e-07, "loss": 0.6614, "step": 28442 }, { "epoch": 0.8304283086625208, "grad_norm": 0.7362938079976887, "learning_rate": 9.420924574209247e-07, "loss": 0.6555, "step": 28443 }, { "epoch": 0.8304575048903682, "grad_norm": 0.7180134035066444, "learning_rate": 9.419302514193026e-07, "loss": 0.6583, "step": 28444 }, { "epoch": 0.8304867011182155, "grad_norm": 0.7729441376847925, "learning_rate": 9.417680454176806e-07, "loss": 0.7308, "step": 28445 }, { "epoch": 0.8305158973460629, "grad_norm": 0.7437987008860226, "learning_rate": 9.416058394160585e-07, "loss": 0.6099, "step": 28446 }, { "epoch": 0.8305450935739103, "grad_norm": 0.7761731162938849, "learning_rate": 9.414436334144364e-07, "loss": 0.666, "step": 28447 }, { "epoch": 0.8305742898017576, "grad_norm": 0.6801149276559849, "learning_rate": 9.412814274128143e-07, "loss": 0.582, "step": 28448 }, { "epoch": 0.830603486029605, "grad_norm": 0.7319559496337114, "learning_rate": 9.411192214111924e-07, "loss": 0.6575, "step": 28449 }, { "epoch": 0.8306326822574523, "grad_norm": 0.7399708548346875, "learning_rate": 9.409570154095703e-07, "loss": 0.6968, "step": 28450 }, { "epoch": 0.8306618784852997, "grad_norm": 0.6882137206150175, "learning_rate": 9.407948094079482e-07, "loss": 0.5866, "step": 28451 }, { "epoch": 0.8306910747131471, "grad_norm": 0.712108240789566, "learning_rate": 9.406326034063261e-07, "loss": 0.6122, "step": 28452 }, { "epoch": 0.8307202709409944, "grad_norm": 0.7498888097870192, "learning_rate": 9.40470397404704e-07, "loss": 0.7133, "step": 28453 }, { "epoch": 0.8307494671688418, "grad_norm": 0.7208369804306287, "learning_rate": 9.40308191403082e-07, "loss": 0.6314, "step": 28454 }, { "epoch": 0.8307786633966892, "grad_norm": 0.7173348092407068, "learning_rate": 9.4014598540146e-07, "loss": 0.6049, "step": 28455 }, { "epoch": 0.8308078596245365, "grad_norm": 0.6748134496251855, "learning_rate": 9.399837793998379e-07, "loss": 0.5499, "step": 28456 }, { "epoch": 0.8308370558523839, "grad_norm": 0.8015582031788764, "learning_rate": 9.398215733982158e-07, "loss": 0.7475, "step": 28457 }, { "epoch": 0.8308662520802312, "grad_norm": 0.6868849799712403, "learning_rate": 9.396593673965937e-07, "loss": 0.6061, "step": 28458 }, { "epoch": 0.8308954483080786, "grad_norm": 0.7446155919926774, "learning_rate": 9.394971613949717e-07, "loss": 0.6951, "step": 28459 }, { "epoch": 0.830924644535926, "grad_norm": 0.6849402499578015, "learning_rate": 9.393349553933496e-07, "loss": 0.6129, "step": 28460 }, { "epoch": 0.8309538407637733, "grad_norm": 0.7032955925106777, "learning_rate": 9.391727493917275e-07, "loss": 0.6062, "step": 28461 }, { "epoch": 0.8309830369916207, "grad_norm": 0.6973100845870661, "learning_rate": 9.390105433901055e-07, "loss": 0.5682, "step": 28462 }, { "epoch": 0.831012233219468, "grad_norm": 0.7294056312984973, "learning_rate": 9.388483373884834e-07, "loss": 0.6204, "step": 28463 }, { "epoch": 0.8310414294473154, "grad_norm": 0.6776716042505342, "learning_rate": 9.386861313868614e-07, "loss": 0.5768, "step": 28464 }, { "epoch": 0.8310706256751628, "grad_norm": 0.6846502736112015, "learning_rate": 9.385239253852393e-07, "loss": 0.5615, "step": 28465 }, { "epoch": 0.8310998219030101, "grad_norm": 0.7177230629568675, "learning_rate": 9.383617193836172e-07, "loss": 0.6232, "step": 28466 }, { "epoch": 0.8311290181308575, "grad_norm": 0.6861265209676639, "learning_rate": 9.381995133819951e-07, "loss": 0.5994, "step": 28467 }, { "epoch": 0.8311582143587048, "grad_norm": 0.7084185712705902, "learning_rate": 9.380373073803732e-07, "loss": 0.6095, "step": 28468 }, { "epoch": 0.8311874105865522, "grad_norm": 0.7019086359785974, "learning_rate": 9.378751013787511e-07, "loss": 0.5937, "step": 28469 }, { "epoch": 0.8312166068143996, "grad_norm": 0.7042249602167564, "learning_rate": 9.37712895377129e-07, "loss": 0.6242, "step": 28470 }, { "epoch": 0.8312458030422469, "grad_norm": 0.7582098210505861, "learning_rate": 9.37550689375507e-07, "loss": 0.6226, "step": 28471 }, { "epoch": 0.8312749992700943, "grad_norm": 0.6377070031128016, "learning_rate": 9.373884833738849e-07, "loss": 0.475, "step": 28472 }, { "epoch": 0.8313041954979417, "grad_norm": 0.75190419495484, "learning_rate": 9.372262773722629e-07, "loss": 0.702, "step": 28473 }, { "epoch": 0.831333391725789, "grad_norm": 0.752888168183148, "learning_rate": 9.370640713706409e-07, "loss": 0.6639, "step": 28474 }, { "epoch": 0.8313625879536364, "grad_norm": 0.7460411618390205, "learning_rate": 9.369018653690188e-07, "loss": 0.6892, "step": 28475 }, { "epoch": 0.8313917841814837, "grad_norm": 0.711766980339837, "learning_rate": 9.367396593673967e-07, "loss": 0.6449, "step": 28476 }, { "epoch": 0.8314209804093311, "grad_norm": 0.6894554783353498, "learning_rate": 9.365774533657746e-07, "loss": 0.5615, "step": 28477 }, { "epoch": 0.8314501766371785, "grad_norm": 0.7562369199791202, "learning_rate": 9.364152473641526e-07, "loss": 0.6583, "step": 28478 }, { "epoch": 0.8314793728650258, "grad_norm": 0.7588856122428227, "learning_rate": 9.362530413625305e-07, "loss": 0.6684, "step": 28479 }, { "epoch": 0.8315085690928732, "grad_norm": 0.7768457264263551, "learning_rate": 9.360908353609084e-07, "loss": 0.6701, "step": 28480 }, { "epoch": 0.8315377653207205, "grad_norm": 0.7012191182389642, "learning_rate": 9.359286293592863e-07, "loss": 0.6157, "step": 28481 }, { "epoch": 0.8315669615485679, "grad_norm": 0.6987199506670085, "learning_rate": 9.357664233576644e-07, "loss": 0.5884, "step": 28482 }, { "epoch": 0.8315961577764153, "grad_norm": 0.6730624654547717, "learning_rate": 9.356042173560423e-07, "loss": 0.5638, "step": 28483 }, { "epoch": 0.8316253540042626, "grad_norm": 0.7615706832290382, "learning_rate": 9.354420113544202e-07, "loss": 0.6957, "step": 28484 }, { "epoch": 0.83165455023211, "grad_norm": 0.681074866059188, "learning_rate": 9.352798053527981e-07, "loss": 0.5636, "step": 28485 }, { "epoch": 0.8316837464599574, "grad_norm": 0.7725122377312675, "learning_rate": 9.35117599351176e-07, "loss": 0.6791, "step": 28486 }, { "epoch": 0.8317129426878047, "grad_norm": 0.728033254063238, "learning_rate": 9.349553933495541e-07, "loss": 0.6883, "step": 28487 }, { "epoch": 0.8317421389156521, "grad_norm": 0.725980251823361, "learning_rate": 9.34793187347932e-07, "loss": 0.7168, "step": 28488 }, { "epoch": 0.8317713351434994, "grad_norm": 0.6866754114277582, "learning_rate": 9.346309813463099e-07, "loss": 0.6078, "step": 28489 }, { "epoch": 0.8318005313713468, "grad_norm": 0.71574223577565, "learning_rate": 9.344687753446878e-07, "loss": 0.6171, "step": 28490 }, { "epoch": 0.8318297275991942, "grad_norm": 0.704468933415624, "learning_rate": 9.343065693430657e-07, "loss": 0.6388, "step": 28491 }, { "epoch": 0.8318589238270415, "grad_norm": 0.6950735153997696, "learning_rate": 9.341443633414437e-07, "loss": 0.6078, "step": 28492 }, { "epoch": 0.8318881200548889, "grad_norm": 0.8113453759720896, "learning_rate": 9.339821573398217e-07, "loss": 0.7342, "step": 28493 }, { "epoch": 0.8319173162827362, "grad_norm": 0.7921512457425098, "learning_rate": 9.338199513381996e-07, "loss": 0.6278, "step": 28494 }, { "epoch": 0.8319465125105836, "grad_norm": 0.7429963632956291, "learning_rate": 9.336577453365775e-07, "loss": 0.6709, "step": 28495 }, { "epoch": 0.831975708738431, "grad_norm": 0.6562451543964507, "learning_rate": 9.334955393349554e-07, "loss": 0.5062, "step": 28496 }, { "epoch": 0.8320049049662783, "grad_norm": 0.7304346617289097, "learning_rate": 9.333333333333334e-07, "loss": 0.6939, "step": 28497 }, { "epoch": 0.8320341011941257, "grad_norm": 0.7326272689474596, "learning_rate": 9.331711273317113e-07, "loss": 0.6705, "step": 28498 }, { "epoch": 0.832063297421973, "grad_norm": 0.7700962827958626, "learning_rate": 9.330089213300892e-07, "loss": 0.7472, "step": 28499 }, { "epoch": 0.8320924936498204, "grad_norm": 0.7796136580026719, "learning_rate": 9.328467153284671e-07, "loss": 0.6435, "step": 28500 }, { "epoch": 0.8321216898776678, "grad_norm": 0.7018575995651789, "learning_rate": 9.326845093268452e-07, "loss": 0.6481, "step": 28501 }, { "epoch": 0.8321508861055151, "grad_norm": 0.7764178755292426, "learning_rate": 9.325223033252231e-07, "loss": 0.7051, "step": 28502 }, { "epoch": 0.8321800823333625, "grad_norm": 0.7484105514013196, "learning_rate": 9.32360097323601e-07, "loss": 0.6648, "step": 28503 }, { "epoch": 0.8322092785612099, "grad_norm": 0.7223569549074512, "learning_rate": 9.32197891321979e-07, "loss": 0.6135, "step": 28504 }, { "epoch": 0.8322384747890572, "grad_norm": 0.7668408377927847, "learning_rate": 9.320356853203569e-07, "loss": 0.7026, "step": 28505 }, { "epoch": 0.8322676710169046, "grad_norm": 0.7867152011107921, "learning_rate": 9.31873479318735e-07, "loss": 0.7297, "step": 28506 }, { "epoch": 0.8322968672447519, "grad_norm": 0.7468536606988049, "learning_rate": 9.317112733171129e-07, "loss": 0.595, "step": 28507 }, { "epoch": 0.8323260634725993, "grad_norm": 0.7417578569821527, "learning_rate": 9.315490673154908e-07, "loss": 0.6425, "step": 28508 }, { "epoch": 0.8323552597004467, "grad_norm": 0.8078580630983928, "learning_rate": 9.313868613138687e-07, "loss": 0.6438, "step": 28509 }, { "epoch": 0.832384455928294, "grad_norm": 0.728544952318548, "learning_rate": 9.312246553122466e-07, "loss": 0.6394, "step": 28510 }, { "epoch": 0.8324136521561414, "grad_norm": 0.7091043547622101, "learning_rate": 9.310624493106246e-07, "loss": 0.6625, "step": 28511 }, { "epoch": 0.8324428483839887, "grad_norm": 0.7539011301310486, "learning_rate": 9.309002433090025e-07, "loss": 0.6437, "step": 28512 }, { "epoch": 0.8324720446118361, "grad_norm": 0.7108615107293466, "learning_rate": 9.307380373073805e-07, "loss": 0.5815, "step": 28513 }, { "epoch": 0.8325012408396835, "grad_norm": 0.6894202645488192, "learning_rate": 9.305758313057584e-07, "loss": 0.5919, "step": 28514 }, { "epoch": 0.8325304370675308, "grad_norm": 0.738570782473054, "learning_rate": 9.304136253041364e-07, "loss": 0.654, "step": 28515 }, { "epoch": 0.8325596332953782, "grad_norm": 0.7219687291843282, "learning_rate": 9.302514193025143e-07, "loss": 0.6831, "step": 28516 }, { "epoch": 0.8325888295232255, "grad_norm": 0.668448776561234, "learning_rate": 9.300892133008922e-07, "loss": 0.5339, "step": 28517 }, { "epoch": 0.8326180257510729, "grad_norm": 0.7198846944953031, "learning_rate": 9.299270072992701e-07, "loss": 0.6613, "step": 28518 }, { "epoch": 0.8326472219789203, "grad_norm": 0.6744399577840172, "learning_rate": 9.29764801297648e-07, "loss": 0.5837, "step": 28519 }, { "epoch": 0.8326764182067676, "grad_norm": 0.6818832776881216, "learning_rate": 9.296025952960261e-07, "loss": 0.6074, "step": 28520 }, { "epoch": 0.832705614434615, "grad_norm": 0.7038311043720313, "learning_rate": 9.29440389294404e-07, "loss": 0.6383, "step": 28521 }, { "epoch": 0.8327348106624624, "grad_norm": 0.7595204198915788, "learning_rate": 9.292781832927819e-07, "loss": 0.7071, "step": 28522 }, { "epoch": 0.8327640068903098, "grad_norm": 0.7010245336138266, "learning_rate": 9.291159772911598e-07, "loss": 0.5875, "step": 28523 }, { "epoch": 0.8327932031181572, "grad_norm": 0.765150494523508, "learning_rate": 9.289537712895377e-07, "loss": 0.7072, "step": 28524 }, { "epoch": 0.8328223993460045, "grad_norm": 0.773275401295298, "learning_rate": 9.287915652879158e-07, "loss": 0.6984, "step": 28525 }, { "epoch": 0.8328515955738519, "grad_norm": 0.7438260880499148, "learning_rate": 9.286293592862937e-07, "loss": 0.6555, "step": 28526 }, { "epoch": 0.8328807918016993, "grad_norm": 0.7111258552972844, "learning_rate": 9.284671532846716e-07, "loss": 0.6303, "step": 28527 }, { "epoch": 0.8329099880295466, "grad_norm": 0.7086504818399344, "learning_rate": 9.283049472830495e-07, "loss": 0.63, "step": 28528 }, { "epoch": 0.832939184257394, "grad_norm": 0.7720739784873628, "learning_rate": 9.281427412814274e-07, "loss": 0.7175, "step": 28529 }, { "epoch": 0.8329683804852414, "grad_norm": 0.7028906533421874, "learning_rate": 9.279805352798054e-07, "loss": 0.631, "step": 28530 }, { "epoch": 0.8329975767130887, "grad_norm": 0.732483211774241, "learning_rate": 9.278183292781833e-07, "loss": 0.673, "step": 28531 }, { "epoch": 0.8330267729409361, "grad_norm": 0.7188638805612262, "learning_rate": 9.276561232765613e-07, "loss": 0.6623, "step": 28532 }, { "epoch": 0.8330559691687834, "grad_norm": 0.7501376056546898, "learning_rate": 9.274939172749392e-07, "loss": 0.5912, "step": 28533 }, { "epoch": 0.8330851653966308, "grad_norm": 0.8073077256424325, "learning_rate": 9.273317112733172e-07, "loss": 0.7192, "step": 28534 }, { "epoch": 0.8331143616244782, "grad_norm": 0.7474917646265563, "learning_rate": 9.271695052716951e-07, "loss": 0.6259, "step": 28535 }, { "epoch": 0.8331435578523255, "grad_norm": 0.8116568581497898, "learning_rate": 9.27007299270073e-07, "loss": 0.7725, "step": 28536 }, { "epoch": 0.8331727540801729, "grad_norm": 0.691571215312465, "learning_rate": 9.26845093268451e-07, "loss": 0.6418, "step": 28537 }, { "epoch": 0.8332019503080202, "grad_norm": 0.704026336757592, "learning_rate": 9.266828872668289e-07, "loss": 0.6308, "step": 28538 }, { "epoch": 0.8332311465358676, "grad_norm": 0.7485486002076638, "learning_rate": 9.26520681265207e-07, "loss": 0.6732, "step": 28539 }, { "epoch": 0.833260342763715, "grad_norm": 0.6530456132035015, "learning_rate": 9.263584752635849e-07, "loss": 0.5387, "step": 28540 }, { "epoch": 0.8332895389915623, "grad_norm": 0.7098627549341933, "learning_rate": 9.261962692619628e-07, "loss": 0.6213, "step": 28541 }, { "epoch": 0.8333187352194097, "grad_norm": 0.753270477130166, "learning_rate": 9.260340632603407e-07, "loss": 0.6636, "step": 28542 }, { "epoch": 0.833347931447257, "grad_norm": 0.6702813753962309, "learning_rate": 9.258718572587186e-07, "loss": 0.5315, "step": 28543 }, { "epoch": 0.8333771276751044, "grad_norm": 0.6909733930531531, "learning_rate": 9.257096512570967e-07, "loss": 0.6183, "step": 28544 }, { "epoch": 0.8334063239029518, "grad_norm": 0.683271407094254, "learning_rate": 9.255474452554746e-07, "loss": 0.5948, "step": 28545 }, { "epoch": 0.8334355201307991, "grad_norm": 0.6991345765435072, "learning_rate": 9.253852392538525e-07, "loss": 0.5862, "step": 28546 }, { "epoch": 0.8334647163586465, "grad_norm": 0.7112624835661265, "learning_rate": 9.252230332522304e-07, "loss": 0.6588, "step": 28547 }, { "epoch": 0.8334939125864939, "grad_norm": 0.7752448430012712, "learning_rate": 9.250608272506084e-07, "loss": 0.6938, "step": 28548 }, { "epoch": 0.8335231088143412, "grad_norm": 0.7012256427541523, "learning_rate": 9.248986212489863e-07, "loss": 0.6035, "step": 28549 }, { "epoch": 0.8335523050421886, "grad_norm": 0.720029124148487, "learning_rate": 9.247364152473642e-07, "loss": 0.6022, "step": 28550 }, { "epoch": 0.8335815012700359, "grad_norm": 0.7375308057187853, "learning_rate": 9.245742092457422e-07, "loss": 0.6064, "step": 28551 }, { "epoch": 0.8336106974978833, "grad_norm": 0.7804807093376585, "learning_rate": 9.244120032441201e-07, "loss": 0.6833, "step": 28552 }, { "epoch": 0.8336398937257307, "grad_norm": 0.729893134576875, "learning_rate": 9.242497972424981e-07, "loss": 0.6073, "step": 28553 }, { "epoch": 0.833669089953578, "grad_norm": 0.7628559748265547, "learning_rate": 9.24087591240876e-07, "loss": 0.7216, "step": 28554 }, { "epoch": 0.8336982861814254, "grad_norm": 0.7297108168604248, "learning_rate": 9.239253852392539e-07, "loss": 0.6332, "step": 28555 }, { "epoch": 0.8337274824092727, "grad_norm": 0.747239241819545, "learning_rate": 9.237631792376318e-07, "loss": 0.6307, "step": 28556 }, { "epoch": 0.8337566786371201, "grad_norm": 0.7351536467390274, "learning_rate": 9.236009732360097e-07, "loss": 0.7001, "step": 28557 }, { "epoch": 0.8337858748649675, "grad_norm": 0.7290305789886283, "learning_rate": 9.234387672343878e-07, "loss": 0.613, "step": 28558 }, { "epoch": 0.8338150710928148, "grad_norm": 0.7124049858056114, "learning_rate": 9.232765612327657e-07, "loss": 0.6031, "step": 28559 }, { "epoch": 0.8338442673206622, "grad_norm": 0.7275906426301422, "learning_rate": 9.231143552311436e-07, "loss": 0.6598, "step": 28560 }, { "epoch": 0.8338734635485096, "grad_norm": 0.6906039025967465, "learning_rate": 9.229521492295215e-07, "loss": 0.6206, "step": 28561 }, { "epoch": 0.8339026597763569, "grad_norm": 0.6918713356413269, "learning_rate": 9.227899432278994e-07, "loss": 0.5812, "step": 28562 }, { "epoch": 0.8339318560042043, "grad_norm": 0.7087647873079304, "learning_rate": 9.226277372262775e-07, "loss": 0.6259, "step": 28563 }, { "epoch": 0.8339610522320516, "grad_norm": 0.6919864375465992, "learning_rate": 9.224655312246554e-07, "loss": 0.6111, "step": 28564 }, { "epoch": 0.833990248459899, "grad_norm": 0.7010718389856695, "learning_rate": 9.223033252230333e-07, "loss": 0.6477, "step": 28565 }, { "epoch": 0.8340194446877464, "grad_norm": 0.7404516028326743, "learning_rate": 9.221411192214112e-07, "loss": 0.5505, "step": 28566 }, { "epoch": 0.8340486409155937, "grad_norm": 0.8707568514185404, "learning_rate": 9.219789132197892e-07, "loss": 0.6823, "step": 28567 }, { "epoch": 0.8340778371434411, "grad_norm": 0.74724010721157, "learning_rate": 9.218167072181672e-07, "loss": 0.6725, "step": 28568 }, { "epoch": 0.8341070333712884, "grad_norm": 0.7079379898777725, "learning_rate": 9.21654501216545e-07, "loss": 0.6118, "step": 28569 }, { "epoch": 0.8341362295991358, "grad_norm": 0.7072680645334513, "learning_rate": 9.21492295214923e-07, "loss": 0.5996, "step": 28570 }, { "epoch": 0.8341654258269832, "grad_norm": 0.7478863083923055, "learning_rate": 9.21330089213301e-07, "loss": 0.6702, "step": 28571 }, { "epoch": 0.8341946220548305, "grad_norm": 0.7102781996763785, "learning_rate": 9.21167883211679e-07, "loss": 0.6518, "step": 28572 }, { "epoch": 0.8342238182826779, "grad_norm": 0.732889366524901, "learning_rate": 9.210056772100569e-07, "loss": 0.68, "step": 28573 }, { "epoch": 0.8342530145105252, "grad_norm": 0.6901954796304921, "learning_rate": 9.208434712084348e-07, "loss": 0.6077, "step": 28574 }, { "epoch": 0.8342822107383726, "grad_norm": 0.8045523664410742, "learning_rate": 9.206812652068127e-07, "loss": 0.6748, "step": 28575 }, { "epoch": 0.83431140696622, "grad_norm": 0.7204300564243175, "learning_rate": 9.205190592051906e-07, "loss": 0.598, "step": 28576 }, { "epoch": 0.8343406031940673, "grad_norm": 0.6936068496480856, "learning_rate": 9.203568532035687e-07, "loss": 0.6254, "step": 28577 }, { "epoch": 0.8343697994219147, "grad_norm": 0.7083880505762911, "learning_rate": 9.201946472019466e-07, "loss": 0.6421, "step": 28578 }, { "epoch": 0.834398995649762, "grad_norm": 0.744649079859923, "learning_rate": 9.200324412003245e-07, "loss": 0.6853, "step": 28579 }, { "epoch": 0.8344281918776094, "grad_norm": 0.7790944739442595, "learning_rate": 9.198702351987024e-07, "loss": 0.6101, "step": 28580 }, { "epoch": 0.8344573881054568, "grad_norm": 0.7300495657023829, "learning_rate": 9.197080291970803e-07, "loss": 0.6891, "step": 28581 }, { "epoch": 0.8344865843333041, "grad_norm": 0.7684979980300644, "learning_rate": 9.195458231954584e-07, "loss": 0.6134, "step": 28582 }, { "epoch": 0.8345157805611515, "grad_norm": 0.7248995049004485, "learning_rate": 9.193836171938363e-07, "loss": 0.6737, "step": 28583 }, { "epoch": 0.8345449767889989, "grad_norm": 0.7326397911429516, "learning_rate": 9.192214111922142e-07, "loss": 0.6249, "step": 28584 }, { "epoch": 0.8345741730168462, "grad_norm": 0.6298234525394864, "learning_rate": 9.190592051905921e-07, "loss": 0.4519, "step": 28585 }, { "epoch": 0.8346033692446936, "grad_norm": 0.7502243836802359, "learning_rate": 9.188969991889701e-07, "loss": 0.6653, "step": 28586 }, { "epoch": 0.8346325654725409, "grad_norm": 0.6875386584247636, "learning_rate": 9.18734793187348e-07, "loss": 0.5724, "step": 28587 }, { "epoch": 0.8346617617003883, "grad_norm": 0.7080393676655788, "learning_rate": 9.185725871857259e-07, "loss": 0.6089, "step": 28588 }, { "epoch": 0.8346909579282357, "grad_norm": 0.7575232318685184, "learning_rate": 9.184103811841038e-07, "loss": 0.7134, "step": 28589 }, { "epoch": 0.834720154156083, "grad_norm": 0.7717543840986052, "learning_rate": 9.182481751824818e-07, "loss": 0.7099, "step": 28590 }, { "epoch": 0.8347493503839304, "grad_norm": 0.7710269609109662, "learning_rate": 9.180859691808598e-07, "loss": 0.6021, "step": 28591 }, { "epoch": 0.8347785466117777, "grad_norm": 0.7082972132248457, "learning_rate": 9.179237631792377e-07, "loss": 0.6504, "step": 28592 }, { "epoch": 0.8348077428396251, "grad_norm": 0.7066370210926946, "learning_rate": 9.177615571776156e-07, "loss": 0.6213, "step": 28593 }, { "epoch": 0.8348369390674725, "grad_norm": 0.7704495831621845, "learning_rate": 9.175993511759935e-07, "loss": 0.685, "step": 28594 }, { "epoch": 0.8348661352953198, "grad_norm": 0.7049632622583376, "learning_rate": 9.174371451743714e-07, "loss": 0.6402, "step": 28595 }, { "epoch": 0.8348953315231672, "grad_norm": 0.7952146241954146, "learning_rate": 9.172749391727495e-07, "loss": 0.7002, "step": 28596 }, { "epoch": 0.8349245277510146, "grad_norm": 0.689585262160575, "learning_rate": 9.171127331711274e-07, "loss": 0.6494, "step": 28597 }, { "epoch": 0.8349537239788619, "grad_norm": 0.754128173746984, "learning_rate": 9.169505271695053e-07, "loss": 0.7011, "step": 28598 }, { "epoch": 0.8349829202067093, "grad_norm": 0.6914595606842008, "learning_rate": 9.167883211678832e-07, "loss": 0.5973, "step": 28599 }, { "epoch": 0.8350121164345566, "grad_norm": 0.7301964205188632, "learning_rate": 9.166261151662613e-07, "loss": 0.6136, "step": 28600 }, { "epoch": 0.835041312662404, "grad_norm": 0.6960309262861474, "learning_rate": 9.164639091646392e-07, "loss": 0.5743, "step": 28601 }, { "epoch": 0.8350705088902514, "grad_norm": 0.6945806544546911, "learning_rate": 9.163017031630172e-07, "loss": 0.6319, "step": 28602 }, { "epoch": 0.8350997051180987, "grad_norm": 0.7505266735815752, "learning_rate": 9.161394971613951e-07, "loss": 0.6075, "step": 28603 }, { "epoch": 0.8351289013459461, "grad_norm": 0.657615900104587, "learning_rate": 9.15977291159773e-07, "loss": 0.5359, "step": 28604 }, { "epoch": 0.8351580975737934, "grad_norm": 0.714327595659501, "learning_rate": 9.15815085158151e-07, "loss": 0.5977, "step": 28605 }, { "epoch": 0.8351872938016408, "grad_norm": 0.7387662095548748, "learning_rate": 9.156528791565289e-07, "loss": 0.6358, "step": 28606 }, { "epoch": 0.8352164900294882, "grad_norm": 0.7946770859529788, "learning_rate": 9.154906731549068e-07, "loss": 0.6979, "step": 28607 }, { "epoch": 0.8352456862573355, "grad_norm": 0.6573985492174722, "learning_rate": 9.153284671532847e-07, "loss": 0.5369, "step": 28608 }, { "epoch": 0.8352748824851829, "grad_norm": 0.7055629643011264, "learning_rate": 9.151662611516627e-07, "loss": 0.6073, "step": 28609 }, { "epoch": 0.8353040787130303, "grad_norm": 0.7644927656902419, "learning_rate": 9.150040551500407e-07, "loss": 0.7181, "step": 28610 }, { "epoch": 0.8353332749408776, "grad_norm": 0.7350646651337418, "learning_rate": 9.148418491484186e-07, "loss": 0.6441, "step": 28611 }, { "epoch": 0.835362471168725, "grad_norm": 0.7351699573045617, "learning_rate": 9.146796431467965e-07, "loss": 0.648, "step": 28612 }, { "epoch": 0.8353916673965723, "grad_norm": 0.7617883869976502, "learning_rate": 9.145174371451744e-07, "loss": 0.6714, "step": 28613 }, { "epoch": 0.8354208636244197, "grad_norm": 0.7101081706745345, "learning_rate": 9.143552311435523e-07, "loss": 0.6151, "step": 28614 }, { "epoch": 0.8354500598522671, "grad_norm": 0.7335364056772107, "learning_rate": 9.141930251419304e-07, "loss": 0.6241, "step": 28615 }, { "epoch": 0.8354792560801144, "grad_norm": 0.729139226762789, "learning_rate": 9.140308191403083e-07, "loss": 0.6608, "step": 28616 }, { "epoch": 0.8355084523079618, "grad_norm": 0.7311167481944804, "learning_rate": 9.138686131386862e-07, "loss": 0.6222, "step": 28617 }, { "epoch": 0.8355376485358091, "grad_norm": 0.7456650495572427, "learning_rate": 9.137064071370641e-07, "loss": 0.7132, "step": 28618 }, { "epoch": 0.8355668447636565, "grad_norm": 0.7160089421388358, "learning_rate": 9.135442011354421e-07, "loss": 0.6083, "step": 28619 }, { "epoch": 0.8355960409915039, "grad_norm": 0.7171231578742882, "learning_rate": 9.1338199513382e-07, "loss": 0.583, "step": 28620 }, { "epoch": 0.8356252372193512, "grad_norm": 0.69373316832189, "learning_rate": 9.13219789132198e-07, "loss": 0.642, "step": 28621 }, { "epoch": 0.8356544334471986, "grad_norm": 0.7817308730308851, "learning_rate": 9.130575831305759e-07, "loss": 0.6607, "step": 28622 }, { "epoch": 0.835683629675046, "grad_norm": 0.7277250912289385, "learning_rate": 9.128953771289538e-07, "loss": 0.5775, "step": 28623 }, { "epoch": 0.8357128259028933, "grad_norm": 0.7808693759660792, "learning_rate": 9.127331711273318e-07, "loss": 0.6402, "step": 28624 }, { "epoch": 0.8357420221307407, "grad_norm": 0.7177000293237791, "learning_rate": 9.125709651257097e-07, "loss": 0.6506, "step": 28625 }, { "epoch": 0.835771218358588, "grad_norm": 0.7055790225553409, "learning_rate": 9.124087591240876e-07, "loss": 0.5968, "step": 28626 }, { "epoch": 0.8358004145864354, "grad_norm": 0.7644086744522042, "learning_rate": 9.122465531224655e-07, "loss": 0.7309, "step": 28627 }, { "epoch": 0.8358296108142828, "grad_norm": 0.7281802079158598, "learning_rate": 9.120843471208435e-07, "loss": 0.6755, "step": 28628 }, { "epoch": 0.8358588070421301, "grad_norm": 0.7053837559177436, "learning_rate": 9.119221411192215e-07, "loss": 0.6336, "step": 28629 }, { "epoch": 0.8358880032699775, "grad_norm": 0.6750753616626901, "learning_rate": 9.117599351175994e-07, "loss": 0.5635, "step": 28630 }, { "epoch": 0.8359171994978248, "grad_norm": 0.7366451289012464, "learning_rate": 9.115977291159773e-07, "loss": 0.6112, "step": 28631 }, { "epoch": 0.8359463957256722, "grad_norm": 0.7359070049291498, "learning_rate": 9.114355231143552e-07, "loss": 0.6332, "step": 28632 }, { "epoch": 0.8359755919535196, "grad_norm": 0.7591714330508478, "learning_rate": 9.112733171127334e-07, "loss": 0.6762, "step": 28633 }, { "epoch": 0.8360047881813669, "grad_norm": 0.7145678451420866, "learning_rate": 9.111111111111113e-07, "loss": 0.6599, "step": 28634 }, { "epoch": 0.8360339844092143, "grad_norm": 0.771062316451301, "learning_rate": 9.109489051094892e-07, "loss": 0.6637, "step": 28635 }, { "epoch": 0.8360631806370616, "grad_norm": 0.7086088182629546, "learning_rate": 9.107866991078671e-07, "loss": 0.6272, "step": 28636 }, { "epoch": 0.836092376864909, "grad_norm": 0.6780583579854371, "learning_rate": 9.10624493106245e-07, "loss": 0.582, "step": 28637 }, { "epoch": 0.8361215730927564, "grad_norm": 0.7150770855897095, "learning_rate": 9.10462287104623e-07, "loss": 0.6542, "step": 28638 }, { "epoch": 0.8361507693206037, "grad_norm": 0.6961891034791281, "learning_rate": 9.103000811030009e-07, "loss": 0.5462, "step": 28639 }, { "epoch": 0.8361799655484511, "grad_norm": 0.7103762957421075, "learning_rate": 9.101378751013789e-07, "loss": 0.6239, "step": 28640 }, { "epoch": 0.8362091617762984, "grad_norm": 0.7118561991761033, "learning_rate": 9.099756690997568e-07, "loss": 0.5995, "step": 28641 }, { "epoch": 0.8362383580041458, "grad_norm": 0.715772031197283, "learning_rate": 9.098134630981347e-07, "loss": 0.645, "step": 28642 }, { "epoch": 0.8362675542319932, "grad_norm": 0.6954483793832648, "learning_rate": 9.096512570965127e-07, "loss": 0.6347, "step": 28643 }, { "epoch": 0.8362967504598406, "grad_norm": 0.7327436191885127, "learning_rate": 9.094890510948906e-07, "loss": 0.6419, "step": 28644 }, { "epoch": 0.836325946687688, "grad_norm": 0.7221901814193583, "learning_rate": 9.093268450932685e-07, "loss": 0.6538, "step": 28645 }, { "epoch": 0.8363551429155354, "grad_norm": 0.713353579539866, "learning_rate": 9.091646390916464e-07, "loss": 0.6308, "step": 28646 }, { "epoch": 0.8363843391433827, "grad_norm": 0.7071642834764432, "learning_rate": 9.090024330900243e-07, "loss": 0.6066, "step": 28647 }, { "epoch": 0.8364135353712301, "grad_norm": 0.6754787104002026, "learning_rate": 9.088402270884024e-07, "loss": 0.5944, "step": 28648 }, { "epoch": 0.8364427315990774, "grad_norm": 0.7221331876306448, "learning_rate": 9.086780210867803e-07, "loss": 0.6131, "step": 28649 }, { "epoch": 0.8364719278269248, "grad_norm": 0.782594213112511, "learning_rate": 9.085158150851582e-07, "loss": 0.5849, "step": 28650 }, { "epoch": 0.8365011240547722, "grad_norm": 0.7441087829902476, "learning_rate": 9.083536090835361e-07, "loss": 0.6703, "step": 28651 }, { "epoch": 0.8365303202826195, "grad_norm": 0.7470582637964908, "learning_rate": 9.081914030819142e-07, "loss": 0.6356, "step": 28652 }, { "epoch": 0.8365595165104669, "grad_norm": 0.737527440186084, "learning_rate": 9.080291970802921e-07, "loss": 0.6898, "step": 28653 }, { "epoch": 0.8365887127383143, "grad_norm": 0.7483048976935139, "learning_rate": 9.0786699107867e-07, "loss": 0.665, "step": 28654 }, { "epoch": 0.8366179089661616, "grad_norm": 0.6979024514336857, "learning_rate": 9.077047850770479e-07, "loss": 0.5783, "step": 28655 }, { "epoch": 0.836647105194009, "grad_norm": 0.6777331011114653, "learning_rate": 9.075425790754258e-07, "loss": 0.5809, "step": 28656 }, { "epoch": 0.8366763014218563, "grad_norm": 0.7509422830304485, "learning_rate": 9.073803730738038e-07, "loss": 0.6327, "step": 28657 }, { "epoch": 0.8367054976497037, "grad_norm": 0.7127911300287187, "learning_rate": 9.072181670721817e-07, "loss": 0.6363, "step": 28658 }, { "epoch": 0.8367346938775511, "grad_norm": 0.6876947776879427, "learning_rate": 9.070559610705597e-07, "loss": 0.5821, "step": 28659 }, { "epoch": 0.8367638901053984, "grad_norm": 0.7322724416343924, "learning_rate": 9.068937550689376e-07, "loss": 0.652, "step": 28660 }, { "epoch": 0.8367930863332458, "grad_norm": 0.7008509669213773, "learning_rate": 9.067315490673155e-07, "loss": 0.6261, "step": 28661 }, { "epoch": 0.8368222825610931, "grad_norm": 0.7399328520422656, "learning_rate": 9.065693430656935e-07, "loss": 0.6252, "step": 28662 }, { "epoch": 0.8368514787889405, "grad_norm": 0.6936540119961395, "learning_rate": 9.064071370640714e-07, "loss": 0.5327, "step": 28663 }, { "epoch": 0.8368806750167879, "grad_norm": 0.6930276151994011, "learning_rate": 9.062449310624493e-07, "loss": 0.5626, "step": 28664 }, { "epoch": 0.8369098712446352, "grad_norm": 0.8250677783429192, "learning_rate": 9.060827250608272e-07, "loss": 0.8462, "step": 28665 }, { "epoch": 0.8369390674724826, "grad_norm": 0.7186925976174473, "learning_rate": 9.059205190592054e-07, "loss": 0.6405, "step": 28666 }, { "epoch": 0.83696826370033, "grad_norm": 0.6712845332481118, "learning_rate": 9.057583130575833e-07, "loss": 0.545, "step": 28667 }, { "epoch": 0.8369974599281773, "grad_norm": 0.730278978189306, "learning_rate": 9.055961070559612e-07, "loss": 0.6257, "step": 28668 }, { "epoch": 0.8370266561560247, "grad_norm": 0.8272493518567379, "learning_rate": 9.054339010543391e-07, "loss": 0.6499, "step": 28669 }, { "epoch": 0.837055852383872, "grad_norm": 0.7635623775416714, "learning_rate": 9.05271695052717e-07, "loss": 0.6935, "step": 28670 }, { "epoch": 0.8370850486117194, "grad_norm": 0.7214985323397246, "learning_rate": 9.051094890510951e-07, "loss": 0.6091, "step": 28671 }, { "epoch": 0.8371142448395668, "grad_norm": 0.7496989172076782, "learning_rate": 9.04947283049473e-07, "loss": 0.7014, "step": 28672 }, { "epoch": 0.8371434410674141, "grad_norm": 0.6656123725655153, "learning_rate": 9.047850770478509e-07, "loss": 0.5482, "step": 28673 }, { "epoch": 0.8371726372952615, "grad_norm": 0.7100412064034781, "learning_rate": 9.046228710462288e-07, "loss": 0.5831, "step": 28674 }, { "epoch": 0.8372018335231088, "grad_norm": 0.7015071960482343, "learning_rate": 9.044606650446067e-07, "loss": 0.6368, "step": 28675 }, { "epoch": 0.8372310297509562, "grad_norm": 0.6952742982631915, "learning_rate": 9.042984590429847e-07, "loss": 0.5916, "step": 28676 }, { "epoch": 0.8372602259788036, "grad_norm": 0.8282982766377074, "learning_rate": 9.041362530413626e-07, "loss": 0.7288, "step": 28677 }, { "epoch": 0.8372894222066509, "grad_norm": 0.7802482423008467, "learning_rate": 9.039740470397405e-07, "loss": 0.6872, "step": 28678 }, { "epoch": 0.8373186184344983, "grad_norm": 0.7128680741427622, "learning_rate": 9.038118410381185e-07, "loss": 0.5871, "step": 28679 }, { "epoch": 0.8373478146623456, "grad_norm": 0.7044873146974868, "learning_rate": 9.036496350364964e-07, "loss": 0.5752, "step": 28680 }, { "epoch": 0.837377010890193, "grad_norm": 0.7167156363414514, "learning_rate": 9.034874290348744e-07, "loss": 0.6458, "step": 28681 }, { "epoch": 0.8374062071180404, "grad_norm": 0.7692891952450726, "learning_rate": 9.033252230332523e-07, "loss": 0.644, "step": 28682 }, { "epoch": 0.8374354033458877, "grad_norm": 0.765620341464783, "learning_rate": 9.031630170316302e-07, "loss": 0.6778, "step": 28683 }, { "epoch": 0.8374645995737351, "grad_norm": 0.7095218472077549, "learning_rate": 9.030008110300081e-07, "loss": 0.6335, "step": 28684 }, { "epoch": 0.8374937958015825, "grad_norm": 0.6917579056203097, "learning_rate": 9.028386050283862e-07, "loss": 0.5805, "step": 28685 }, { "epoch": 0.8375229920294298, "grad_norm": 0.7089045470923843, "learning_rate": 9.026763990267641e-07, "loss": 0.6305, "step": 28686 }, { "epoch": 0.8375521882572772, "grad_norm": 0.7063509480787602, "learning_rate": 9.02514193025142e-07, "loss": 0.5977, "step": 28687 }, { "epoch": 0.8375813844851245, "grad_norm": 0.7176182154977022, "learning_rate": 9.023519870235199e-07, "loss": 0.6508, "step": 28688 }, { "epoch": 0.8376105807129719, "grad_norm": 0.8663542839028232, "learning_rate": 9.021897810218978e-07, "loss": 0.7091, "step": 28689 }, { "epoch": 0.8376397769408193, "grad_norm": 0.7884397523945909, "learning_rate": 9.020275750202759e-07, "loss": 0.7725, "step": 28690 }, { "epoch": 0.8376689731686666, "grad_norm": 0.7567602648542519, "learning_rate": 9.018653690186538e-07, "loss": 0.6115, "step": 28691 }, { "epoch": 0.837698169396514, "grad_norm": 0.6753126710959976, "learning_rate": 9.017031630170317e-07, "loss": 0.5851, "step": 28692 }, { "epoch": 0.8377273656243613, "grad_norm": 0.7295708480073405, "learning_rate": 9.015409570154096e-07, "loss": 0.6527, "step": 28693 }, { "epoch": 0.8377565618522087, "grad_norm": 0.7040087316130109, "learning_rate": 9.013787510137875e-07, "loss": 0.5854, "step": 28694 }, { "epoch": 0.8377857580800561, "grad_norm": 0.7084010382308109, "learning_rate": 9.012165450121655e-07, "loss": 0.6198, "step": 28695 }, { "epoch": 0.8378149543079034, "grad_norm": 0.6935439823590639, "learning_rate": 9.010543390105434e-07, "loss": 0.6175, "step": 28696 }, { "epoch": 0.8378441505357508, "grad_norm": 0.7571420775297997, "learning_rate": 9.008921330089213e-07, "loss": 0.6626, "step": 28697 }, { "epoch": 0.8378733467635981, "grad_norm": 0.6938561362892903, "learning_rate": 9.007299270072994e-07, "loss": 0.6011, "step": 28698 }, { "epoch": 0.8379025429914455, "grad_norm": 0.7797332584606256, "learning_rate": 9.005677210056773e-07, "loss": 0.7031, "step": 28699 }, { "epoch": 0.8379317392192929, "grad_norm": 0.6543349703462803, "learning_rate": 9.004055150040553e-07, "loss": 0.4931, "step": 28700 }, { "epoch": 0.8379609354471402, "grad_norm": 0.7177762795362846, "learning_rate": 9.002433090024332e-07, "loss": 0.5924, "step": 28701 }, { "epoch": 0.8379901316749876, "grad_norm": 0.6828844987243445, "learning_rate": 9.000811030008111e-07, "loss": 0.5816, "step": 28702 }, { "epoch": 0.838019327902835, "grad_norm": 0.6930947917131974, "learning_rate": 8.99918896999189e-07, "loss": 0.6316, "step": 28703 }, { "epoch": 0.8380485241306823, "grad_norm": 0.8051182333770467, "learning_rate": 8.997566909975671e-07, "loss": 0.6366, "step": 28704 }, { "epoch": 0.8380777203585297, "grad_norm": 0.7316216900109488, "learning_rate": 8.99594484995945e-07, "loss": 0.6606, "step": 28705 }, { "epoch": 0.838106916586377, "grad_norm": 0.7153863939975978, "learning_rate": 8.994322789943229e-07, "loss": 0.6461, "step": 28706 }, { "epoch": 0.8381361128142244, "grad_norm": 0.7092641294399681, "learning_rate": 8.992700729927008e-07, "loss": 0.6097, "step": 28707 }, { "epoch": 0.8381653090420718, "grad_norm": 0.8024455104289365, "learning_rate": 8.991078669910787e-07, "loss": 0.5743, "step": 28708 }, { "epoch": 0.8381945052699191, "grad_norm": 0.7713891461783802, "learning_rate": 8.989456609894567e-07, "loss": 0.6915, "step": 28709 }, { "epoch": 0.8382237014977665, "grad_norm": 0.7583761457826524, "learning_rate": 8.987834549878347e-07, "loss": 0.6726, "step": 28710 }, { "epoch": 0.8382528977256138, "grad_norm": 0.725064893418584, "learning_rate": 8.986212489862126e-07, "loss": 0.6292, "step": 28711 }, { "epoch": 0.8382820939534612, "grad_norm": 0.6813296866774348, "learning_rate": 8.984590429845905e-07, "loss": 0.5986, "step": 28712 }, { "epoch": 0.8383112901813086, "grad_norm": 0.7639527605475991, "learning_rate": 8.982968369829684e-07, "loss": 0.6452, "step": 28713 }, { "epoch": 0.8383404864091559, "grad_norm": 0.7490394107630142, "learning_rate": 8.981346309813464e-07, "loss": 0.6168, "step": 28714 }, { "epoch": 0.8383696826370033, "grad_norm": 0.7179741914302495, "learning_rate": 8.979724249797243e-07, "loss": 0.6036, "step": 28715 }, { "epoch": 0.8383988788648506, "grad_norm": 0.6966982889978293, "learning_rate": 8.978102189781022e-07, "loss": 0.6434, "step": 28716 }, { "epoch": 0.838428075092698, "grad_norm": 0.7311352629558795, "learning_rate": 8.976480129764802e-07, "loss": 0.6908, "step": 28717 }, { "epoch": 0.8384572713205454, "grad_norm": 0.7747848189824413, "learning_rate": 8.974858069748582e-07, "loss": 0.6765, "step": 28718 }, { "epoch": 0.8384864675483927, "grad_norm": 0.6663056407539107, "learning_rate": 8.973236009732361e-07, "loss": 0.5291, "step": 28719 }, { "epoch": 0.8385156637762401, "grad_norm": 0.7028335041687392, "learning_rate": 8.97161394971614e-07, "loss": 0.6033, "step": 28720 }, { "epoch": 0.8385448600040875, "grad_norm": 0.7340539532422314, "learning_rate": 8.969991889699919e-07, "loss": 0.6324, "step": 28721 }, { "epoch": 0.8385740562319348, "grad_norm": 0.7714409598465172, "learning_rate": 8.968369829683698e-07, "loss": 0.6625, "step": 28722 }, { "epoch": 0.8386032524597822, "grad_norm": 0.7179174605757876, "learning_rate": 8.966747769667479e-07, "loss": 0.6661, "step": 28723 }, { "epoch": 0.8386324486876295, "grad_norm": 0.7414892510371535, "learning_rate": 8.965125709651258e-07, "loss": 0.6204, "step": 28724 }, { "epoch": 0.8386616449154769, "grad_norm": 0.7202182517551754, "learning_rate": 8.963503649635037e-07, "loss": 0.6816, "step": 28725 }, { "epoch": 0.8386908411433243, "grad_norm": 0.7479649297111308, "learning_rate": 8.961881589618816e-07, "loss": 0.663, "step": 28726 }, { "epoch": 0.8387200373711716, "grad_norm": 0.7041850162304566, "learning_rate": 8.960259529602595e-07, "loss": 0.6065, "step": 28727 }, { "epoch": 0.838749233599019, "grad_norm": 0.7730968493664769, "learning_rate": 8.958637469586375e-07, "loss": 0.6954, "step": 28728 }, { "epoch": 0.8387784298268663, "grad_norm": 0.7564016470214897, "learning_rate": 8.957015409570156e-07, "loss": 0.7125, "step": 28729 }, { "epoch": 0.8388076260547137, "grad_norm": 0.6918143995162875, "learning_rate": 8.955393349553935e-07, "loss": 0.58, "step": 28730 }, { "epoch": 0.8388368222825611, "grad_norm": 0.6754897849754671, "learning_rate": 8.953771289537714e-07, "loss": 0.5604, "step": 28731 }, { "epoch": 0.8388660185104084, "grad_norm": 0.7083713857123417, "learning_rate": 8.952149229521493e-07, "loss": 0.6239, "step": 28732 }, { "epoch": 0.8388952147382558, "grad_norm": 0.7140653988279683, "learning_rate": 8.950527169505273e-07, "loss": 0.617, "step": 28733 }, { "epoch": 0.8389244109661032, "grad_norm": 0.7256267175414043, "learning_rate": 8.948905109489052e-07, "loss": 0.6725, "step": 28734 }, { "epoch": 0.8389536071939505, "grad_norm": 0.7366209910140165, "learning_rate": 8.947283049472831e-07, "loss": 0.6667, "step": 28735 }, { "epoch": 0.8389828034217979, "grad_norm": 0.7171062654338131, "learning_rate": 8.94566098945661e-07, "loss": 0.6443, "step": 28736 }, { "epoch": 0.8390119996496452, "grad_norm": 0.6922808681943152, "learning_rate": 8.944038929440391e-07, "loss": 0.6029, "step": 28737 }, { "epoch": 0.8390411958774926, "grad_norm": 0.7189532205003026, "learning_rate": 8.94241686942417e-07, "loss": 0.6614, "step": 28738 }, { "epoch": 0.83907039210534, "grad_norm": 0.7148764891513022, "learning_rate": 8.940794809407949e-07, "loss": 0.6231, "step": 28739 }, { "epoch": 0.8390995883331873, "grad_norm": 0.7345832520622005, "learning_rate": 8.939172749391728e-07, "loss": 0.6828, "step": 28740 }, { "epoch": 0.8391287845610347, "grad_norm": 0.6966204245740591, "learning_rate": 8.937550689375507e-07, "loss": 0.5783, "step": 28741 }, { "epoch": 0.839157980788882, "grad_norm": 0.7157769468769155, "learning_rate": 8.935928629359288e-07, "loss": 0.5943, "step": 28742 }, { "epoch": 0.8391871770167294, "grad_norm": 0.7319915222708494, "learning_rate": 8.934306569343067e-07, "loss": 0.6711, "step": 28743 }, { "epoch": 0.8392163732445768, "grad_norm": 0.7327774291278519, "learning_rate": 8.932684509326846e-07, "loss": 0.6236, "step": 28744 }, { "epoch": 0.8392455694724241, "grad_norm": 0.7179135897042362, "learning_rate": 8.931062449310625e-07, "loss": 0.6678, "step": 28745 }, { "epoch": 0.8392747657002715, "grad_norm": 0.7217941209310014, "learning_rate": 8.929440389294404e-07, "loss": 0.6002, "step": 28746 }, { "epoch": 0.8393039619281188, "grad_norm": 0.7490771656823115, "learning_rate": 8.927818329278184e-07, "loss": 0.6929, "step": 28747 }, { "epoch": 0.8393331581559662, "grad_norm": 0.705752965140523, "learning_rate": 8.926196269261964e-07, "loss": 0.6008, "step": 28748 }, { "epoch": 0.8393623543838136, "grad_norm": 0.6669292585863225, "learning_rate": 8.924574209245743e-07, "loss": 0.5947, "step": 28749 }, { "epoch": 0.8393915506116609, "grad_norm": 0.7608227304730358, "learning_rate": 8.922952149229522e-07, "loss": 0.6745, "step": 28750 }, { "epoch": 0.8394207468395083, "grad_norm": 0.6867325320825239, "learning_rate": 8.921330089213302e-07, "loss": 0.6131, "step": 28751 }, { "epoch": 0.8394499430673557, "grad_norm": 0.7275231615414015, "learning_rate": 8.919708029197081e-07, "loss": 0.6338, "step": 28752 }, { "epoch": 0.839479139295203, "grad_norm": 0.8972762387192367, "learning_rate": 8.91808596918086e-07, "loss": 0.6765, "step": 28753 }, { "epoch": 0.8395083355230504, "grad_norm": 0.6978786472670897, "learning_rate": 8.916463909164639e-07, "loss": 0.6335, "step": 28754 }, { "epoch": 0.8395375317508977, "grad_norm": 0.7604057396565365, "learning_rate": 8.914841849148418e-07, "loss": 0.6655, "step": 28755 }, { "epoch": 0.8395667279787451, "grad_norm": 0.7417428520313403, "learning_rate": 8.913219789132199e-07, "loss": 0.6504, "step": 28756 }, { "epoch": 0.8395959242065925, "grad_norm": 0.7678266686502512, "learning_rate": 8.911597729115978e-07, "loss": 0.647, "step": 28757 }, { "epoch": 0.8396251204344398, "grad_norm": 0.7689533623579256, "learning_rate": 8.909975669099757e-07, "loss": 0.7076, "step": 28758 }, { "epoch": 0.8396543166622872, "grad_norm": 0.7577681891926299, "learning_rate": 8.908353609083536e-07, "loss": 0.6522, "step": 28759 }, { "epoch": 0.8396835128901345, "grad_norm": 0.7019272420252921, "learning_rate": 8.906731549067315e-07, "loss": 0.6435, "step": 28760 }, { "epoch": 0.8397127091179819, "grad_norm": 0.7248742456633476, "learning_rate": 8.905109489051097e-07, "loss": 0.6491, "step": 28761 }, { "epoch": 0.8397419053458293, "grad_norm": 0.7300947365540461, "learning_rate": 8.903487429034876e-07, "loss": 0.6787, "step": 28762 }, { "epoch": 0.8397711015736766, "grad_norm": 0.7155792406715257, "learning_rate": 8.901865369018655e-07, "loss": 0.6412, "step": 28763 }, { "epoch": 0.8398002978015241, "grad_norm": 0.7217115089661318, "learning_rate": 8.900243309002434e-07, "loss": 0.6439, "step": 28764 }, { "epoch": 0.8398294940293715, "grad_norm": 0.7453908613769739, "learning_rate": 8.898621248986213e-07, "loss": 0.7001, "step": 28765 }, { "epoch": 0.8398586902572188, "grad_norm": 0.6772779393445513, "learning_rate": 8.896999188969993e-07, "loss": 0.5608, "step": 28766 }, { "epoch": 0.8398878864850662, "grad_norm": 0.7310980482404533, "learning_rate": 8.895377128953772e-07, "loss": 0.6366, "step": 28767 }, { "epoch": 0.8399170827129135, "grad_norm": 0.7245088446268801, "learning_rate": 8.893755068937552e-07, "loss": 0.6658, "step": 28768 }, { "epoch": 0.8399462789407609, "grad_norm": 0.7895713516113723, "learning_rate": 8.892133008921331e-07, "loss": 0.7361, "step": 28769 }, { "epoch": 0.8399754751686083, "grad_norm": 0.7195500713362986, "learning_rate": 8.890510948905111e-07, "loss": 0.5851, "step": 28770 }, { "epoch": 0.8400046713964556, "grad_norm": 0.7897260734862278, "learning_rate": 8.88888888888889e-07, "loss": 0.6788, "step": 28771 }, { "epoch": 0.840033867624303, "grad_norm": 0.7354773386268243, "learning_rate": 8.887266828872669e-07, "loss": 0.6943, "step": 28772 }, { "epoch": 0.8400630638521503, "grad_norm": 0.6733092517008796, "learning_rate": 8.885644768856448e-07, "loss": 0.5354, "step": 28773 }, { "epoch": 0.8400922600799977, "grad_norm": 0.7368413684449304, "learning_rate": 8.884022708840227e-07, "loss": 0.6458, "step": 28774 }, { "epoch": 0.8401214563078451, "grad_norm": 0.7583139179845027, "learning_rate": 8.882400648824008e-07, "loss": 0.6061, "step": 28775 }, { "epoch": 0.8401506525356924, "grad_norm": 0.7623780323878682, "learning_rate": 8.880778588807787e-07, "loss": 0.7528, "step": 28776 }, { "epoch": 0.8401798487635398, "grad_norm": 0.745241305316323, "learning_rate": 8.879156528791566e-07, "loss": 0.619, "step": 28777 }, { "epoch": 0.8402090449913872, "grad_norm": 0.7148753845200939, "learning_rate": 8.877534468775345e-07, "loss": 0.6487, "step": 28778 }, { "epoch": 0.8402382412192345, "grad_norm": 0.7648951910529117, "learning_rate": 8.875912408759124e-07, "loss": 0.6628, "step": 28779 }, { "epoch": 0.8402674374470819, "grad_norm": 0.7989795190775443, "learning_rate": 8.874290348742905e-07, "loss": 0.8224, "step": 28780 }, { "epoch": 0.8402966336749292, "grad_norm": 0.7232272704971873, "learning_rate": 8.872668288726684e-07, "loss": 0.5979, "step": 28781 }, { "epoch": 0.8403258299027766, "grad_norm": 0.6914725344358847, "learning_rate": 8.871046228710463e-07, "loss": 0.5848, "step": 28782 }, { "epoch": 0.840355026130624, "grad_norm": 0.7197034116929423, "learning_rate": 8.869424168694242e-07, "loss": 0.6283, "step": 28783 }, { "epoch": 0.8403842223584713, "grad_norm": 0.6928114646244787, "learning_rate": 8.867802108678022e-07, "loss": 0.6175, "step": 28784 }, { "epoch": 0.8404134185863187, "grad_norm": 0.6658534235860858, "learning_rate": 8.866180048661801e-07, "loss": 0.5528, "step": 28785 }, { "epoch": 0.840442614814166, "grad_norm": 0.6938177661549492, "learning_rate": 8.86455798864558e-07, "loss": 0.5782, "step": 28786 }, { "epoch": 0.8404718110420134, "grad_norm": 0.6897505317505271, "learning_rate": 8.86293592862936e-07, "loss": 0.6106, "step": 28787 }, { "epoch": 0.8405010072698608, "grad_norm": 0.694293461592478, "learning_rate": 8.861313868613139e-07, "loss": 0.6, "step": 28788 }, { "epoch": 0.8405302034977081, "grad_norm": 0.7607543887707071, "learning_rate": 8.859691808596919e-07, "loss": 0.6685, "step": 28789 }, { "epoch": 0.8405593997255555, "grad_norm": 0.7301836125589912, "learning_rate": 8.858069748580698e-07, "loss": 0.6454, "step": 28790 }, { "epoch": 0.8405885959534029, "grad_norm": 0.7319792807693948, "learning_rate": 8.856447688564477e-07, "loss": 0.6414, "step": 28791 }, { "epoch": 0.8406177921812502, "grad_norm": 0.7904667481192923, "learning_rate": 8.854825628548256e-07, "loss": 0.651, "step": 28792 }, { "epoch": 0.8406469884090976, "grad_norm": 0.7128505207604108, "learning_rate": 8.853203568532035e-07, "loss": 0.6182, "step": 28793 }, { "epoch": 0.8406761846369449, "grad_norm": 0.8297600559031033, "learning_rate": 8.851581508515817e-07, "loss": 0.7584, "step": 28794 }, { "epoch": 0.8407053808647923, "grad_norm": 0.7021183772219369, "learning_rate": 8.849959448499596e-07, "loss": 0.6627, "step": 28795 }, { "epoch": 0.8407345770926397, "grad_norm": 0.7347495919440444, "learning_rate": 8.848337388483375e-07, "loss": 0.646, "step": 28796 }, { "epoch": 0.840763773320487, "grad_norm": 0.6995405274445569, "learning_rate": 8.846715328467154e-07, "loss": 0.5825, "step": 28797 }, { "epoch": 0.8407929695483344, "grad_norm": 0.7237935691405036, "learning_rate": 8.845093268450933e-07, "loss": 0.6515, "step": 28798 }, { "epoch": 0.8408221657761817, "grad_norm": 0.7141958952964887, "learning_rate": 8.843471208434714e-07, "loss": 0.5903, "step": 28799 }, { "epoch": 0.8408513620040291, "grad_norm": 0.8837547048984761, "learning_rate": 8.841849148418493e-07, "loss": 0.7507, "step": 28800 }, { "epoch": 0.8408805582318765, "grad_norm": 0.6864072340320387, "learning_rate": 8.840227088402272e-07, "loss": 0.6111, "step": 28801 }, { "epoch": 0.8409097544597238, "grad_norm": 0.7051805734380603, "learning_rate": 8.838605028386051e-07, "loss": 0.6261, "step": 28802 }, { "epoch": 0.8409389506875712, "grad_norm": 0.7283227841763562, "learning_rate": 8.836982968369831e-07, "loss": 0.6312, "step": 28803 }, { "epoch": 0.8409681469154185, "grad_norm": 0.6711322008469397, "learning_rate": 8.83536090835361e-07, "loss": 0.5359, "step": 28804 }, { "epoch": 0.8409973431432659, "grad_norm": 0.7252623781649271, "learning_rate": 8.833738848337389e-07, "loss": 0.6733, "step": 28805 }, { "epoch": 0.8410265393711133, "grad_norm": 0.705838762394946, "learning_rate": 8.832116788321169e-07, "loss": 0.6154, "step": 28806 }, { "epoch": 0.8410557355989606, "grad_norm": 0.7112185275513648, "learning_rate": 8.830494728304948e-07, "loss": 0.6414, "step": 28807 }, { "epoch": 0.841084931826808, "grad_norm": 0.6781868982102359, "learning_rate": 8.828872668288728e-07, "loss": 0.565, "step": 28808 }, { "epoch": 0.8411141280546554, "grad_norm": 0.8055351871323347, "learning_rate": 8.827250608272507e-07, "loss": 0.6691, "step": 28809 }, { "epoch": 0.8411433242825027, "grad_norm": 0.6511921944518302, "learning_rate": 8.825628548256286e-07, "loss": 0.5544, "step": 28810 }, { "epoch": 0.8411725205103501, "grad_norm": 0.6971560910479323, "learning_rate": 8.824006488240065e-07, "loss": 0.6121, "step": 28811 }, { "epoch": 0.8412017167381974, "grad_norm": 0.7236527182373401, "learning_rate": 8.822384428223844e-07, "loss": 0.6656, "step": 28812 }, { "epoch": 0.8412309129660448, "grad_norm": 0.7173683871118838, "learning_rate": 8.820762368207625e-07, "loss": 0.6301, "step": 28813 }, { "epoch": 0.8412601091938922, "grad_norm": 0.7484040235881281, "learning_rate": 8.819140308191404e-07, "loss": 0.6979, "step": 28814 }, { "epoch": 0.8412893054217395, "grad_norm": 0.8335076354441153, "learning_rate": 8.817518248175183e-07, "loss": 0.616, "step": 28815 }, { "epoch": 0.8413185016495869, "grad_norm": 0.6998408780185377, "learning_rate": 8.815896188158962e-07, "loss": 0.649, "step": 28816 }, { "epoch": 0.8413476978774342, "grad_norm": 0.7319780167517338, "learning_rate": 8.814274128142742e-07, "loss": 0.6206, "step": 28817 }, { "epoch": 0.8413768941052816, "grad_norm": 0.7241949748871181, "learning_rate": 8.812652068126522e-07, "loss": 0.6427, "step": 28818 }, { "epoch": 0.841406090333129, "grad_norm": 0.6825692795280007, "learning_rate": 8.811030008110301e-07, "loss": 0.5596, "step": 28819 }, { "epoch": 0.8414352865609763, "grad_norm": 0.7541707835627447, "learning_rate": 8.80940794809408e-07, "loss": 0.6971, "step": 28820 }, { "epoch": 0.8414644827888237, "grad_norm": 0.7118611830475429, "learning_rate": 8.807785888077859e-07, "loss": 0.6268, "step": 28821 }, { "epoch": 0.841493679016671, "grad_norm": 0.6859532981727312, "learning_rate": 8.806163828061639e-07, "loss": 0.5696, "step": 28822 }, { "epoch": 0.8415228752445184, "grad_norm": 0.6736008151059228, "learning_rate": 8.804541768045418e-07, "loss": 0.5616, "step": 28823 }, { "epoch": 0.8415520714723658, "grad_norm": 0.7055407097239432, "learning_rate": 8.802919708029197e-07, "loss": 0.6229, "step": 28824 }, { "epoch": 0.8415812677002131, "grad_norm": 0.7499031072605058, "learning_rate": 8.801297648012977e-07, "loss": 0.6738, "step": 28825 }, { "epoch": 0.8416104639280605, "grad_norm": 0.6825357127185203, "learning_rate": 8.799675587996756e-07, "loss": 0.612, "step": 28826 }, { "epoch": 0.8416396601559079, "grad_norm": 0.7215096098213212, "learning_rate": 8.798053527980537e-07, "loss": 0.6104, "step": 28827 }, { "epoch": 0.8416688563837552, "grad_norm": 0.7482674638842409, "learning_rate": 8.796431467964316e-07, "loss": 0.6145, "step": 28828 }, { "epoch": 0.8416980526116026, "grad_norm": 0.7370999735312844, "learning_rate": 8.794809407948095e-07, "loss": 0.71, "step": 28829 }, { "epoch": 0.8417272488394499, "grad_norm": 0.6955102465922781, "learning_rate": 8.793187347931874e-07, "loss": 0.647, "step": 28830 }, { "epoch": 0.8417564450672973, "grad_norm": 0.7469789460755168, "learning_rate": 8.791565287915653e-07, "loss": 0.6347, "step": 28831 }, { "epoch": 0.8417856412951447, "grad_norm": 0.7247346398044584, "learning_rate": 8.789943227899434e-07, "loss": 0.5943, "step": 28832 }, { "epoch": 0.841814837522992, "grad_norm": 0.7841963646767667, "learning_rate": 8.788321167883213e-07, "loss": 0.7345, "step": 28833 }, { "epoch": 0.8418440337508394, "grad_norm": 0.7952951246404949, "learning_rate": 8.786699107866992e-07, "loss": 0.7259, "step": 28834 }, { "epoch": 0.8418732299786867, "grad_norm": 0.6917487182439384, "learning_rate": 8.785077047850771e-07, "loss": 0.6231, "step": 28835 }, { "epoch": 0.8419024262065341, "grad_norm": 0.6900441841764267, "learning_rate": 8.783454987834551e-07, "loss": 0.6217, "step": 28836 }, { "epoch": 0.8419316224343815, "grad_norm": 0.7813428733589527, "learning_rate": 8.781832927818331e-07, "loss": 0.763, "step": 28837 }, { "epoch": 0.8419608186622288, "grad_norm": 0.6996211386799189, "learning_rate": 8.78021086780211e-07, "loss": 0.6313, "step": 28838 }, { "epoch": 0.8419900148900762, "grad_norm": 0.7004461462630757, "learning_rate": 8.778588807785889e-07, "loss": 0.6185, "step": 28839 }, { "epoch": 0.8420192111179235, "grad_norm": 0.8067525298223515, "learning_rate": 8.776966747769668e-07, "loss": 0.6687, "step": 28840 }, { "epoch": 0.8420484073457709, "grad_norm": 0.7498285814270549, "learning_rate": 8.775344687753448e-07, "loss": 0.6552, "step": 28841 }, { "epoch": 0.8420776035736183, "grad_norm": 0.7129370580199809, "learning_rate": 8.773722627737227e-07, "loss": 0.5845, "step": 28842 }, { "epoch": 0.8421067998014656, "grad_norm": 0.7099046680996232, "learning_rate": 8.772100567721006e-07, "loss": 0.5715, "step": 28843 }, { "epoch": 0.842135996029313, "grad_norm": 0.703340628948755, "learning_rate": 8.770478507704785e-07, "loss": 0.612, "step": 28844 }, { "epoch": 0.8421651922571604, "grad_norm": 0.7444030180295318, "learning_rate": 8.768856447688565e-07, "loss": 0.7039, "step": 28845 }, { "epoch": 0.8421943884850077, "grad_norm": 0.7671873922993724, "learning_rate": 8.767234387672345e-07, "loss": 0.6838, "step": 28846 }, { "epoch": 0.8422235847128551, "grad_norm": 0.7380566294845567, "learning_rate": 8.765612327656124e-07, "loss": 0.6628, "step": 28847 }, { "epoch": 0.8422527809407024, "grad_norm": 0.6870382469779113, "learning_rate": 8.763990267639903e-07, "loss": 0.6037, "step": 28848 }, { "epoch": 0.8422819771685498, "grad_norm": 0.7175461410117858, "learning_rate": 8.762368207623682e-07, "loss": 0.6201, "step": 28849 }, { "epoch": 0.8423111733963972, "grad_norm": 0.6832857014154656, "learning_rate": 8.760746147607461e-07, "loss": 0.6047, "step": 28850 }, { "epoch": 0.8423403696242445, "grad_norm": 0.7594368641106711, "learning_rate": 8.759124087591242e-07, "loss": 0.7566, "step": 28851 }, { "epoch": 0.8423695658520919, "grad_norm": 0.7365025877611076, "learning_rate": 8.757502027575021e-07, "loss": 0.6521, "step": 28852 }, { "epoch": 0.8423987620799392, "grad_norm": 0.7173949201444262, "learning_rate": 8.7558799675588e-07, "loss": 0.6588, "step": 28853 }, { "epoch": 0.8424279583077866, "grad_norm": 0.6642046612055433, "learning_rate": 8.754257907542579e-07, "loss": 0.5568, "step": 28854 }, { "epoch": 0.842457154535634, "grad_norm": 0.7454621427019474, "learning_rate": 8.752635847526359e-07, "loss": 0.6805, "step": 28855 }, { "epoch": 0.8424863507634813, "grad_norm": 0.7225541856192561, "learning_rate": 8.751013787510138e-07, "loss": 0.6447, "step": 28856 }, { "epoch": 0.8425155469913287, "grad_norm": 0.7686964903542407, "learning_rate": 8.749391727493918e-07, "loss": 0.7133, "step": 28857 }, { "epoch": 0.842544743219176, "grad_norm": 0.6826076434111104, "learning_rate": 8.747769667477697e-07, "loss": 0.5852, "step": 28858 }, { "epoch": 0.8425739394470234, "grad_norm": 0.8199937203305808, "learning_rate": 8.746147607461476e-07, "loss": 0.6947, "step": 28859 }, { "epoch": 0.8426031356748708, "grad_norm": 0.6750156958928444, "learning_rate": 8.744525547445257e-07, "loss": 0.5389, "step": 28860 }, { "epoch": 0.8426323319027181, "grad_norm": 0.7199927594886502, "learning_rate": 8.742903487429036e-07, "loss": 0.5937, "step": 28861 }, { "epoch": 0.8426615281305655, "grad_norm": 0.6873627269329863, "learning_rate": 8.741281427412815e-07, "loss": 0.6506, "step": 28862 }, { "epoch": 0.8426907243584129, "grad_norm": 0.7380281801522778, "learning_rate": 8.739659367396594e-07, "loss": 0.6311, "step": 28863 }, { "epoch": 0.8427199205862602, "grad_norm": 0.7662322252721038, "learning_rate": 8.738037307380374e-07, "loss": 0.7092, "step": 28864 }, { "epoch": 0.8427491168141076, "grad_norm": 0.7014589581853771, "learning_rate": 8.736415247364154e-07, "loss": 0.625, "step": 28865 }, { "epoch": 0.8427783130419549, "grad_norm": 0.7212054982894543, "learning_rate": 8.734793187347933e-07, "loss": 0.5997, "step": 28866 }, { "epoch": 0.8428075092698023, "grad_norm": 0.742704879739089, "learning_rate": 8.733171127331712e-07, "loss": 0.6683, "step": 28867 }, { "epoch": 0.8428367054976497, "grad_norm": 0.85485722964805, "learning_rate": 8.731549067315491e-07, "loss": 0.7339, "step": 28868 }, { "epoch": 0.842865901725497, "grad_norm": 0.6771305703923017, "learning_rate": 8.729927007299272e-07, "loss": 0.5592, "step": 28869 }, { "epoch": 0.8428950979533444, "grad_norm": 0.6986372503481506, "learning_rate": 8.728304947283051e-07, "loss": 0.6148, "step": 28870 }, { "epoch": 0.8429242941811917, "grad_norm": 0.7244238159309259, "learning_rate": 8.72668288726683e-07, "loss": 0.6297, "step": 28871 }, { "epoch": 0.8429534904090391, "grad_norm": 0.7067141733282336, "learning_rate": 8.725060827250609e-07, "loss": 0.5861, "step": 28872 }, { "epoch": 0.8429826866368865, "grad_norm": 0.6977941337735244, "learning_rate": 8.723438767234388e-07, "loss": 0.6118, "step": 28873 }, { "epoch": 0.8430118828647338, "grad_norm": 0.7164740358963906, "learning_rate": 8.721816707218168e-07, "loss": 0.5981, "step": 28874 }, { "epoch": 0.8430410790925812, "grad_norm": 0.694611728223421, "learning_rate": 8.720194647201947e-07, "loss": 0.5866, "step": 28875 }, { "epoch": 0.8430702753204286, "grad_norm": 0.735819208718538, "learning_rate": 8.718572587185727e-07, "loss": 0.615, "step": 28876 }, { "epoch": 0.8430994715482759, "grad_norm": 0.6989987285509495, "learning_rate": 8.716950527169506e-07, "loss": 0.5823, "step": 28877 }, { "epoch": 0.8431286677761233, "grad_norm": 0.6570301061343985, "learning_rate": 8.715328467153285e-07, "loss": 0.5417, "step": 28878 }, { "epoch": 0.8431578640039706, "grad_norm": 0.7381097991429258, "learning_rate": 8.713706407137065e-07, "loss": 0.6503, "step": 28879 }, { "epoch": 0.843187060231818, "grad_norm": 0.7376448714570083, "learning_rate": 8.712084347120844e-07, "loss": 0.6728, "step": 28880 }, { "epoch": 0.8432162564596654, "grad_norm": 0.6985498187067396, "learning_rate": 8.710462287104623e-07, "loss": 0.5976, "step": 28881 }, { "epoch": 0.8432454526875127, "grad_norm": 0.7726928758068332, "learning_rate": 8.708840227088402e-07, "loss": 0.624, "step": 28882 }, { "epoch": 0.8432746489153601, "grad_norm": 0.7266476498330144, "learning_rate": 8.707218167072182e-07, "loss": 0.6518, "step": 28883 }, { "epoch": 0.8433038451432074, "grad_norm": 0.754787962278129, "learning_rate": 8.705596107055962e-07, "loss": 0.6785, "step": 28884 }, { "epoch": 0.8433330413710549, "grad_norm": 0.7259440602661645, "learning_rate": 8.703974047039741e-07, "loss": 0.6307, "step": 28885 }, { "epoch": 0.8433622375989023, "grad_norm": 0.7104841455572142, "learning_rate": 8.70235198702352e-07, "loss": 0.6235, "step": 28886 }, { "epoch": 0.8433914338267496, "grad_norm": 0.6955284447567419, "learning_rate": 8.700729927007299e-07, "loss": 0.6456, "step": 28887 }, { "epoch": 0.843420630054597, "grad_norm": 0.7615723420743814, "learning_rate": 8.69910786699108e-07, "loss": 0.6798, "step": 28888 }, { "epoch": 0.8434498262824444, "grad_norm": 0.7378636829292633, "learning_rate": 8.697485806974859e-07, "loss": 0.6878, "step": 28889 }, { "epoch": 0.8434790225102917, "grad_norm": 0.7143231410773708, "learning_rate": 8.695863746958638e-07, "loss": 0.6545, "step": 28890 }, { "epoch": 0.8435082187381391, "grad_norm": 0.690386649752437, "learning_rate": 8.694241686942417e-07, "loss": 0.594, "step": 28891 }, { "epoch": 0.8435374149659864, "grad_norm": 0.7089465797199052, "learning_rate": 8.692619626926196e-07, "loss": 0.6626, "step": 28892 }, { "epoch": 0.8435666111938338, "grad_norm": 0.754187747638412, "learning_rate": 8.690997566909977e-07, "loss": 0.641, "step": 28893 }, { "epoch": 0.8435958074216812, "grad_norm": 0.7131285532647823, "learning_rate": 8.689375506893756e-07, "loss": 0.6257, "step": 28894 }, { "epoch": 0.8436250036495285, "grad_norm": 0.7502301077282019, "learning_rate": 8.687753446877536e-07, "loss": 0.6695, "step": 28895 }, { "epoch": 0.8436541998773759, "grad_norm": 0.7280041180815267, "learning_rate": 8.686131386861315e-07, "loss": 0.6714, "step": 28896 }, { "epoch": 0.8436833961052232, "grad_norm": 0.6947841966933954, "learning_rate": 8.684509326845094e-07, "loss": 0.6013, "step": 28897 }, { "epoch": 0.8437125923330706, "grad_norm": 0.688185069091691, "learning_rate": 8.682887266828874e-07, "loss": 0.5982, "step": 28898 }, { "epoch": 0.843741788560918, "grad_norm": 0.6831618874447651, "learning_rate": 8.681265206812653e-07, "loss": 0.5699, "step": 28899 }, { "epoch": 0.8437709847887653, "grad_norm": 0.7465663121401582, "learning_rate": 8.679643146796432e-07, "loss": 0.7164, "step": 28900 }, { "epoch": 0.8438001810166127, "grad_norm": 0.72520569608853, "learning_rate": 8.678021086780211e-07, "loss": 0.6489, "step": 28901 }, { "epoch": 0.84382937724446, "grad_norm": 0.6600483398736648, "learning_rate": 8.676399026763992e-07, "loss": 0.5675, "step": 28902 }, { "epoch": 0.8438585734723074, "grad_norm": 0.7232054504166301, "learning_rate": 8.674776966747771e-07, "loss": 0.608, "step": 28903 }, { "epoch": 0.8438877697001548, "grad_norm": 0.7341330739753638, "learning_rate": 8.67315490673155e-07, "loss": 0.6788, "step": 28904 }, { "epoch": 0.8439169659280021, "grad_norm": 0.6741986438309503, "learning_rate": 8.671532846715329e-07, "loss": 0.5754, "step": 28905 }, { "epoch": 0.8439461621558495, "grad_norm": 0.703345638515374, "learning_rate": 8.669910786699108e-07, "loss": 0.6021, "step": 28906 }, { "epoch": 0.8439753583836969, "grad_norm": 0.7155843689068206, "learning_rate": 8.668288726682889e-07, "loss": 0.5893, "step": 28907 }, { "epoch": 0.8440045546115442, "grad_norm": 0.7013720693086694, "learning_rate": 8.666666666666668e-07, "loss": 0.6107, "step": 28908 }, { "epoch": 0.8440337508393916, "grad_norm": 0.8019198121540512, "learning_rate": 8.665044606650447e-07, "loss": 0.7124, "step": 28909 }, { "epoch": 0.844062947067239, "grad_norm": 0.6869783796694411, "learning_rate": 8.663422546634226e-07, "loss": 0.6138, "step": 28910 }, { "epoch": 0.8440921432950863, "grad_norm": 0.6993449770161811, "learning_rate": 8.661800486618005e-07, "loss": 0.5805, "step": 28911 }, { "epoch": 0.8441213395229337, "grad_norm": 0.7491078679420367, "learning_rate": 8.660178426601785e-07, "loss": 0.6875, "step": 28912 }, { "epoch": 0.844150535750781, "grad_norm": 0.7763504038672738, "learning_rate": 8.658556366585564e-07, "loss": 0.6794, "step": 28913 }, { "epoch": 0.8441797319786284, "grad_norm": 0.6991788816956926, "learning_rate": 8.656934306569344e-07, "loss": 0.6227, "step": 28914 }, { "epoch": 0.8442089282064758, "grad_norm": 0.7435333880165069, "learning_rate": 8.655312246553123e-07, "loss": 0.6517, "step": 28915 }, { "epoch": 0.8442381244343231, "grad_norm": 0.7722250057420872, "learning_rate": 8.653690186536902e-07, "loss": 0.6374, "step": 28916 }, { "epoch": 0.8442673206621705, "grad_norm": 0.7534434576629898, "learning_rate": 8.652068126520682e-07, "loss": 0.6328, "step": 28917 }, { "epoch": 0.8442965168900178, "grad_norm": 0.7392919732775027, "learning_rate": 8.650446066504461e-07, "loss": 0.6444, "step": 28918 }, { "epoch": 0.8443257131178652, "grad_norm": 0.7769283212985243, "learning_rate": 8.64882400648824e-07, "loss": 0.6606, "step": 28919 }, { "epoch": 0.8443549093457126, "grad_norm": 0.709416643480322, "learning_rate": 8.647201946472019e-07, "loss": 0.6629, "step": 28920 }, { "epoch": 0.8443841055735599, "grad_norm": 0.7660115116318503, "learning_rate": 8.6455798864558e-07, "loss": 0.6985, "step": 28921 }, { "epoch": 0.8444133018014073, "grad_norm": 0.7939142998550144, "learning_rate": 8.643957826439579e-07, "loss": 0.7235, "step": 28922 }, { "epoch": 0.8444424980292546, "grad_norm": 0.6977245261957454, "learning_rate": 8.642335766423358e-07, "loss": 0.6023, "step": 28923 }, { "epoch": 0.844471694257102, "grad_norm": 0.7300807247285148, "learning_rate": 8.640713706407137e-07, "loss": 0.7281, "step": 28924 }, { "epoch": 0.8445008904849494, "grad_norm": 0.6968203005442983, "learning_rate": 8.639091646390916e-07, "loss": 0.6066, "step": 28925 }, { "epoch": 0.8445300867127967, "grad_norm": 0.7318104766763113, "learning_rate": 8.637469586374698e-07, "loss": 0.6215, "step": 28926 }, { "epoch": 0.8445592829406441, "grad_norm": 0.7172264056919385, "learning_rate": 8.635847526358477e-07, "loss": 0.6382, "step": 28927 }, { "epoch": 0.8445884791684914, "grad_norm": 0.7361548063599019, "learning_rate": 8.634225466342256e-07, "loss": 0.6455, "step": 28928 }, { "epoch": 0.8446176753963388, "grad_norm": 0.711311557792429, "learning_rate": 8.632603406326035e-07, "loss": 0.6253, "step": 28929 }, { "epoch": 0.8446468716241862, "grad_norm": 0.7120021563483468, "learning_rate": 8.630981346309814e-07, "loss": 0.6107, "step": 28930 }, { "epoch": 0.8446760678520335, "grad_norm": 0.7091937113149628, "learning_rate": 8.629359286293594e-07, "loss": 0.6063, "step": 28931 }, { "epoch": 0.8447052640798809, "grad_norm": 0.7271875402396386, "learning_rate": 8.627737226277373e-07, "loss": 0.6845, "step": 28932 }, { "epoch": 0.8447344603077283, "grad_norm": 0.7224301944462136, "learning_rate": 8.626115166261152e-07, "loss": 0.6075, "step": 28933 }, { "epoch": 0.8447636565355756, "grad_norm": 0.723759376610504, "learning_rate": 8.624493106244932e-07, "loss": 0.6994, "step": 28934 }, { "epoch": 0.844792852763423, "grad_norm": 0.7480345119307189, "learning_rate": 8.622871046228712e-07, "loss": 0.6082, "step": 28935 }, { "epoch": 0.8448220489912703, "grad_norm": 0.7354212304726703, "learning_rate": 8.621248986212491e-07, "loss": 0.677, "step": 28936 }, { "epoch": 0.8448512452191177, "grad_norm": 0.7122263839337072, "learning_rate": 8.61962692619627e-07, "loss": 0.686, "step": 28937 }, { "epoch": 0.8448804414469651, "grad_norm": 0.7262896655882695, "learning_rate": 8.618004866180049e-07, "loss": 0.6287, "step": 28938 }, { "epoch": 0.8449096376748124, "grad_norm": 0.7166027565234595, "learning_rate": 8.616382806163828e-07, "loss": 0.6129, "step": 28939 }, { "epoch": 0.8449388339026598, "grad_norm": 0.7216084946525915, "learning_rate": 8.614760746147609e-07, "loss": 0.6314, "step": 28940 }, { "epoch": 0.8449680301305071, "grad_norm": 0.7309708593652859, "learning_rate": 8.613138686131388e-07, "loss": 0.6003, "step": 28941 }, { "epoch": 0.8449972263583545, "grad_norm": 0.714284684189819, "learning_rate": 8.611516626115167e-07, "loss": 0.654, "step": 28942 }, { "epoch": 0.8450264225862019, "grad_norm": 0.7630010141420694, "learning_rate": 8.609894566098946e-07, "loss": 0.6595, "step": 28943 }, { "epoch": 0.8450556188140492, "grad_norm": 0.7095372670678806, "learning_rate": 8.608272506082725e-07, "loss": 0.6273, "step": 28944 }, { "epoch": 0.8450848150418966, "grad_norm": 0.6663825046992042, "learning_rate": 8.606650446066506e-07, "loss": 0.5515, "step": 28945 }, { "epoch": 0.845114011269744, "grad_norm": 0.7619361113021104, "learning_rate": 8.605028386050285e-07, "loss": 0.6827, "step": 28946 }, { "epoch": 0.8451432074975913, "grad_norm": 0.7161421306218493, "learning_rate": 8.603406326034064e-07, "loss": 0.6134, "step": 28947 }, { "epoch": 0.8451724037254387, "grad_norm": 0.7526724665940189, "learning_rate": 8.601784266017843e-07, "loss": 0.6808, "step": 28948 }, { "epoch": 0.845201599953286, "grad_norm": 0.7793877043032883, "learning_rate": 8.600162206001622e-07, "loss": 0.6305, "step": 28949 }, { "epoch": 0.8452307961811334, "grad_norm": 0.697125740257534, "learning_rate": 8.598540145985402e-07, "loss": 0.5886, "step": 28950 }, { "epoch": 0.8452599924089808, "grad_norm": 0.7243087188982431, "learning_rate": 8.596918085969181e-07, "loss": 0.639, "step": 28951 }, { "epoch": 0.8452891886368281, "grad_norm": 0.7619297198933639, "learning_rate": 8.59529602595296e-07, "loss": 0.6857, "step": 28952 }, { "epoch": 0.8453183848646755, "grad_norm": 0.7794246899397245, "learning_rate": 8.59367396593674e-07, "loss": 0.7232, "step": 28953 }, { "epoch": 0.8453475810925228, "grad_norm": 0.7492756259955712, "learning_rate": 8.59205190592052e-07, "loss": 0.6636, "step": 28954 }, { "epoch": 0.8453767773203702, "grad_norm": 0.7022732270357237, "learning_rate": 8.5904298459043e-07, "loss": 0.5788, "step": 28955 }, { "epoch": 0.8454059735482176, "grad_norm": 0.7348290893479346, "learning_rate": 8.588807785888078e-07, "loss": 0.635, "step": 28956 }, { "epoch": 0.8454351697760649, "grad_norm": 0.7286545199958716, "learning_rate": 8.587185725871857e-07, "loss": 0.6711, "step": 28957 }, { "epoch": 0.8454643660039123, "grad_norm": 0.7067738744415325, "learning_rate": 8.585563665855636e-07, "loss": 0.6329, "step": 28958 }, { "epoch": 0.8454935622317596, "grad_norm": 0.751508663627572, "learning_rate": 8.583941605839418e-07, "loss": 0.7089, "step": 28959 }, { "epoch": 0.845522758459607, "grad_norm": 0.7307055721662195, "learning_rate": 8.582319545823197e-07, "loss": 0.5959, "step": 28960 }, { "epoch": 0.8455519546874544, "grad_norm": 0.9788803861335474, "learning_rate": 8.580697485806976e-07, "loss": 0.6724, "step": 28961 }, { "epoch": 0.8455811509153017, "grad_norm": 0.6667741581347272, "learning_rate": 8.579075425790755e-07, "loss": 0.5724, "step": 28962 }, { "epoch": 0.8456103471431491, "grad_norm": 0.6996021083829119, "learning_rate": 8.577453365774534e-07, "loss": 0.5703, "step": 28963 }, { "epoch": 0.8456395433709964, "grad_norm": 0.7157749665159306, "learning_rate": 8.575831305758314e-07, "loss": 0.6327, "step": 28964 }, { "epoch": 0.8456687395988438, "grad_norm": 0.8639888164963927, "learning_rate": 8.574209245742094e-07, "loss": 0.7304, "step": 28965 }, { "epoch": 0.8456979358266912, "grad_norm": 0.7112433669410844, "learning_rate": 8.572587185725873e-07, "loss": 0.5906, "step": 28966 }, { "epoch": 0.8457271320545385, "grad_norm": 0.6646396271526229, "learning_rate": 8.570965125709652e-07, "loss": 0.5707, "step": 28967 }, { "epoch": 0.8457563282823859, "grad_norm": 0.7615253523971758, "learning_rate": 8.569343065693432e-07, "loss": 0.6849, "step": 28968 }, { "epoch": 0.8457855245102333, "grad_norm": 0.6927892589422194, "learning_rate": 8.567721005677211e-07, "loss": 0.582, "step": 28969 }, { "epoch": 0.8458147207380806, "grad_norm": 0.7485245635892842, "learning_rate": 8.56609894566099e-07, "loss": 0.7048, "step": 28970 }, { "epoch": 0.845843916965928, "grad_norm": 0.7268206898454131, "learning_rate": 8.564476885644769e-07, "loss": 0.6728, "step": 28971 }, { "epoch": 0.8458731131937753, "grad_norm": 0.7332708342236656, "learning_rate": 8.562854825628549e-07, "loss": 0.6809, "step": 28972 }, { "epoch": 0.8459023094216227, "grad_norm": 0.6795834779647966, "learning_rate": 8.561232765612329e-07, "loss": 0.5361, "step": 28973 }, { "epoch": 0.8459315056494701, "grad_norm": 0.6741454111465175, "learning_rate": 8.559610705596108e-07, "loss": 0.5729, "step": 28974 }, { "epoch": 0.8459607018773174, "grad_norm": 0.726964772150475, "learning_rate": 8.557988645579887e-07, "loss": 0.6529, "step": 28975 }, { "epoch": 0.8459898981051648, "grad_norm": 0.6848975519310379, "learning_rate": 8.556366585563666e-07, "loss": 0.5916, "step": 28976 }, { "epoch": 0.8460190943330121, "grad_norm": 0.7277139742231287, "learning_rate": 8.554744525547445e-07, "loss": 0.6406, "step": 28977 }, { "epoch": 0.8460482905608595, "grad_norm": 0.7681666020651391, "learning_rate": 8.553122465531226e-07, "loss": 0.6215, "step": 28978 }, { "epoch": 0.8460774867887069, "grad_norm": 0.7309204612615222, "learning_rate": 8.551500405515005e-07, "loss": 0.7151, "step": 28979 }, { "epoch": 0.8461066830165542, "grad_norm": 0.689534661320827, "learning_rate": 8.549878345498784e-07, "loss": 0.5787, "step": 28980 }, { "epoch": 0.8461358792444016, "grad_norm": 0.7512343757942411, "learning_rate": 8.548256285482563e-07, "loss": 0.6657, "step": 28981 }, { "epoch": 0.846165075472249, "grad_norm": 0.7063554446393862, "learning_rate": 8.546634225466342e-07, "loss": 0.6384, "step": 28982 }, { "epoch": 0.8461942717000963, "grad_norm": 0.7298927648781124, "learning_rate": 8.545012165450122e-07, "loss": 0.6517, "step": 28983 }, { "epoch": 0.8462234679279437, "grad_norm": 0.8407298876440248, "learning_rate": 8.543390105433902e-07, "loss": 0.7647, "step": 28984 }, { "epoch": 0.846252664155791, "grad_norm": 0.6542481704819747, "learning_rate": 8.541768045417681e-07, "loss": 0.5197, "step": 28985 }, { "epoch": 0.8462818603836384, "grad_norm": 0.7653556168077179, "learning_rate": 8.54014598540146e-07, "loss": 0.7251, "step": 28986 }, { "epoch": 0.8463110566114858, "grad_norm": 0.7581510848732225, "learning_rate": 8.53852392538524e-07, "loss": 0.6976, "step": 28987 }, { "epoch": 0.8463402528393331, "grad_norm": 0.7242927134805285, "learning_rate": 8.53690186536902e-07, "loss": 0.672, "step": 28988 }, { "epoch": 0.8463694490671805, "grad_norm": 0.7035892426307365, "learning_rate": 8.535279805352798e-07, "loss": 0.6428, "step": 28989 }, { "epoch": 0.8463986452950278, "grad_norm": 0.8166439159222322, "learning_rate": 8.533657745336577e-07, "loss": 0.681, "step": 28990 }, { "epoch": 0.8464278415228752, "grad_norm": 0.7342042894382028, "learning_rate": 8.532035685320357e-07, "loss": 0.6767, "step": 28991 }, { "epoch": 0.8464570377507226, "grad_norm": 0.6662908483094164, "learning_rate": 8.530413625304138e-07, "loss": 0.5406, "step": 28992 }, { "epoch": 0.8464862339785699, "grad_norm": 0.7718770030954882, "learning_rate": 8.528791565287917e-07, "loss": 0.7394, "step": 28993 }, { "epoch": 0.8465154302064173, "grad_norm": 0.8098314009952624, "learning_rate": 8.527169505271696e-07, "loss": 0.6972, "step": 28994 }, { "epoch": 0.8465446264342646, "grad_norm": 0.7113233863818783, "learning_rate": 8.525547445255475e-07, "loss": 0.6091, "step": 28995 }, { "epoch": 0.846573822662112, "grad_norm": 0.6581011764612184, "learning_rate": 8.523925385239254e-07, "loss": 0.5552, "step": 28996 }, { "epoch": 0.8466030188899594, "grad_norm": 0.6935914036919306, "learning_rate": 8.522303325223035e-07, "loss": 0.5931, "step": 28997 }, { "epoch": 0.8466322151178067, "grad_norm": 0.7333254751332804, "learning_rate": 8.520681265206814e-07, "loss": 0.6804, "step": 28998 }, { "epoch": 0.8466614113456541, "grad_norm": 0.7169762726179926, "learning_rate": 8.519059205190593e-07, "loss": 0.5996, "step": 28999 }, { "epoch": 0.8466906075735015, "grad_norm": 0.7005027908992013, "learning_rate": 8.517437145174372e-07, "loss": 0.6111, "step": 29000 }, { "epoch": 0.8467198038013488, "grad_norm": 0.7178170288458512, "learning_rate": 8.515815085158151e-07, "loss": 0.5435, "step": 29001 }, { "epoch": 0.8467490000291962, "grad_norm": 0.6887030526260182, "learning_rate": 8.514193025141931e-07, "loss": 0.6222, "step": 29002 }, { "epoch": 0.8467781962570435, "grad_norm": 0.6794203394896163, "learning_rate": 8.512570965125711e-07, "loss": 0.6187, "step": 29003 }, { "epoch": 0.8468073924848909, "grad_norm": 0.7465507432304079, "learning_rate": 8.51094890510949e-07, "loss": 0.6856, "step": 29004 }, { "epoch": 0.8468365887127384, "grad_norm": 0.7389267781023235, "learning_rate": 8.509326845093269e-07, "loss": 0.6862, "step": 29005 }, { "epoch": 0.8468657849405857, "grad_norm": 0.7912455787996341, "learning_rate": 8.507704785077049e-07, "loss": 0.6811, "step": 29006 }, { "epoch": 0.8468949811684331, "grad_norm": 0.7249643508161594, "learning_rate": 8.506082725060828e-07, "loss": 0.6078, "step": 29007 }, { "epoch": 0.8469241773962805, "grad_norm": 0.6523582853571845, "learning_rate": 8.504460665044607e-07, "loss": 0.5559, "step": 29008 }, { "epoch": 0.8469533736241278, "grad_norm": 0.7795530381606768, "learning_rate": 8.502838605028386e-07, "loss": 0.6533, "step": 29009 }, { "epoch": 0.8469825698519752, "grad_norm": 0.7516413598209933, "learning_rate": 8.501216545012165e-07, "loss": 0.6497, "step": 29010 }, { "epoch": 0.8470117660798225, "grad_norm": 0.7463967866413609, "learning_rate": 8.499594484995946e-07, "loss": 0.6561, "step": 29011 }, { "epoch": 0.8470409623076699, "grad_norm": 0.7411716465327687, "learning_rate": 8.497972424979725e-07, "loss": 0.6266, "step": 29012 }, { "epoch": 0.8470701585355173, "grad_norm": 0.7710637122304989, "learning_rate": 8.496350364963504e-07, "loss": 0.6819, "step": 29013 }, { "epoch": 0.8470993547633646, "grad_norm": 0.7165077378345348, "learning_rate": 8.494728304947283e-07, "loss": 0.6308, "step": 29014 }, { "epoch": 0.847128550991212, "grad_norm": 0.7326979359338985, "learning_rate": 8.493106244931062e-07, "loss": 0.6364, "step": 29015 }, { "epoch": 0.8471577472190593, "grad_norm": 0.6682992362658927, "learning_rate": 8.491484184914843e-07, "loss": 0.5459, "step": 29016 }, { "epoch": 0.8471869434469067, "grad_norm": 0.6930220387923139, "learning_rate": 8.489862124898622e-07, "loss": 0.5722, "step": 29017 }, { "epoch": 0.8472161396747541, "grad_norm": 0.7338277934884797, "learning_rate": 8.488240064882401e-07, "loss": 0.6851, "step": 29018 }, { "epoch": 0.8472453359026014, "grad_norm": 0.7107756198947768, "learning_rate": 8.48661800486618e-07, "loss": 0.6437, "step": 29019 }, { "epoch": 0.8472745321304488, "grad_norm": 0.6819474615174025, "learning_rate": 8.48499594484996e-07, "loss": 0.5636, "step": 29020 }, { "epoch": 0.8473037283582961, "grad_norm": 0.7011009362856743, "learning_rate": 8.48337388483374e-07, "loss": 0.6686, "step": 29021 }, { "epoch": 0.8473329245861435, "grad_norm": 0.7414264424112362, "learning_rate": 8.481751824817518e-07, "loss": 0.7, "step": 29022 }, { "epoch": 0.8473621208139909, "grad_norm": 0.6745542548656837, "learning_rate": 8.480129764801299e-07, "loss": 0.5602, "step": 29023 }, { "epoch": 0.8473913170418382, "grad_norm": 0.6931654839148224, "learning_rate": 8.478507704785078e-07, "loss": 0.5652, "step": 29024 }, { "epoch": 0.8474205132696856, "grad_norm": 0.7104335092157987, "learning_rate": 8.476885644768858e-07, "loss": 0.6063, "step": 29025 }, { "epoch": 0.847449709497533, "grad_norm": 0.6865825012582757, "learning_rate": 8.475263584752637e-07, "loss": 0.5655, "step": 29026 }, { "epoch": 0.8474789057253803, "grad_norm": 0.7120480030208839, "learning_rate": 8.473641524736416e-07, "loss": 0.6579, "step": 29027 }, { "epoch": 0.8475081019532277, "grad_norm": 0.698301959941445, "learning_rate": 8.472019464720195e-07, "loss": 0.6234, "step": 29028 }, { "epoch": 0.847537298181075, "grad_norm": 0.6904107368513801, "learning_rate": 8.470397404703974e-07, "loss": 0.6038, "step": 29029 }, { "epoch": 0.8475664944089224, "grad_norm": 0.71156194508356, "learning_rate": 8.468775344687755e-07, "loss": 0.6731, "step": 29030 }, { "epoch": 0.8475956906367698, "grad_norm": 0.6441902160471934, "learning_rate": 8.467153284671534e-07, "loss": 0.5282, "step": 29031 }, { "epoch": 0.8476248868646171, "grad_norm": 0.765464055781968, "learning_rate": 8.465531224655313e-07, "loss": 0.6912, "step": 29032 }, { "epoch": 0.8476540830924645, "grad_norm": 0.7112186336166764, "learning_rate": 8.463909164639092e-07, "loss": 0.6164, "step": 29033 }, { "epoch": 0.8476832793203118, "grad_norm": 0.7968443620471982, "learning_rate": 8.462287104622871e-07, "loss": 0.61, "step": 29034 }, { "epoch": 0.8477124755481592, "grad_norm": 0.7636034153272588, "learning_rate": 8.460665044606652e-07, "loss": 0.5803, "step": 29035 }, { "epoch": 0.8477416717760066, "grad_norm": 0.7113664186801595, "learning_rate": 8.459042984590431e-07, "loss": 0.6456, "step": 29036 }, { "epoch": 0.8477708680038539, "grad_norm": 0.7727367516930937, "learning_rate": 8.45742092457421e-07, "loss": 0.7176, "step": 29037 }, { "epoch": 0.8478000642317013, "grad_norm": 0.7036647383825021, "learning_rate": 8.455798864557989e-07, "loss": 0.638, "step": 29038 }, { "epoch": 0.8478292604595487, "grad_norm": 0.6935483324222148, "learning_rate": 8.454176804541769e-07, "loss": 0.5904, "step": 29039 }, { "epoch": 0.847858456687396, "grad_norm": 0.7290180899544818, "learning_rate": 8.452554744525548e-07, "loss": 0.6684, "step": 29040 }, { "epoch": 0.8478876529152434, "grad_norm": 0.695725427241299, "learning_rate": 8.450932684509327e-07, "loss": 0.5595, "step": 29041 }, { "epoch": 0.8479168491430907, "grad_norm": 0.7414218030948199, "learning_rate": 8.449310624493107e-07, "loss": 0.6618, "step": 29042 }, { "epoch": 0.8479460453709381, "grad_norm": 0.7702528644119818, "learning_rate": 8.447688564476886e-07, "loss": 0.7263, "step": 29043 }, { "epoch": 0.8479752415987855, "grad_norm": 0.7636985750582242, "learning_rate": 8.446066504460666e-07, "loss": 0.6003, "step": 29044 }, { "epoch": 0.8480044378266328, "grad_norm": 0.8062734580680893, "learning_rate": 8.444444444444445e-07, "loss": 0.8328, "step": 29045 }, { "epoch": 0.8480336340544802, "grad_norm": 0.7142200104406449, "learning_rate": 8.442822384428224e-07, "loss": 0.6363, "step": 29046 }, { "epoch": 0.8480628302823275, "grad_norm": 0.7271115230800905, "learning_rate": 8.441200324412003e-07, "loss": 0.6712, "step": 29047 }, { "epoch": 0.8480920265101749, "grad_norm": 0.6636330849290563, "learning_rate": 8.439578264395782e-07, "loss": 0.5172, "step": 29048 }, { "epoch": 0.8481212227380223, "grad_norm": 0.7102246991546123, "learning_rate": 8.437956204379563e-07, "loss": 0.6419, "step": 29049 }, { "epoch": 0.8481504189658696, "grad_norm": 0.7401639637555387, "learning_rate": 8.436334144363342e-07, "loss": 0.6327, "step": 29050 }, { "epoch": 0.848179615193717, "grad_norm": 0.7178187048840053, "learning_rate": 8.434712084347121e-07, "loss": 0.5653, "step": 29051 }, { "epoch": 0.8482088114215643, "grad_norm": 0.7089470780109678, "learning_rate": 8.4330900243309e-07, "loss": 0.6153, "step": 29052 }, { "epoch": 0.8482380076494117, "grad_norm": 0.7192513807132276, "learning_rate": 8.43146796431468e-07, "loss": 0.6436, "step": 29053 }, { "epoch": 0.8482672038772591, "grad_norm": 0.7033829214622441, "learning_rate": 8.429845904298461e-07, "loss": 0.6038, "step": 29054 }, { "epoch": 0.8482964001051064, "grad_norm": 0.7226805439936875, "learning_rate": 8.42822384428224e-07, "loss": 0.6403, "step": 29055 }, { "epoch": 0.8483255963329538, "grad_norm": 0.7316127387993867, "learning_rate": 8.426601784266019e-07, "loss": 0.6224, "step": 29056 }, { "epoch": 0.8483547925608012, "grad_norm": 0.6904330069478931, "learning_rate": 8.424979724249798e-07, "loss": 0.5738, "step": 29057 }, { "epoch": 0.8483839887886485, "grad_norm": 0.6810724589089273, "learning_rate": 8.423357664233578e-07, "loss": 0.6229, "step": 29058 }, { "epoch": 0.8484131850164959, "grad_norm": 0.7409390063360155, "learning_rate": 8.421735604217357e-07, "loss": 0.682, "step": 29059 }, { "epoch": 0.8484423812443432, "grad_norm": 0.7170622666335044, "learning_rate": 8.420113544201136e-07, "loss": 0.6021, "step": 29060 }, { "epoch": 0.8484715774721906, "grad_norm": 0.7283354147533375, "learning_rate": 8.418491484184916e-07, "loss": 0.6744, "step": 29061 }, { "epoch": 0.848500773700038, "grad_norm": 0.7134043598983799, "learning_rate": 8.416869424168695e-07, "loss": 0.6339, "step": 29062 }, { "epoch": 0.8485299699278853, "grad_norm": 0.7624299833056517, "learning_rate": 8.415247364152475e-07, "loss": 0.6668, "step": 29063 }, { "epoch": 0.8485591661557327, "grad_norm": 0.737370382364212, "learning_rate": 8.413625304136254e-07, "loss": 0.645, "step": 29064 }, { "epoch": 0.84858836238358, "grad_norm": 0.7405716207927894, "learning_rate": 8.412003244120033e-07, "loss": 0.6335, "step": 29065 }, { "epoch": 0.8486175586114274, "grad_norm": 0.7138541090108295, "learning_rate": 8.410381184103812e-07, "loss": 0.6427, "step": 29066 }, { "epoch": 0.8486467548392748, "grad_norm": 0.751534164991938, "learning_rate": 8.408759124087591e-07, "loss": 0.628, "step": 29067 }, { "epoch": 0.8486759510671221, "grad_norm": 0.6838035924501913, "learning_rate": 8.407137064071372e-07, "loss": 0.5959, "step": 29068 }, { "epoch": 0.8487051472949695, "grad_norm": 0.6840177767079917, "learning_rate": 8.405515004055151e-07, "loss": 0.5787, "step": 29069 }, { "epoch": 0.8487343435228168, "grad_norm": 0.7512714513282465, "learning_rate": 8.40389294403893e-07, "loss": 0.695, "step": 29070 }, { "epoch": 0.8487635397506642, "grad_norm": 0.6878921388536828, "learning_rate": 8.402270884022709e-07, "loss": 0.5679, "step": 29071 }, { "epoch": 0.8487927359785116, "grad_norm": 1.067110718929132, "learning_rate": 8.400648824006489e-07, "loss": 0.7162, "step": 29072 }, { "epoch": 0.8488219322063589, "grad_norm": 0.6662782247244402, "learning_rate": 8.399026763990269e-07, "loss": 0.5237, "step": 29073 }, { "epoch": 0.8488511284342063, "grad_norm": 0.7208644953752806, "learning_rate": 8.397404703974048e-07, "loss": 0.6333, "step": 29074 }, { "epoch": 0.8488803246620537, "grad_norm": 0.7177595231056179, "learning_rate": 8.395782643957827e-07, "loss": 0.6603, "step": 29075 }, { "epoch": 0.848909520889901, "grad_norm": 0.7509072711169018, "learning_rate": 8.394160583941606e-07, "loss": 0.6934, "step": 29076 }, { "epoch": 0.8489387171177484, "grad_norm": 0.80325449891562, "learning_rate": 8.392538523925386e-07, "loss": 0.7141, "step": 29077 }, { "epoch": 0.8489679133455957, "grad_norm": 0.7388655294612, "learning_rate": 8.390916463909165e-07, "loss": 0.6608, "step": 29078 }, { "epoch": 0.8489971095734431, "grad_norm": 0.7054491861540577, "learning_rate": 8.389294403892944e-07, "loss": 0.6281, "step": 29079 }, { "epoch": 0.8490263058012905, "grad_norm": 0.7438451062150979, "learning_rate": 8.387672343876724e-07, "loss": 0.683, "step": 29080 }, { "epoch": 0.8490555020291378, "grad_norm": 0.7182476134076882, "learning_rate": 8.386050283860503e-07, "loss": 0.5966, "step": 29081 }, { "epoch": 0.8490846982569852, "grad_norm": 0.863146883186522, "learning_rate": 8.384428223844283e-07, "loss": 0.6, "step": 29082 }, { "epoch": 0.8491138944848325, "grad_norm": 0.7278012682977071, "learning_rate": 8.382806163828062e-07, "loss": 0.6707, "step": 29083 }, { "epoch": 0.8491430907126799, "grad_norm": 0.7081633582127626, "learning_rate": 8.381184103811841e-07, "loss": 0.6316, "step": 29084 }, { "epoch": 0.8491722869405273, "grad_norm": 0.7418318728915286, "learning_rate": 8.37956204379562e-07, "loss": 0.657, "step": 29085 }, { "epoch": 0.8492014831683746, "grad_norm": 0.682242046027669, "learning_rate": 8.377939983779402e-07, "loss": 0.5587, "step": 29086 }, { "epoch": 0.849230679396222, "grad_norm": 0.6460902339839756, "learning_rate": 8.376317923763181e-07, "loss": 0.4853, "step": 29087 }, { "epoch": 0.8492598756240693, "grad_norm": 0.7006898023862984, "learning_rate": 8.37469586374696e-07, "loss": 0.6039, "step": 29088 }, { "epoch": 0.8492890718519167, "grad_norm": 0.7554986004726006, "learning_rate": 8.373073803730739e-07, "loss": 0.7041, "step": 29089 }, { "epoch": 0.8493182680797641, "grad_norm": 0.7177752099406846, "learning_rate": 8.371451743714518e-07, "loss": 0.6273, "step": 29090 }, { "epoch": 0.8493474643076114, "grad_norm": 0.7143088885440475, "learning_rate": 8.369829683698298e-07, "loss": 0.6359, "step": 29091 }, { "epoch": 0.8493766605354588, "grad_norm": 0.7612529392255795, "learning_rate": 8.368207623682078e-07, "loss": 0.5649, "step": 29092 }, { "epoch": 0.8494058567633062, "grad_norm": 0.7619611393583078, "learning_rate": 8.366585563665857e-07, "loss": 0.6491, "step": 29093 }, { "epoch": 0.8494350529911535, "grad_norm": 0.787024598859322, "learning_rate": 8.364963503649636e-07, "loss": 0.7133, "step": 29094 }, { "epoch": 0.8494642492190009, "grad_norm": 0.7984242558401363, "learning_rate": 8.363341443633415e-07, "loss": 0.7148, "step": 29095 }, { "epoch": 0.8494934454468482, "grad_norm": 0.767902456593118, "learning_rate": 8.361719383617195e-07, "loss": 0.6468, "step": 29096 }, { "epoch": 0.8495226416746956, "grad_norm": 0.9756944050079497, "learning_rate": 8.360097323600974e-07, "loss": 0.8076, "step": 29097 }, { "epoch": 0.849551837902543, "grad_norm": 0.7400933985146726, "learning_rate": 8.358475263584753e-07, "loss": 0.6722, "step": 29098 }, { "epoch": 0.8495810341303903, "grad_norm": 0.7163433832556252, "learning_rate": 8.356853203568532e-07, "loss": 0.6098, "step": 29099 }, { "epoch": 0.8496102303582377, "grad_norm": 0.7135435013402163, "learning_rate": 8.355231143552312e-07, "loss": 0.6091, "step": 29100 }, { "epoch": 0.849639426586085, "grad_norm": 0.6801508531947149, "learning_rate": 8.353609083536092e-07, "loss": 0.6047, "step": 29101 }, { "epoch": 0.8496686228139324, "grad_norm": 0.7607323788175212, "learning_rate": 8.351987023519871e-07, "loss": 0.624, "step": 29102 }, { "epoch": 0.8496978190417798, "grad_norm": 0.7545186130985854, "learning_rate": 8.35036496350365e-07, "loss": 0.7086, "step": 29103 }, { "epoch": 0.8497270152696271, "grad_norm": 0.6811670277117178, "learning_rate": 8.348742903487429e-07, "loss": 0.5501, "step": 29104 }, { "epoch": 0.8497562114974745, "grad_norm": 0.7948344830373664, "learning_rate": 8.34712084347121e-07, "loss": 0.6943, "step": 29105 }, { "epoch": 0.8497854077253219, "grad_norm": 0.7686515094500719, "learning_rate": 8.345498783454989e-07, "loss": 0.7462, "step": 29106 }, { "epoch": 0.8498146039531692, "grad_norm": 0.7457069518785521, "learning_rate": 8.343876723438768e-07, "loss": 0.6653, "step": 29107 }, { "epoch": 0.8498438001810166, "grad_norm": 0.7748780346003908, "learning_rate": 8.342254663422547e-07, "loss": 0.701, "step": 29108 }, { "epoch": 0.8498729964088639, "grad_norm": 0.7646985209301456, "learning_rate": 8.340632603406326e-07, "loss": 0.7503, "step": 29109 }, { "epoch": 0.8499021926367113, "grad_norm": 0.7032233302986782, "learning_rate": 8.339010543390106e-07, "loss": 0.5812, "step": 29110 }, { "epoch": 0.8499313888645587, "grad_norm": 0.7295644231663673, "learning_rate": 8.337388483373886e-07, "loss": 0.6227, "step": 29111 }, { "epoch": 0.849960585092406, "grad_norm": 0.70079402674487, "learning_rate": 8.335766423357665e-07, "loss": 0.6627, "step": 29112 }, { "epoch": 0.8499897813202534, "grad_norm": 0.7985418008578604, "learning_rate": 8.334144363341444e-07, "loss": 0.7003, "step": 29113 }, { "epoch": 0.8500189775481007, "grad_norm": 0.6976492688790141, "learning_rate": 8.332522303325223e-07, "loss": 0.5921, "step": 29114 }, { "epoch": 0.8500481737759481, "grad_norm": 0.7356014968015286, "learning_rate": 8.330900243309003e-07, "loss": 0.6648, "step": 29115 }, { "epoch": 0.8500773700037955, "grad_norm": 0.6970512868752907, "learning_rate": 8.329278183292782e-07, "loss": 0.6218, "step": 29116 }, { "epoch": 0.8501065662316428, "grad_norm": 0.7188440065209194, "learning_rate": 8.327656123276561e-07, "loss": 0.6604, "step": 29117 }, { "epoch": 0.8501357624594902, "grad_norm": 0.7123494132047655, "learning_rate": 8.32603406326034e-07, "loss": 0.616, "step": 29118 }, { "epoch": 0.8501649586873375, "grad_norm": 0.7076060603871231, "learning_rate": 8.32441200324412e-07, "loss": 0.6078, "step": 29119 }, { "epoch": 0.8501941549151849, "grad_norm": 0.7555256757122478, "learning_rate": 8.322789943227901e-07, "loss": 0.6877, "step": 29120 }, { "epoch": 0.8502233511430323, "grad_norm": 0.6689305786108575, "learning_rate": 8.32116788321168e-07, "loss": 0.5454, "step": 29121 }, { "epoch": 0.8502525473708796, "grad_norm": 0.6867613064943365, "learning_rate": 8.319545823195459e-07, "loss": 0.5697, "step": 29122 }, { "epoch": 0.850281743598727, "grad_norm": 0.6903441736855155, "learning_rate": 8.317923763179238e-07, "loss": 0.6181, "step": 29123 }, { "epoch": 0.8503109398265744, "grad_norm": 0.7853282511021125, "learning_rate": 8.316301703163019e-07, "loss": 0.6392, "step": 29124 }, { "epoch": 0.8503401360544217, "grad_norm": 0.7190627242970371, "learning_rate": 8.314679643146798e-07, "loss": 0.6638, "step": 29125 }, { "epoch": 0.8503693322822692, "grad_norm": 0.7679960387269615, "learning_rate": 8.313057583130577e-07, "loss": 0.7048, "step": 29126 }, { "epoch": 0.8503985285101165, "grad_norm": 0.7242921002433231, "learning_rate": 8.311435523114356e-07, "loss": 0.6547, "step": 29127 }, { "epoch": 0.8504277247379639, "grad_norm": 0.6881622329111137, "learning_rate": 8.309813463098135e-07, "loss": 0.5784, "step": 29128 }, { "epoch": 0.8504569209658113, "grad_norm": 0.7283518904406862, "learning_rate": 8.308191403081915e-07, "loss": 0.6534, "step": 29129 }, { "epoch": 0.8504861171936586, "grad_norm": 0.7930760351150923, "learning_rate": 8.306569343065694e-07, "loss": 0.6743, "step": 29130 }, { "epoch": 0.850515313421506, "grad_norm": 0.8195197847499179, "learning_rate": 8.304947283049474e-07, "loss": 0.6491, "step": 29131 }, { "epoch": 0.8505445096493534, "grad_norm": 0.7014416080134993, "learning_rate": 8.303325223033253e-07, "loss": 0.5617, "step": 29132 }, { "epoch": 0.8505737058772007, "grad_norm": 0.7459013173208517, "learning_rate": 8.301703163017032e-07, "loss": 0.6872, "step": 29133 }, { "epoch": 0.8506029021050481, "grad_norm": 0.7222951469298103, "learning_rate": 8.300081103000812e-07, "loss": 0.6768, "step": 29134 }, { "epoch": 0.8506320983328954, "grad_norm": 0.821102596428426, "learning_rate": 8.298459042984591e-07, "loss": 0.6562, "step": 29135 }, { "epoch": 0.8506612945607428, "grad_norm": 0.6717256843410322, "learning_rate": 8.29683698296837e-07, "loss": 0.5626, "step": 29136 }, { "epoch": 0.8506904907885902, "grad_norm": 0.7131456071791804, "learning_rate": 8.295214922952149e-07, "loss": 0.6274, "step": 29137 }, { "epoch": 0.8507196870164375, "grad_norm": 0.7300793200561165, "learning_rate": 8.29359286293593e-07, "loss": 0.6622, "step": 29138 }, { "epoch": 0.8507488832442849, "grad_norm": 0.6931629718135415, "learning_rate": 8.291970802919709e-07, "loss": 0.5719, "step": 29139 }, { "epoch": 0.8507780794721322, "grad_norm": 0.6836122548579597, "learning_rate": 8.290348742903488e-07, "loss": 0.5586, "step": 29140 }, { "epoch": 0.8508072756999796, "grad_norm": 0.7182159066151105, "learning_rate": 8.288726682887267e-07, "loss": 0.6436, "step": 29141 }, { "epoch": 0.850836471927827, "grad_norm": 0.7455614587303424, "learning_rate": 8.287104622871046e-07, "loss": 0.6884, "step": 29142 }, { "epoch": 0.8508656681556743, "grad_norm": 0.6648104244252319, "learning_rate": 8.285482562854827e-07, "loss": 0.5504, "step": 29143 }, { "epoch": 0.8508948643835217, "grad_norm": 0.7347922868957818, "learning_rate": 8.283860502838606e-07, "loss": 0.6697, "step": 29144 }, { "epoch": 0.850924060611369, "grad_norm": 0.665180569462371, "learning_rate": 8.282238442822385e-07, "loss": 0.558, "step": 29145 }, { "epoch": 0.8509532568392164, "grad_norm": 0.7114796247787456, "learning_rate": 8.280616382806164e-07, "loss": 0.6717, "step": 29146 }, { "epoch": 0.8509824530670638, "grad_norm": 0.7075968406498272, "learning_rate": 8.278994322789943e-07, "loss": 0.6545, "step": 29147 }, { "epoch": 0.8510116492949111, "grad_norm": 0.7703418329355465, "learning_rate": 8.277372262773723e-07, "loss": 0.7427, "step": 29148 }, { "epoch": 0.8510408455227585, "grad_norm": 0.7737263274972663, "learning_rate": 8.275750202757502e-07, "loss": 0.6252, "step": 29149 }, { "epoch": 0.8510700417506059, "grad_norm": 0.7431228647411116, "learning_rate": 8.274128142741282e-07, "loss": 0.7332, "step": 29150 }, { "epoch": 0.8510992379784532, "grad_norm": 0.7699291040275934, "learning_rate": 8.272506082725062e-07, "loss": 0.6589, "step": 29151 }, { "epoch": 0.8511284342063006, "grad_norm": 0.7148220923594902, "learning_rate": 8.27088402270884e-07, "loss": 0.6431, "step": 29152 }, { "epoch": 0.8511576304341479, "grad_norm": 0.7422621033292188, "learning_rate": 8.269261962692621e-07, "loss": 0.6812, "step": 29153 }, { "epoch": 0.8511868266619953, "grad_norm": 0.7389653492222902, "learning_rate": 8.2676399026764e-07, "loss": 0.6925, "step": 29154 }, { "epoch": 0.8512160228898427, "grad_norm": 0.7299119787449184, "learning_rate": 8.266017842660179e-07, "loss": 0.6628, "step": 29155 }, { "epoch": 0.85124521911769, "grad_norm": 0.6916591935193168, "learning_rate": 8.264395782643958e-07, "loss": 0.5965, "step": 29156 }, { "epoch": 0.8512744153455374, "grad_norm": 0.7233781429149049, "learning_rate": 8.262773722627739e-07, "loss": 0.6515, "step": 29157 }, { "epoch": 0.8513036115733847, "grad_norm": 0.7038377525642338, "learning_rate": 8.261151662611518e-07, "loss": 0.6035, "step": 29158 }, { "epoch": 0.8513328078012321, "grad_norm": 0.7359733668320534, "learning_rate": 8.259529602595297e-07, "loss": 0.6366, "step": 29159 }, { "epoch": 0.8513620040290795, "grad_norm": 0.6968834190529514, "learning_rate": 8.257907542579076e-07, "loss": 0.569, "step": 29160 }, { "epoch": 0.8513912002569268, "grad_norm": 0.7029408767846592, "learning_rate": 8.256285482562855e-07, "loss": 0.5833, "step": 29161 }, { "epoch": 0.8514203964847742, "grad_norm": 0.7180323049006836, "learning_rate": 8.254663422546636e-07, "loss": 0.6503, "step": 29162 }, { "epoch": 0.8514495927126216, "grad_norm": 0.7466804761298848, "learning_rate": 8.253041362530415e-07, "loss": 0.7091, "step": 29163 }, { "epoch": 0.8514787889404689, "grad_norm": 0.8256755880177313, "learning_rate": 8.251419302514194e-07, "loss": 0.6035, "step": 29164 }, { "epoch": 0.8515079851683163, "grad_norm": 0.7226224135996986, "learning_rate": 8.249797242497973e-07, "loss": 0.6125, "step": 29165 }, { "epoch": 0.8515371813961636, "grad_norm": 0.7146355261777325, "learning_rate": 8.248175182481752e-07, "loss": 0.6176, "step": 29166 }, { "epoch": 0.851566377624011, "grad_norm": 0.7053072427301978, "learning_rate": 8.246553122465532e-07, "loss": 0.592, "step": 29167 }, { "epoch": 0.8515955738518584, "grad_norm": 0.7689669621483974, "learning_rate": 8.244931062449311e-07, "loss": 0.7167, "step": 29168 }, { "epoch": 0.8516247700797057, "grad_norm": 0.6795156395289002, "learning_rate": 8.243309002433091e-07, "loss": 0.5686, "step": 29169 }, { "epoch": 0.8516539663075531, "grad_norm": 0.6724610629302019, "learning_rate": 8.24168694241687e-07, "loss": 0.5809, "step": 29170 }, { "epoch": 0.8516831625354004, "grad_norm": 0.6906773224934795, "learning_rate": 8.24006488240065e-07, "loss": 0.5839, "step": 29171 }, { "epoch": 0.8517123587632478, "grad_norm": 0.7284907525834017, "learning_rate": 8.238442822384429e-07, "loss": 0.5712, "step": 29172 }, { "epoch": 0.8517415549910952, "grad_norm": 0.7301111121202699, "learning_rate": 8.236820762368208e-07, "loss": 0.6607, "step": 29173 }, { "epoch": 0.8517707512189425, "grad_norm": 0.7504416915019332, "learning_rate": 8.235198702351987e-07, "loss": 0.694, "step": 29174 }, { "epoch": 0.8517999474467899, "grad_norm": 0.6785345233674794, "learning_rate": 8.233576642335766e-07, "loss": 0.5892, "step": 29175 }, { "epoch": 0.8518291436746372, "grad_norm": 0.7384726080394346, "learning_rate": 8.231954582319547e-07, "loss": 0.6548, "step": 29176 }, { "epoch": 0.8518583399024846, "grad_norm": 0.7333976158704957, "learning_rate": 8.230332522303326e-07, "loss": 0.6424, "step": 29177 }, { "epoch": 0.851887536130332, "grad_norm": 0.6756688661924366, "learning_rate": 8.228710462287105e-07, "loss": 0.6068, "step": 29178 }, { "epoch": 0.8519167323581793, "grad_norm": 0.7301283265140419, "learning_rate": 8.227088402270884e-07, "loss": 0.5885, "step": 29179 }, { "epoch": 0.8519459285860267, "grad_norm": 0.7324058754594603, "learning_rate": 8.225466342254663e-07, "loss": 0.6454, "step": 29180 }, { "epoch": 0.851975124813874, "grad_norm": 0.7077877388105991, "learning_rate": 8.223844282238444e-07, "loss": 0.6642, "step": 29181 }, { "epoch": 0.8520043210417214, "grad_norm": 0.731816676074268, "learning_rate": 8.222222222222223e-07, "loss": 0.675, "step": 29182 }, { "epoch": 0.8520335172695688, "grad_norm": 0.7021554062923402, "learning_rate": 8.220600162206003e-07, "loss": 0.5075, "step": 29183 }, { "epoch": 0.8520627134974161, "grad_norm": 0.7560720856694401, "learning_rate": 8.218978102189782e-07, "loss": 0.6536, "step": 29184 }, { "epoch": 0.8520919097252635, "grad_norm": 0.6959189008738041, "learning_rate": 8.21735604217356e-07, "loss": 0.6148, "step": 29185 }, { "epoch": 0.8521211059531109, "grad_norm": 0.7110352492579503, "learning_rate": 8.215733982157341e-07, "loss": 0.6086, "step": 29186 }, { "epoch": 0.8521503021809582, "grad_norm": 0.8072260881770369, "learning_rate": 8.21411192214112e-07, "loss": 0.6154, "step": 29187 }, { "epoch": 0.8521794984088056, "grad_norm": 0.6871877951413816, "learning_rate": 8.212489862124899e-07, "loss": 0.5874, "step": 29188 }, { "epoch": 0.8522086946366529, "grad_norm": 0.7892764398710287, "learning_rate": 8.210867802108679e-07, "loss": 0.6781, "step": 29189 }, { "epoch": 0.8522378908645003, "grad_norm": 0.7515413362132409, "learning_rate": 8.209245742092459e-07, "loss": 0.633, "step": 29190 }, { "epoch": 0.8522670870923477, "grad_norm": 0.6929144462144617, "learning_rate": 8.207623682076238e-07, "loss": 0.5943, "step": 29191 }, { "epoch": 0.852296283320195, "grad_norm": 0.7166971960751357, "learning_rate": 8.206001622060017e-07, "loss": 0.5805, "step": 29192 }, { "epoch": 0.8523254795480424, "grad_norm": 0.7365167378345178, "learning_rate": 8.204379562043796e-07, "loss": 0.6808, "step": 29193 }, { "epoch": 0.8523546757758897, "grad_norm": 0.7613136380716411, "learning_rate": 8.202757502027575e-07, "loss": 0.6971, "step": 29194 }, { "epoch": 0.8523838720037371, "grad_norm": 0.6956609377307217, "learning_rate": 8.201135442011356e-07, "loss": 0.5662, "step": 29195 }, { "epoch": 0.8524130682315845, "grad_norm": 0.7559603372675365, "learning_rate": 8.199513381995135e-07, "loss": 0.6633, "step": 29196 }, { "epoch": 0.8524422644594318, "grad_norm": 0.6938254987795043, "learning_rate": 8.197891321978914e-07, "loss": 0.6213, "step": 29197 }, { "epoch": 0.8524714606872792, "grad_norm": 0.7531823345390212, "learning_rate": 8.196269261962693e-07, "loss": 0.6648, "step": 29198 }, { "epoch": 0.8525006569151266, "grad_norm": 0.7417562186668208, "learning_rate": 8.194647201946472e-07, "loss": 0.567, "step": 29199 }, { "epoch": 0.8525298531429739, "grad_norm": 0.6869206752186519, "learning_rate": 8.193025141930253e-07, "loss": 0.5878, "step": 29200 }, { "epoch": 0.8525590493708213, "grad_norm": 0.7177353891982085, "learning_rate": 8.191403081914032e-07, "loss": 0.6646, "step": 29201 }, { "epoch": 0.8525882455986686, "grad_norm": 0.775824299448754, "learning_rate": 8.189781021897811e-07, "loss": 0.7125, "step": 29202 }, { "epoch": 0.852617441826516, "grad_norm": 0.7148656332389293, "learning_rate": 8.18815896188159e-07, "loss": 0.6641, "step": 29203 }, { "epoch": 0.8526466380543634, "grad_norm": 0.7230765129345825, "learning_rate": 8.18653690186537e-07, "loss": 0.6389, "step": 29204 }, { "epoch": 0.8526758342822107, "grad_norm": 0.7109062233262422, "learning_rate": 8.184914841849149e-07, "loss": 0.5878, "step": 29205 }, { "epoch": 0.8527050305100581, "grad_norm": 0.7450218693568273, "learning_rate": 8.183292781832928e-07, "loss": 0.6952, "step": 29206 }, { "epoch": 0.8527342267379054, "grad_norm": 0.7071287341493887, "learning_rate": 8.181670721816707e-07, "loss": 0.5778, "step": 29207 }, { "epoch": 0.8527634229657528, "grad_norm": 0.651488551844841, "learning_rate": 8.180048661800487e-07, "loss": 0.5542, "step": 29208 }, { "epoch": 0.8527926191936002, "grad_norm": 0.7710610052385442, "learning_rate": 8.178426601784267e-07, "loss": 0.6765, "step": 29209 }, { "epoch": 0.8528218154214475, "grad_norm": 0.7394060170961171, "learning_rate": 8.176804541768046e-07, "loss": 0.6125, "step": 29210 }, { "epoch": 0.8528510116492949, "grad_norm": 0.6697549599877198, "learning_rate": 8.175182481751825e-07, "loss": 0.5502, "step": 29211 }, { "epoch": 0.8528802078771422, "grad_norm": 0.6835597561632808, "learning_rate": 8.173560421735604e-07, "loss": 0.5573, "step": 29212 }, { "epoch": 0.8529094041049896, "grad_norm": 0.6665585079805524, "learning_rate": 8.171938361719383e-07, "loss": 0.5761, "step": 29213 }, { "epoch": 0.852938600332837, "grad_norm": 0.6760314505932261, "learning_rate": 8.170316301703164e-07, "loss": 0.5477, "step": 29214 }, { "epoch": 0.8529677965606843, "grad_norm": 0.7614219499740105, "learning_rate": 8.168694241686944e-07, "loss": 0.6361, "step": 29215 }, { "epoch": 0.8529969927885317, "grad_norm": 0.6823682972673377, "learning_rate": 8.167072181670723e-07, "loss": 0.6134, "step": 29216 }, { "epoch": 0.853026189016379, "grad_norm": 0.7816284033752288, "learning_rate": 8.165450121654502e-07, "loss": 0.6857, "step": 29217 }, { "epoch": 0.8530553852442264, "grad_norm": 0.7598023154436884, "learning_rate": 8.163828061638281e-07, "loss": 0.5701, "step": 29218 }, { "epoch": 0.8530845814720738, "grad_norm": 0.7387844044953651, "learning_rate": 8.162206001622061e-07, "loss": 0.6774, "step": 29219 }, { "epoch": 0.8531137776999211, "grad_norm": 0.713507587099409, "learning_rate": 8.160583941605841e-07, "loss": 0.657, "step": 29220 }, { "epoch": 0.8531429739277685, "grad_norm": 0.7296683095324905, "learning_rate": 8.15896188158962e-07, "loss": 0.6082, "step": 29221 }, { "epoch": 0.8531721701556159, "grad_norm": 0.6808457440826806, "learning_rate": 8.157339821573399e-07, "loss": 0.5864, "step": 29222 }, { "epoch": 0.8532013663834632, "grad_norm": 0.7410837722003061, "learning_rate": 8.155717761557179e-07, "loss": 0.6746, "step": 29223 }, { "epoch": 0.8532305626113106, "grad_norm": 0.7201053817074387, "learning_rate": 8.154095701540958e-07, "loss": 0.608, "step": 29224 }, { "epoch": 0.853259758839158, "grad_norm": 0.77096515128972, "learning_rate": 8.152473641524737e-07, "loss": 0.6012, "step": 29225 }, { "epoch": 0.8532889550670053, "grad_norm": 0.7095014038241274, "learning_rate": 8.150851581508516e-07, "loss": 0.6403, "step": 29226 }, { "epoch": 0.8533181512948527, "grad_norm": 0.7913340022872383, "learning_rate": 8.149229521492296e-07, "loss": 0.7683, "step": 29227 }, { "epoch": 0.8533473475227, "grad_norm": 0.7325068035470038, "learning_rate": 8.147607461476076e-07, "loss": 0.6556, "step": 29228 }, { "epoch": 0.8533765437505474, "grad_norm": 0.7025967606015783, "learning_rate": 8.145985401459855e-07, "loss": 0.6541, "step": 29229 }, { "epoch": 0.8534057399783948, "grad_norm": 0.9020303740631802, "learning_rate": 8.144363341443634e-07, "loss": 0.5757, "step": 29230 }, { "epoch": 0.8534349362062421, "grad_norm": 0.8033356471987799, "learning_rate": 8.142741281427413e-07, "loss": 0.7998, "step": 29231 }, { "epoch": 0.8534641324340895, "grad_norm": 0.7352443264189625, "learning_rate": 8.141119221411192e-07, "loss": 0.6277, "step": 29232 }, { "epoch": 0.8534933286619368, "grad_norm": 0.7806672920979438, "learning_rate": 8.139497161394973e-07, "loss": 0.7324, "step": 29233 }, { "epoch": 0.8535225248897842, "grad_norm": 0.6821469499519035, "learning_rate": 8.137875101378752e-07, "loss": 0.5916, "step": 29234 }, { "epoch": 0.8535517211176316, "grad_norm": 0.710383959106119, "learning_rate": 8.136253041362531e-07, "loss": 0.6423, "step": 29235 }, { "epoch": 0.8535809173454789, "grad_norm": 0.7000319124404109, "learning_rate": 8.13463098134631e-07, "loss": 0.5728, "step": 29236 }, { "epoch": 0.8536101135733263, "grad_norm": 0.7794679236001562, "learning_rate": 8.13300892133009e-07, "loss": 0.7073, "step": 29237 }, { "epoch": 0.8536393098011736, "grad_norm": 0.7697681530512636, "learning_rate": 8.131386861313869e-07, "loss": 0.6586, "step": 29238 }, { "epoch": 0.853668506029021, "grad_norm": 0.7910035438697944, "learning_rate": 8.129764801297649e-07, "loss": 0.6974, "step": 29239 }, { "epoch": 0.8536977022568684, "grad_norm": 0.714210230270244, "learning_rate": 8.128142741281428e-07, "loss": 0.6543, "step": 29240 }, { "epoch": 0.8537268984847157, "grad_norm": 0.6799816672248725, "learning_rate": 8.126520681265207e-07, "loss": 0.5404, "step": 29241 }, { "epoch": 0.8537560947125631, "grad_norm": 0.7339535948222728, "learning_rate": 8.124898621248987e-07, "loss": 0.6167, "step": 29242 }, { "epoch": 0.8537852909404104, "grad_norm": 0.7548308565440667, "learning_rate": 8.123276561232766e-07, "loss": 0.7004, "step": 29243 }, { "epoch": 0.8538144871682578, "grad_norm": 0.6938584633320592, "learning_rate": 8.121654501216545e-07, "loss": 0.6397, "step": 29244 }, { "epoch": 0.8538436833961052, "grad_norm": 0.6921065203989992, "learning_rate": 8.120032441200324e-07, "loss": 0.6063, "step": 29245 }, { "epoch": 0.8538728796239525, "grad_norm": 0.7155985710051924, "learning_rate": 8.118410381184103e-07, "loss": 0.6322, "step": 29246 }, { "epoch": 0.8539020758518, "grad_norm": 0.7184076653426533, "learning_rate": 8.116788321167885e-07, "loss": 0.6329, "step": 29247 }, { "epoch": 0.8539312720796474, "grad_norm": 0.7630395095536624, "learning_rate": 8.115166261151664e-07, "loss": 0.6769, "step": 29248 }, { "epoch": 0.8539604683074947, "grad_norm": 0.7269662605486198, "learning_rate": 8.113544201135443e-07, "loss": 0.673, "step": 29249 }, { "epoch": 0.8539896645353421, "grad_norm": 0.6887961310969051, "learning_rate": 8.111922141119222e-07, "loss": 0.5687, "step": 29250 }, { "epoch": 0.8540188607631894, "grad_norm": 0.7104569013330615, "learning_rate": 8.110300081103001e-07, "loss": 0.6096, "step": 29251 }, { "epoch": 0.8540480569910368, "grad_norm": 0.7708340500520839, "learning_rate": 8.108678021086782e-07, "loss": 0.69, "step": 29252 }, { "epoch": 0.8540772532188842, "grad_norm": 0.7435890525797875, "learning_rate": 8.107055961070561e-07, "loss": 0.695, "step": 29253 }, { "epoch": 0.8541064494467315, "grad_norm": 0.725499758801676, "learning_rate": 8.10543390105434e-07, "loss": 0.6526, "step": 29254 }, { "epoch": 0.8541356456745789, "grad_norm": 0.7870542312363472, "learning_rate": 8.103811841038119e-07, "loss": 0.6548, "step": 29255 }, { "epoch": 0.8541648419024263, "grad_norm": 0.7574003915559723, "learning_rate": 8.102189781021899e-07, "loss": 0.6475, "step": 29256 }, { "epoch": 0.8541940381302736, "grad_norm": 0.6710070591599716, "learning_rate": 8.100567721005678e-07, "loss": 0.5423, "step": 29257 }, { "epoch": 0.854223234358121, "grad_norm": 0.7340517894323958, "learning_rate": 8.098945660989458e-07, "loss": 0.6973, "step": 29258 }, { "epoch": 0.8542524305859683, "grad_norm": 0.7096356020171509, "learning_rate": 8.097323600973237e-07, "loss": 0.5851, "step": 29259 }, { "epoch": 0.8542816268138157, "grad_norm": 0.6965869428186785, "learning_rate": 8.095701540957016e-07, "loss": 0.6337, "step": 29260 }, { "epoch": 0.8543108230416631, "grad_norm": 0.717875012305054, "learning_rate": 8.094079480940796e-07, "loss": 0.6239, "step": 29261 }, { "epoch": 0.8543400192695104, "grad_norm": 0.6850983785019569, "learning_rate": 8.092457420924575e-07, "loss": 0.5728, "step": 29262 }, { "epoch": 0.8543692154973578, "grad_norm": 0.708865710708535, "learning_rate": 8.090835360908354e-07, "loss": 0.6262, "step": 29263 }, { "epoch": 0.8543984117252051, "grad_norm": 0.6792093316506803, "learning_rate": 8.089213300892133e-07, "loss": 0.6156, "step": 29264 }, { "epoch": 0.8544276079530525, "grad_norm": 0.7121403647821377, "learning_rate": 8.087591240875912e-07, "loss": 0.6333, "step": 29265 }, { "epoch": 0.8544568041808999, "grad_norm": 0.6872240528053132, "learning_rate": 8.085969180859693e-07, "loss": 0.5722, "step": 29266 }, { "epoch": 0.8544860004087472, "grad_norm": 0.7319015518857043, "learning_rate": 8.084347120843472e-07, "loss": 0.6168, "step": 29267 }, { "epoch": 0.8545151966365946, "grad_norm": 0.7110286761584879, "learning_rate": 8.082725060827251e-07, "loss": 0.6195, "step": 29268 }, { "epoch": 0.854544392864442, "grad_norm": 0.7290920432609383, "learning_rate": 8.08110300081103e-07, "loss": 0.6831, "step": 29269 }, { "epoch": 0.8545735890922893, "grad_norm": 0.7307254671586461, "learning_rate": 8.079480940794809e-07, "loss": 0.6428, "step": 29270 }, { "epoch": 0.8546027853201367, "grad_norm": 0.7277977147715012, "learning_rate": 8.07785888077859e-07, "loss": 0.5827, "step": 29271 }, { "epoch": 0.854631981547984, "grad_norm": 0.6881217891515299, "learning_rate": 8.076236820762369e-07, "loss": 0.6042, "step": 29272 }, { "epoch": 0.8546611777758314, "grad_norm": 0.6920780628667275, "learning_rate": 8.074614760746148e-07, "loss": 0.5752, "step": 29273 }, { "epoch": 0.8546903740036788, "grad_norm": 0.7436327331545521, "learning_rate": 8.072992700729927e-07, "loss": 0.6675, "step": 29274 }, { "epoch": 0.8547195702315261, "grad_norm": 0.7159301061977136, "learning_rate": 8.071370640713707e-07, "loss": 0.6663, "step": 29275 }, { "epoch": 0.8547487664593735, "grad_norm": 0.701198511917719, "learning_rate": 8.069748580697486e-07, "loss": 0.6678, "step": 29276 }, { "epoch": 0.8547779626872208, "grad_norm": 0.7389906484104948, "learning_rate": 8.068126520681265e-07, "loss": 0.6299, "step": 29277 }, { "epoch": 0.8548071589150682, "grad_norm": 0.7258186161105752, "learning_rate": 8.066504460665045e-07, "loss": 0.6318, "step": 29278 }, { "epoch": 0.8548363551429156, "grad_norm": 0.7366139170465414, "learning_rate": 8.064882400648824e-07, "loss": 0.6549, "step": 29279 }, { "epoch": 0.8548655513707629, "grad_norm": 0.7667362329406952, "learning_rate": 8.063260340632605e-07, "loss": 0.6591, "step": 29280 }, { "epoch": 0.8548947475986103, "grad_norm": 0.6700233523260358, "learning_rate": 8.061638280616384e-07, "loss": 0.5577, "step": 29281 }, { "epoch": 0.8549239438264576, "grad_norm": 0.6921159518806117, "learning_rate": 8.060016220600163e-07, "loss": 0.5909, "step": 29282 }, { "epoch": 0.854953140054305, "grad_norm": 0.676542605106978, "learning_rate": 8.058394160583942e-07, "loss": 0.5791, "step": 29283 }, { "epoch": 0.8549823362821524, "grad_norm": 0.7266939814663822, "learning_rate": 8.056772100567721e-07, "loss": 0.6098, "step": 29284 }, { "epoch": 0.8550115325099997, "grad_norm": 0.6863736378898352, "learning_rate": 8.055150040551502e-07, "loss": 0.5997, "step": 29285 }, { "epoch": 0.8550407287378471, "grad_norm": 0.8392087940844544, "learning_rate": 8.053527980535281e-07, "loss": 0.7328, "step": 29286 }, { "epoch": 0.8550699249656945, "grad_norm": 0.7640495681025274, "learning_rate": 8.05190592051906e-07, "loss": 0.6714, "step": 29287 }, { "epoch": 0.8550991211935418, "grad_norm": 0.8300499788548686, "learning_rate": 8.050283860502839e-07, "loss": 0.697, "step": 29288 }, { "epoch": 0.8551283174213892, "grad_norm": 0.742286921801619, "learning_rate": 8.04866180048662e-07, "loss": 0.6328, "step": 29289 }, { "epoch": 0.8551575136492365, "grad_norm": 0.7393087682085003, "learning_rate": 8.047039740470399e-07, "loss": 0.6456, "step": 29290 }, { "epoch": 0.8551867098770839, "grad_norm": 0.7215330606094138, "learning_rate": 8.045417680454178e-07, "loss": 0.6635, "step": 29291 }, { "epoch": 0.8552159061049313, "grad_norm": 0.7226560368673998, "learning_rate": 8.043795620437957e-07, "loss": 0.6888, "step": 29292 }, { "epoch": 0.8552451023327786, "grad_norm": 0.7465539460511436, "learning_rate": 8.042173560421736e-07, "loss": 0.6117, "step": 29293 }, { "epoch": 0.855274298560626, "grad_norm": 0.7220181083829121, "learning_rate": 8.040551500405516e-07, "loss": 0.6199, "step": 29294 }, { "epoch": 0.8553034947884733, "grad_norm": 0.7187558559301522, "learning_rate": 8.038929440389295e-07, "loss": 0.6265, "step": 29295 }, { "epoch": 0.8553326910163207, "grad_norm": 0.79741272237879, "learning_rate": 8.037307380373074e-07, "loss": 0.6347, "step": 29296 }, { "epoch": 0.8553618872441681, "grad_norm": 0.7016888661487773, "learning_rate": 8.035685320356854e-07, "loss": 0.5831, "step": 29297 }, { "epoch": 0.8553910834720154, "grad_norm": 0.7254982200978358, "learning_rate": 8.034063260340633e-07, "loss": 0.6958, "step": 29298 }, { "epoch": 0.8554202796998628, "grad_norm": 0.7254603959273019, "learning_rate": 8.032441200324413e-07, "loss": 0.6683, "step": 29299 }, { "epoch": 0.8554494759277101, "grad_norm": 0.7176632498964044, "learning_rate": 8.030819140308192e-07, "loss": 0.6192, "step": 29300 }, { "epoch": 0.8554786721555575, "grad_norm": 0.7029751107838973, "learning_rate": 8.029197080291971e-07, "loss": 0.5733, "step": 29301 }, { "epoch": 0.8555078683834049, "grad_norm": 0.7544956702475124, "learning_rate": 8.02757502027575e-07, "loss": 0.7143, "step": 29302 }, { "epoch": 0.8555370646112522, "grad_norm": 0.9100109377467147, "learning_rate": 8.025952960259529e-07, "loss": 0.7314, "step": 29303 }, { "epoch": 0.8555662608390996, "grad_norm": 0.7220508246681073, "learning_rate": 8.02433090024331e-07, "loss": 0.6126, "step": 29304 }, { "epoch": 0.855595457066947, "grad_norm": 0.7109551543323662, "learning_rate": 8.022708840227089e-07, "loss": 0.6632, "step": 29305 }, { "epoch": 0.8556246532947943, "grad_norm": 0.7129315931135336, "learning_rate": 8.021086780210868e-07, "loss": 0.6667, "step": 29306 }, { "epoch": 0.8556538495226417, "grad_norm": 0.7626098329764426, "learning_rate": 8.019464720194647e-07, "loss": 0.6904, "step": 29307 }, { "epoch": 0.855683045750489, "grad_norm": 0.7311448738435531, "learning_rate": 8.017842660178427e-07, "loss": 0.6645, "step": 29308 }, { "epoch": 0.8557122419783364, "grad_norm": 0.7438390858665109, "learning_rate": 8.016220600162207e-07, "loss": 0.6626, "step": 29309 }, { "epoch": 0.8557414382061838, "grad_norm": 0.7324149671201925, "learning_rate": 8.014598540145986e-07, "loss": 0.6552, "step": 29310 }, { "epoch": 0.8557706344340311, "grad_norm": 0.7665911664090017, "learning_rate": 8.012976480129765e-07, "loss": 0.6616, "step": 29311 }, { "epoch": 0.8557998306618785, "grad_norm": 0.6732330754794106, "learning_rate": 8.011354420113544e-07, "loss": 0.5776, "step": 29312 }, { "epoch": 0.8558290268897258, "grad_norm": 0.69799333260589, "learning_rate": 8.009732360097325e-07, "loss": 0.6172, "step": 29313 }, { "epoch": 0.8558582231175732, "grad_norm": 0.8114132849305377, "learning_rate": 8.008110300081104e-07, "loss": 0.7628, "step": 29314 }, { "epoch": 0.8558874193454206, "grad_norm": 0.8040064894043237, "learning_rate": 8.006488240064883e-07, "loss": 0.7218, "step": 29315 }, { "epoch": 0.8559166155732679, "grad_norm": 0.6952561568922638, "learning_rate": 8.004866180048663e-07, "loss": 0.5946, "step": 29316 }, { "epoch": 0.8559458118011153, "grad_norm": 0.6830675040781792, "learning_rate": 8.003244120032442e-07, "loss": 0.5978, "step": 29317 }, { "epoch": 0.8559750080289626, "grad_norm": 0.7269267404280516, "learning_rate": 8.001622060016222e-07, "loss": 0.6807, "step": 29318 }, { "epoch": 0.85600420425681, "grad_norm": 0.7849540622159048, "learning_rate": 8.000000000000001e-07, "loss": 0.7872, "step": 29319 }, { "epoch": 0.8560334004846574, "grad_norm": 0.6994737462381122, "learning_rate": 7.99837793998378e-07, "loss": 0.5702, "step": 29320 }, { "epoch": 0.8560625967125047, "grad_norm": 0.7581797284795887, "learning_rate": 7.996755879967559e-07, "loss": 0.6233, "step": 29321 }, { "epoch": 0.8560917929403521, "grad_norm": 0.7364759772011673, "learning_rate": 7.99513381995134e-07, "loss": 0.6621, "step": 29322 }, { "epoch": 0.8561209891681995, "grad_norm": 0.7466087984877539, "learning_rate": 7.993511759935119e-07, "loss": 0.627, "step": 29323 }, { "epoch": 0.8561501853960468, "grad_norm": 0.7008658799182267, "learning_rate": 7.991889699918898e-07, "loss": 0.5972, "step": 29324 }, { "epoch": 0.8561793816238942, "grad_norm": 0.663950029985528, "learning_rate": 7.990267639902677e-07, "loss": 0.5314, "step": 29325 }, { "epoch": 0.8562085778517415, "grad_norm": 0.712303158969457, "learning_rate": 7.988645579886456e-07, "loss": 0.6158, "step": 29326 }, { "epoch": 0.8562377740795889, "grad_norm": 0.7396041657804373, "learning_rate": 7.987023519870236e-07, "loss": 0.6526, "step": 29327 }, { "epoch": 0.8562669703074363, "grad_norm": 0.6419357405876601, "learning_rate": 7.985401459854016e-07, "loss": 0.5395, "step": 29328 }, { "epoch": 0.8562961665352836, "grad_norm": 0.7547968079011936, "learning_rate": 7.983779399837795e-07, "loss": 0.6482, "step": 29329 }, { "epoch": 0.856325362763131, "grad_norm": 0.7410204443131344, "learning_rate": 7.982157339821574e-07, "loss": 0.6259, "step": 29330 }, { "epoch": 0.8563545589909783, "grad_norm": 0.7385593363665314, "learning_rate": 7.980535279805353e-07, "loss": 0.7002, "step": 29331 }, { "epoch": 0.8563837552188257, "grad_norm": 0.7380666891598673, "learning_rate": 7.978913219789133e-07, "loss": 0.6276, "step": 29332 }, { "epoch": 0.8564129514466731, "grad_norm": 0.7703852872150228, "learning_rate": 7.977291159772912e-07, "loss": 0.6794, "step": 29333 }, { "epoch": 0.8564421476745204, "grad_norm": 0.7055310466268276, "learning_rate": 7.975669099756691e-07, "loss": 0.6335, "step": 29334 }, { "epoch": 0.8564713439023678, "grad_norm": 0.7686767831344676, "learning_rate": 7.974047039740471e-07, "loss": 0.6408, "step": 29335 }, { "epoch": 0.8565005401302151, "grad_norm": 0.7541285328130242, "learning_rate": 7.97242497972425e-07, "loss": 0.6911, "step": 29336 }, { "epoch": 0.8565297363580625, "grad_norm": 0.7234471318904763, "learning_rate": 7.97080291970803e-07, "loss": 0.655, "step": 29337 }, { "epoch": 0.8565589325859099, "grad_norm": 0.7496502426712399, "learning_rate": 7.969180859691809e-07, "loss": 0.6954, "step": 29338 }, { "epoch": 0.8565881288137572, "grad_norm": 0.746240734804826, "learning_rate": 7.967558799675588e-07, "loss": 0.6943, "step": 29339 }, { "epoch": 0.8566173250416046, "grad_norm": 0.7065758730588316, "learning_rate": 7.965936739659367e-07, "loss": 0.6001, "step": 29340 }, { "epoch": 0.856646521269452, "grad_norm": 0.6940611122711571, "learning_rate": 7.964314679643148e-07, "loss": 0.6066, "step": 29341 }, { "epoch": 0.8566757174972993, "grad_norm": 0.7709256739880939, "learning_rate": 7.962692619626927e-07, "loss": 0.7171, "step": 29342 }, { "epoch": 0.8567049137251467, "grad_norm": 0.7104288969702675, "learning_rate": 7.961070559610706e-07, "loss": 0.6119, "step": 29343 }, { "epoch": 0.856734109952994, "grad_norm": 0.7369230343142263, "learning_rate": 7.959448499594485e-07, "loss": 0.6576, "step": 29344 }, { "epoch": 0.8567633061808414, "grad_norm": 0.6916112340566133, "learning_rate": 7.957826439578264e-07, "loss": 0.5941, "step": 29345 }, { "epoch": 0.8567925024086888, "grad_norm": 0.7278665172507265, "learning_rate": 7.956204379562045e-07, "loss": 0.6639, "step": 29346 }, { "epoch": 0.8568216986365361, "grad_norm": 0.7338025799677258, "learning_rate": 7.954582319545825e-07, "loss": 0.6422, "step": 29347 }, { "epoch": 0.8568508948643835, "grad_norm": 0.7168973049679819, "learning_rate": 7.952960259529604e-07, "loss": 0.5937, "step": 29348 }, { "epoch": 0.8568800910922308, "grad_norm": 0.6713927481706341, "learning_rate": 7.951338199513383e-07, "loss": 0.5557, "step": 29349 }, { "epoch": 0.8569092873200782, "grad_norm": 0.7844707471882078, "learning_rate": 7.949716139497162e-07, "loss": 0.7745, "step": 29350 }, { "epoch": 0.8569384835479256, "grad_norm": 0.7007862738670073, "learning_rate": 7.948094079480942e-07, "loss": 0.6454, "step": 29351 }, { "epoch": 0.8569676797757729, "grad_norm": 0.6986560647469284, "learning_rate": 7.946472019464721e-07, "loss": 0.6066, "step": 29352 }, { "epoch": 0.8569968760036203, "grad_norm": 0.7211009753531692, "learning_rate": 7.9448499594485e-07, "loss": 0.6206, "step": 29353 }, { "epoch": 0.8570260722314677, "grad_norm": 0.7168637182322828, "learning_rate": 7.943227899432279e-07, "loss": 0.6509, "step": 29354 }, { "epoch": 0.857055268459315, "grad_norm": 0.6641130053443459, "learning_rate": 7.94160583941606e-07, "loss": 0.5548, "step": 29355 }, { "epoch": 0.8570844646871624, "grad_norm": 0.6957603339966513, "learning_rate": 7.939983779399839e-07, "loss": 0.618, "step": 29356 }, { "epoch": 0.8571136609150097, "grad_norm": 0.6625701807698535, "learning_rate": 7.938361719383618e-07, "loss": 0.5156, "step": 29357 }, { "epoch": 0.8571428571428571, "grad_norm": 0.7235383840547318, "learning_rate": 7.936739659367397e-07, "loss": 0.678, "step": 29358 }, { "epoch": 0.8571720533707045, "grad_norm": 0.731581526698569, "learning_rate": 7.935117599351176e-07, "loss": 0.648, "step": 29359 }, { "epoch": 0.8572012495985518, "grad_norm": 0.7204300465182585, "learning_rate": 7.933495539334957e-07, "loss": 0.6542, "step": 29360 }, { "epoch": 0.8572304458263992, "grad_norm": 0.791868991083492, "learning_rate": 7.931873479318736e-07, "loss": 0.7123, "step": 29361 }, { "epoch": 0.8572596420542465, "grad_norm": 0.6783632948764347, "learning_rate": 7.930251419302515e-07, "loss": 0.5355, "step": 29362 }, { "epoch": 0.8572888382820939, "grad_norm": 0.7672717438121903, "learning_rate": 7.928629359286294e-07, "loss": 0.6465, "step": 29363 }, { "epoch": 0.8573180345099413, "grad_norm": 0.7350311569488712, "learning_rate": 7.927007299270073e-07, "loss": 0.6337, "step": 29364 }, { "epoch": 0.8573472307377886, "grad_norm": 0.6749075149621897, "learning_rate": 7.925385239253853e-07, "loss": 0.5812, "step": 29365 }, { "epoch": 0.857376426965636, "grad_norm": 0.7640827633955166, "learning_rate": 7.923763179237633e-07, "loss": 0.7069, "step": 29366 }, { "epoch": 0.8574056231934835, "grad_norm": 0.7417862006095425, "learning_rate": 7.922141119221412e-07, "loss": 0.6312, "step": 29367 }, { "epoch": 0.8574348194213308, "grad_norm": 0.7157915844394467, "learning_rate": 7.920519059205191e-07, "loss": 0.5954, "step": 29368 }, { "epoch": 0.8574640156491782, "grad_norm": 0.7149887507519777, "learning_rate": 7.91889699918897e-07, "loss": 0.6097, "step": 29369 }, { "epoch": 0.8574932118770255, "grad_norm": 0.7245092250735395, "learning_rate": 7.91727493917275e-07, "loss": 0.6088, "step": 29370 }, { "epoch": 0.8575224081048729, "grad_norm": 0.6692718545727532, "learning_rate": 7.915652879156529e-07, "loss": 0.5487, "step": 29371 }, { "epoch": 0.8575516043327203, "grad_norm": 0.7211179387980537, "learning_rate": 7.914030819140308e-07, "loss": 0.6424, "step": 29372 }, { "epoch": 0.8575808005605676, "grad_norm": 0.7547404327598685, "learning_rate": 7.912408759124087e-07, "loss": 0.6892, "step": 29373 }, { "epoch": 0.857609996788415, "grad_norm": 0.6777532847591067, "learning_rate": 7.910786699107868e-07, "loss": 0.596, "step": 29374 }, { "epoch": 0.8576391930162623, "grad_norm": 0.7869257256030915, "learning_rate": 7.909164639091647e-07, "loss": 0.7803, "step": 29375 }, { "epoch": 0.8576683892441097, "grad_norm": 0.6675346239625247, "learning_rate": 7.907542579075426e-07, "loss": 0.5586, "step": 29376 }, { "epoch": 0.8576975854719571, "grad_norm": 0.7317994514969989, "learning_rate": 7.905920519059205e-07, "loss": 0.6203, "step": 29377 }, { "epoch": 0.8577267816998044, "grad_norm": 0.7741654030076104, "learning_rate": 7.904298459042984e-07, "loss": 0.7146, "step": 29378 }, { "epoch": 0.8577559779276518, "grad_norm": 0.7510528520857278, "learning_rate": 7.902676399026766e-07, "loss": 0.6566, "step": 29379 }, { "epoch": 0.8577851741554992, "grad_norm": 0.7068510216020049, "learning_rate": 7.901054339010545e-07, "loss": 0.6275, "step": 29380 }, { "epoch": 0.8578143703833465, "grad_norm": 0.7407698243409813, "learning_rate": 7.899432278994324e-07, "loss": 0.6814, "step": 29381 }, { "epoch": 0.8578435666111939, "grad_norm": 0.7701683118454736, "learning_rate": 7.897810218978103e-07, "loss": 0.699, "step": 29382 }, { "epoch": 0.8578727628390412, "grad_norm": 0.7197506444217577, "learning_rate": 7.896188158961882e-07, "loss": 0.6194, "step": 29383 }, { "epoch": 0.8579019590668886, "grad_norm": 0.7180293528702111, "learning_rate": 7.894566098945662e-07, "loss": 0.6251, "step": 29384 }, { "epoch": 0.857931155294736, "grad_norm": 0.7706023645183729, "learning_rate": 7.892944038929441e-07, "loss": 0.6928, "step": 29385 }, { "epoch": 0.8579603515225833, "grad_norm": 0.6941163159406207, "learning_rate": 7.891321978913221e-07, "loss": 0.613, "step": 29386 }, { "epoch": 0.8579895477504307, "grad_norm": 0.7009860077737217, "learning_rate": 7.889699918897e-07, "loss": 0.6158, "step": 29387 }, { "epoch": 0.858018743978278, "grad_norm": 0.6553842644281095, "learning_rate": 7.88807785888078e-07, "loss": 0.5609, "step": 29388 }, { "epoch": 0.8580479402061254, "grad_norm": 0.7233940148084638, "learning_rate": 7.886455798864559e-07, "loss": 0.6318, "step": 29389 }, { "epoch": 0.8580771364339728, "grad_norm": 0.8610461488348087, "learning_rate": 7.884833738848338e-07, "loss": 0.6771, "step": 29390 }, { "epoch": 0.8581063326618201, "grad_norm": 0.7291686868438165, "learning_rate": 7.883211678832117e-07, "loss": 0.6413, "step": 29391 }, { "epoch": 0.8581355288896675, "grad_norm": 0.8146257345221596, "learning_rate": 7.881589618815896e-07, "loss": 0.7324, "step": 29392 }, { "epoch": 0.8581647251175148, "grad_norm": 0.6692571164186893, "learning_rate": 7.879967558799677e-07, "loss": 0.5856, "step": 29393 }, { "epoch": 0.8581939213453622, "grad_norm": 0.7795619911769976, "learning_rate": 7.878345498783456e-07, "loss": 0.6892, "step": 29394 }, { "epoch": 0.8582231175732096, "grad_norm": 0.7137781320429444, "learning_rate": 7.876723438767235e-07, "loss": 0.6529, "step": 29395 }, { "epoch": 0.8582523138010569, "grad_norm": 0.7762526333450701, "learning_rate": 7.875101378751014e-07, "loss": 0.6622, "step": 29396 }, { "epoch": 0.8582815100289043, "grad_norm": 0.7203154065198475, "learning_rate": 7.873479318734793e-07, "loss": 0.6243, "step": 29397 }, { "epoch": 0.8583107062567517, "grad_norm": 0.7380145777867907, "learning_rate": 7.871857258718574e-07, "loss": 0.6165, "step": 29398 }, { "epoch": 0.858339902484599, "grad_norm": 0.6863282931423353, "learning_rate": 7.870235198702353e-07, "loss": 0.61, "step": 29399 }, { "epoch": 0.8583690987124464, "grad_norm": 0.7368725489500095, "learning_rate": 7.868613138686132e-07, "loss": 0.7023, "step": 29400 }, { "epoch": 0.8583982949402937, "grad_norm": 0.7315716869951048, "learning_rate": 7.866991078669911e-07, "loss": 0.6732, "step": 29401 }, { "epoch": 0.8584274911681411, "grad_norm": 0.7280859541101755, "learning_rate": 7.86536901865369e-07, "loss": 0.6695, "step": 29402 }, { "epoch": 0.8584566873959885, "grad_norm": 0.6928555517479706, "learning_rate": 7.86374695863747e-07, "loss": 0.6003, "step": 29403 }, { "epoch": 0.8584858836238358, "grad_norm": 0.7393211205991496, "learning_rate": 7.862124898621249e-07, "loss": 0.6555, "step": 29404 }, { "epoch": 0.8585150798516832, "grad_norm": 0.7151978136714162, "learning_rate": 7.860502838605029e-07, "loss": 0.6145, "step": 29405 }, { "epoch": 0.8585442760795305, "grad_norm": 0.7311907884324953, "learning_rate": 7.858880778588808e-07, "loss": 0.6406, "step": 29406 }, { "epoch": 0.8585734723073779, "grad_norm": 0.6695293976096733, "learning_rate": 7.857258718572588e-07, "loss": 0.5464, "step": 29407 }, { "epoch": 0.8586026685352253, "grad_norm": 0.711818666298706, "learning_rate": 7.855636658556367e-07, "loss": 0.5763, "step": 29408 }, { "epoch": 0.8586318647630726, "grad_norm": 0.7651214299704293, "learning_rate": 7.854014598540146e-07, "loss": 0.6815, "step": 29409 }, { "epoch": 0.85866106099092, "grad_norm": 0.7392737640648902, "learning_rate": 7.852392538523925e-07, "loss": 0.6857, "step": 29410 }, { "epoch": 0.8586902572187673, "grad_norm": 0.6809013698843889, "learning_rate": 7.850770478507704e-07, "loss": 0.5524, "step": 29411 }, { "epoch": 0.8587194534466147, "grad_norm": 0.7212253666272975, "learning_rate": 7.849148418491486e-07, "loss": 0.6344, "step": 29412 }, { "epoch": 0.8587486496744621, "grad_norm": 0.6408068974613834, "learning_rate": 7.847526358475265e-07, "loss": 0.4914, "step": 29413 }, { "epoch": 0.8587778459023094, "grad_norm": 0.7297085764812382, "learning_rate": 7.845904298459044e-07, "loss": 0.699, "step": 29414 }, { "epoch": 0.8588070421301568, "grad_norm": 0.7437413072321243, "learning_rate": 7.844282238442823e-07, "loss": 0.6418, "step": 29415 }, { "epoch": 0.8588362383580042, "grad_norm": 0.693135991862968, "learning_rate": 7.842660178426602e-07, "loss": 0.6225, "step": 29416 }, { "epoch": 0.8588654345858515, "grad_norm": 0.6761959287733121, "learning_rate": 7.841038118410383e-07, "loss": 0.5892, "step": 29417 }, { "epoch": 0.8588946308136989, "grad_norm": 0.7530983083094693, "learning_rate": 7.839416058394162e-07, "loss": 0.6776, "step": 29418 }, { "epoch": 0.8589238270415462, "grad_norm": 0.708423586039298, "learning_rate": 7.837793998377941e-07, "loss": 0.5708, "step": 29419 }, { "epoch": 0.8589530232693936, "grad_norm": 0.7365391723447744, "learning_rate": 7.83617193836172e-07, "loss": 0.621, "step": 29420 }, { "epoch": 0.858982219497241, "grad_norm": 0.7420763848383506, "learning_rate": 7.834549878345499e-07, "loss": 0.6183, "step": 29421 }, { "epoch": 0.8590114157250883, "grad_norm": 0.632221010200732, "learning_rate": 7.832927818329279e-07, "loss": 0.5173, "step": 29422 }, { "epoch": 0.8590406119529357, "grad_norm": 0.746034425583174, "learning_rate": 7.831305758313058e-07, "loss": 0.6489, "step": 29423 }, { "epoch": 0.859069808180783, "grad_norm": 0.7664154462174555, "learning_rate": 7.829683698296838e-07, "loss": 0.724, "step": 29424 }, { "epoch": 0.8590990044086304, "grad_norm": 0.6970493408636993, "learning_rate": 7.828061638280617e-07, "loss": 0.6061, "step": 29425 }, { "epoch": 0.8591282006364778, "grad_norm": 0.6794058157306629, "learning_rate": 7.826439578264397e-07, "loss": 0.5695, "step": 29426 }, { "epoch": 0.8591573968643251, "grad_norm": 0.7206998562190282, "learning_rate": 7.824817518248176e-07, "loss": 0.5771, "step": 29427 }, { "epoch": 0.8591865930921725, "grad_norm": 0.6946686931360984, "learning_rate": 7.823195458231955e-07, "loss": 0.576, "step": 29428 }, { "epoch": 0.8592157893200199, "grad_norm": 0.7452054916506722, "learning_rate": 7.821573398215734e-07, "loss": 0.6465, "step": 29429 }, { "epoch": 0.8592449855478672, "grad_norm": 0.688717943097964, "learning_rate": 7.819951338199513e-07, "loss": 0.583, "step": 29430 }, { "epoch": 0.8592741817757146, "grad_norm": 0.725200834271083, "learning_rate": 7.818329278183294e-07, "loss": 0.6513, "step": 29431 }, { "epoch": 0.8593033780035619, "grad_norm": 0.6480690620877908, "learning_rate": 7.816707218167073e-07, "loss": 0.5512, "step": 29432 }, { "epoch": 0.8593325742314093, "grad_norm": 0.6966808353783915, "learning_rate": 7.815085158150852e-07, "loss": 0.6295, "step": 29433 }, { "epoch": 0.8593617704592567, "grad_norm": 0.6635165546392063, "learning_rate": 7.813463098134631e-07, "loss": 0.5471, "step": 29434 }, { "epoch": 0.859390966687104, "grad_norm": 0.7649500579344418, "learning_rate": 7.81184103811841e-07, "loss": 0.6877, "step": 29435 }, { "epoch": 0.8594201629149514, "grad_norm": 0.7028113138313142, "learning_rate": 7.810218978102191e-07, "loss": 0.663, "step": 29436 }, { "epoch": 0.8594493591427987, "grad_norm": 0.7193760118114777, "learning_rate": 7.80859691808597e-07, "loss": 0.578, "step": 29437 }, { "epoch": 0.8594785553706461, "grad_norm": 0.6601365714226575, "learning_rate": 7.806974858069749e-07, "loss": 0.5667, "step": 29438 }, { "epoch": 0.8595077515984935, "grad_norm": 0.7452992230040273, "learning_rate": 7.805352798053528e-07, "loss": 0.7219, "step": 29439 }, { "epoch": 0.8595369478263408, "grad_norm": 0.7494972538599488, "learning_rate": 7.803730738037308e-07, "loss": 0.6586, "step": 29440 }, { "epoch": 0.8595661440541882, "grad_norm": 0.6694202074759312, "learning_rate": 7.802108678021087e-07, "loss": 0.5756, "step": 29441 }, { "epoch": 0.8595953402820355, "grad_norm": 0.6996227459772327, "learning_rate": 7.800486618004866e-07, "loss": 0.5825, "step": 29442 }, { "epoch": 0.8596245365098829, "grad_norm": 0.7110005498154703, "learning_rate": 7.798864557988645e-07, "loss": 0.6545, "step": 29443 }, { "epoch": 0.8596537327377303, "grad_norm": 0.7018381009146409, "learning_rate": 7.797242497972426e-07, "loss": 0.5894, "step": 29444 }, { "epoch": 0.8596829289655776, "grad_norm": 0.6757475517756791, "learning_rate": 7.795620437956206e-07, "loss": 0.528, "step": 29445 }, { "epoch": 0.859712125193425, "grad_norm": 0.7697257380767387, "learning_rate": 7.793998377939985e-07, "loss": 0.6531, "step": 29446 }, { "epoch": 0.8597413214212724, "grad_norm": 0.7256573949743159, "learning_rate": 7.792376317923764e-07, "loss": 0.6393, "step": 29447 }, { "epoch": 0.8597705176491197, "grad_norm": 0.6929649826491631, "learning_rate": 7.790754257907543e-07, "loss": 0.5589, "step": 29448 }, { "epoch": 0.8597997138769671, "grad_norm": 0.7937247095149699, "learning_rate": 7.789132197891322e-07, "loss": 0.7391, "step": 29449 }, { "epoch": 0.8598289101048144, "grad_norm": 0.8261194119169707, "learning_rate": 7.787510137875103e-07, "loss": 0.7056, "step": 29450 }, { "epoch": 0.8598581063326618, "grad_norm": 0.7203935938661112, "learning_rate": 7.785888077858882e-07, "loss": 0.7085, "step": 29451 }, { "epoch": 0.8598873025605092, "grad_norm": 0.801448202555418, "learning_rate": 7.784266017842661e-07, "loss": 0.6901, "step": 29452 }, { "epoch": 0.8599164987883565, "grad_norm": 0.7171667638529279, "learning_rate": 7.78264395782644e-07, "loss": 0.6386, "step": 29453 }, { "epoch": 0.8599456950162039, "grad_norm": 0.7756953498856312, "learning_rate": 7.781021897810219e-07, "loss": 0.6923, "step": 29454 }, { "epoch": 0.8599748912440512, "grad_norm": 0.7336793976596231, "learning_rate": 7.779399837794e-07, "loss": 0.5955, "step": 29455 }, { "epoch": 0.8600040874718986, "grad_norm": 0.7090587727527842, "learning_rate": 7.777777777777779e-07, "loss": 0.5897, "step": 29456 }, { "epoch": 0.860033283699746, "grad_norm": 0.6988264051882017, "learning_rate": 7.776155717761558e-07, "loss": 0.6301, "step": 29457 }, { "epoch": 0.8600624799275933, "grad_norm": 0.8130871730419624, "learning_rate": 7.774533657745337e-07, "loss": 0.7484, "step": 29458 }, { "epoch": 0.8600916761554407, "grad_norm": 0.7513477052665183, "learning_rate": 7.772911597729117e-07, "loss": 0.6576, "step": 29459 }, { "epoch": 0.860120872383288, "grad_norm": 0.7429648425877539, "learning_rate": 7.771289537712896e-07, "loss": 0.627, "step": 29460 }, { "epoch": 0.8601500686111354, "grad_norm": 0.739415664045886, "learning_rate": 7.769667477696675e-07, "loss": 0.6287, "step": 29461 }, { "epoch": 0.8601792648389828, "grad_norm": 0.667522777620823, "learning_rate": 7.768045417680454e-07, "loss": 0.5414, "step": 29462 }, { "epoch": 0.8602084610668301, "grad_norm": 0.7158044716298742, "learning_rate": 7.766423357664234e-07, "loss": 0.6575, "step": 29463 }, { "epoch": 0.8602376572946775, "grad_norm": 0.7016549076094853, "learning_rate": 7.764801297648014e-07, "loss": 0.6058, "step": 29464 }, { "epoch": 0.8602668535225249, "grad_norm": 0.7917528934148186, "learning_rate": 7.763179237631793e-07, "loss": 0.6649, "step": 29465 }, { "epoch": 0.8602960497503722, "grad_norm": 0.907546021427235, "learning_rate": 7.761557177615572e-07, "loss": 0.5661, "step": 29466 }, { "epoch": 0.8603252459782196, "grad_norm": 0.7405369101579242, "learning_rate": 7.759935117599351e-07, "loss": 0.6903, "step": 29467 }, { "epoch": 0.8603544422060669, "grad_norm": 0.722377663725, "learning_rate": 7.75831305758313e-07, "loss": 0.6579, "step": 29468 }, { "epoch": 0.8603836384339143, "grad_norm": 0.7089177079915779, "learning_rate": 7.756690997566911e-07, "loss": 0.5481, "step": 29469 }, { "epoch": 0.8604128346617617, "grad_norm": 0.767554172894342, "learning_rate": 7.75506893755069e-07, "loss": 0.6982, "step": 29470 }, { "epoch": 0.860442030889609, "grad_norm": 0.7517939788436288, "learning_rate": 7.753446877534469e-07, "loss": 0.6883, "step": 29471 }, { "epoch": 0.8604712271174564, "grad_norm": 0.9210530602636204, "learning_rate": 7.751824817518248e-07, "loss": 0.8849, "step": 29472 }, { "epoch": 0.8605004233453037, "grad_norm": 0.665917963353531, "learning_rate": 7.750202757502028e-07, "loss": 0.5705, "step": 29473 }, { "epoch": 0.8605296195731511, "grad_norm": 0.6621946804593128, "learning_rate": 7.748580697485807e-07, "loss": 0.5888, "step": 29474 }, { "epoch": 0.8605588158009985, "grad_norm": 0.7813400887450024, "learning_rate": 7.746958637469588e-07, "loss": 0.7482, "step": 29475 }, { "epoch": 0.8605880120288458, "grad_norm": 0.8180654905776563, "learning_rate": 7.745336577453367e-07, "loss": 0.6639, "step": 29476 }, { "epoch": 0.8606172082566932, "grad_norm": 0.7243299260108043, "learning_rate": 7.743714517437146e-07, "loss": 0.6292, "step": 29477 }, { "epoch": 0.8606464044845406, "grad_norm": 0.6981303570447192, "learning_rate": 7.742092457420926e-07, "loss": 0.5525, "step": 29478 }, { "epoch": 0.8606756007123879, "grad_norm": 0.7047549246653176, "learning_rate": 7.740470397404705e-07, "loss": 0.6094, "step": 29479 }, { "epoch": 0.8607047969402353, "grad_norm": 0.7526912605692355, "learning_rate": 7.738848337388484e-07, "loss": 0.6948, "step": 29480 }, { "epoch": 0.8607339931680826, "grad_norm": 0.7623476121066463, "learning_rate": 7.737226277372263e-07, "loss": 0.6906, "step": 29481 }, { "epoch": 0.86076318939593, "grad_norm": 0.6809760350553691, "learning_rate": 7.735604217356043e-07, "loss": 0.5458, "step": 29482 }, { "epoch": 0.8607923856237774, "grad_norm": 0.7311889682675367, "learning_rate": 7.733982157339823e-07, "loss": 0.6135, "step": 29483 }, { "epoch": 0.8608215818516247, "grad_norm": 0.7838668333394949, "learning_rate": 7.732360097323602e-07, "loss": 0.7762, "step": 29484 }, { "epoch": 0.8608507780794721, "grad_norm": 0.6891742882806572, "learning_rate": 7.730738037307381e-07, "loss": 0.5861, "step": 29485 }, { "epoch": 0.8608799743073194, "grad_norm": 0.7294648280752123, "learning_rate": 7.72911597729116e-07, "loss": 0.662, "step": 29486 }, { "epoch": 0.8609091705351668, "grad_norm": 0.7132604597891293, "learning_rate": 7.727493917274939e-07, "loss": 0.6649, "step": 29487 }, { "epoch": 0.8609383667630143, "grad_norm": 0.7214601298256575, "learning_rate": 7.72587185725872e-07, "loss": 0.6016, "step": 29488 }, { "epoch": 0.8609675629908616, "grad_norm": 0.7125725018945552, "learning_rate": 7.724249797242499e-07, "loss": 0.6028, "step": 29489 }, { "epoch": 0.860996759218709, "grad_norm": 0.7368820098638151, "learning_rate": 7.722627737226278e-07, "loss": 0.6326, "step": 29490 }, { "epoch": 0.8610259554465564, "grad_norm": 0.7104390052975966, "learning_rate": 7.721005677210057e-07, "loss": 0.6352, "step": 29491 }, { "epoch": 0.8610551516744037, "grad_norm": 0.6759325730900383, "learning_rate": 7.719383617193837e-07, "loss": 0.5928, "step": 29492 }, { "epoch": 0.8610843479022511, "grad_norm": 0.7701447790079834, "learning_rate": 7.717761557177616e-07, "loss": 0.7291, "step": 29493 }, { "epoch": 0.8611135441300984, "grad_norm": 0.7544666506382494, "learning_rate": 7.716139497161396e-07, "loss": 0.7048, "step": 29494 }, { "epoch": 0.8611427403579458, "grad_norm": 0.7997363458977378, "learning_rate": 7.714517437145175e-07, "loss": 0.7168, "step": 29495 }, { "epoch": 0.8611719365857932, "grad_norm": 0.7157644486763907, "learning_rate": 7.712895377128954e-07, "loss": 0.5785, "step": 29496 }, { "epoch": 0.8612011328136405, "grad_norm": 0.7031001144224697, "learning_rate": 7.711273317112734e-07, "loss": 0.594, "step": 29497 }, { "epoch": 0.8612303290414879, "grad_norm": 0.7248477029060425, "learning_rate": 7.709651257096513e-07, "loss": 0.6218, "step": 29498 }, { "epoch": 0.8612595252693352, "grad_norm": 0.7316552120255795, "learning_rate": 7.708029197080292e-07, "loss": 0.6808, "step": 29499 }, { "epoch": 0.8612887214971826, "grad_norm": 0.7523375505869606, "learning_rate": 7.706407137064071e-07, "loss": 0.6429, "step": 29500 }, { "epoch": 0.86131791772503, "grad_norm": 0.7722106764328839, "learning_rate": 7.704785077047851e-07, "loss": 0.6797, "step": 29501 }, { "epoch": 0.8613471139528773, "grad_norm": 0.7764881486320904, "learning_rate": 7.703163017031631e-07, "loss": 0.7047, "step": 29502 }, { "epoch": 0.8613763101807247, "grad_norm": 0.758687052591978, "learning_rate": 7.70154095701541e-07, "loss": 0.6536, "step": 29503 }, { "epoch": 0.861405506408572, "grad_norm": 0.7059729883639995, "learning_rate": 7.699918896999189e-07, "loss": 0.6355, "step": 29504 }, { "epoch": 0.8614347026364194, "grad_norm": 0.7228592884913728, "learning_rate": 7.698296836982968e-07, "loss": 0.6264, "step": 29505 }, { "epoch": 0.8614638988642668, "grad_norm": 0.6545100751849872, "learning_rate": 7.69667477696675e-07, "loss": 0.5194, "step": 29506 }, { "epoch": 0.8614930950921141, "grad_norm": 0.7569621230075213, "learning_rate": 7.695052716950529e-07, "loss": 0.6514, "step": 29507 }, { "epoch": 0.8615222913199615, "grad_norm": 0.7440283842093929, "learning_rate": 7.693430656934308e-07, "loss": 0.6454, "step": 29508 }, { "epoch": 0.8615514875478089, "grad_norm": 0.7678507962027878, "learning_rate": 7.691808596918087e-07, "loss": 0.7365, "step": 29509 }, { "epoch": 0.8615806837756562, "grad_norm": 0.7319721768363469, "learning_rate": 7.690186536901866e-07, "loss": 0.6345, "step": 29510 }, { "epoch": 0.8616098800035036, "grad_norm": 0.7214844406304516, "learning_rate": 7.688564476885646e-07, "loss": 0.6549, "step": 29511 }, { "epoch": 0.8616390762313509, "grad_norm": 0.7612118976409493, "learning_rate": 7.686942416869425e-07, "loss": 0.7236, "step": 29512 }, { "epoch": 0.8616682724591983, "grad_norm": 0.7704307013484304, "learning_rate": 7.685320356853205e-07, "loss": 0.7265, "step": 29513 }, { "epoch": 0.8616974686870457, "grad_norm": 0.7361775483159939, "learning_rate": 7.683698296836984e-07, "loss": 0.6439, "step": 29514 }, { "epoch": 0.861726664914893, "grad_norm": 0.7225167759875905, "learning_rate": 7.682076236820763e-07, "loss": 0.6187, "step": 29515 }, { "epoch": 0.8617558611427404, "grad_norm": 0.7670163353121459, "learning_rate": 7.680454176804543e-07, "loss": 0.6978, "step": 29516 }, { "epoch": 0.8617850573705877, "grad_norm": 0.7376918506078961, "learning_rate": 7.678832116788322e-07, "loss": 0.6529, "step": 29517 }, { "epoch": 0.8618142535984351, "grad_norm": 0.7772074563846593, "learning_rate": 7.677210056772101e-07, "loss": 0.6476, "step": 29518 }, { "epoch": 0.8618434498262825, "grad_norm": 0.7703398107325967, "learning_rate": 7.67558799675588e-07, "loss": 0.6726, "step": 29519 }, { "epoch": 0.8618726460541298, "grad_norm": 0.7425465088017528, "learning_rate": 7.673965936739659e-07, "loss": 0.6572, "step": 29520 }, { "epoch": 0.8619018422819772, "grad_norm": 0.7251568840551484, "learning_rate": 7.67234387672344e-07, "loss": 0.6525, "step": 29521 }, { "epoch": 0.8619310385098246, "grad_norm": 0.7330101588664069, "learning_rate": 7.670721816707219e-07, "loss": 0.645, "step": 29522 }, { "epoch": 0.8619602347376719, "grad_norm": 0.6919780554170305, "learning_rate": 7.669099756690998e-07, "loss": 0.5813, "step": 29523 }, { "epoch": 0.8619894309655193, "grad_norm": 0.707257626330965, "learning_rate": 7.667477696674777e-07, "loss": 0.5804, "step": 29524 }, { "epoch": 0.8620186271933666, "grad_norm": 0.7272068487746416, "learning_rate": 7.665855636658558e-07, "loss": 0.6432, "step": 29525 }, { "epoch": 0.862047823421214, "grad_norm": 0.7629790520657963, "learning_rate": 7.664233576642337e-07, "loss": 0.6367, "step": 29526 }, { "epoch": 0.8620770196490614, "grad_norm": 0.7493534965703404, "learning_rate": 7.662611516626116e-07, "loss": 0.6988, "step": 29527 }, { "epoch": 0.8621062158769087, "grad_norm": 0.7648325545975576, "learning_rate": 7.660989456609895e-07, "loss": 0.7079, "step": 29528 }, { "epoch": 0.8621354121047561, "grad_norm": 0.7007252194485155, "learning_rate": 7.659367396593674e-07, "loss": 0.6245, "step": 29529 }, { "epoch": 0.8621646083326034, "grad_norm": 0.6967634022121365, "learning_rate": 7.657745336577454e-07, "loss": 0.5832, "step": 29530 }, { "epoch": 0.8621938045604508, "grad_norm": 0.8289809025252498, "learning_rate": 7.656123276561233e-07, "loss": 0.6788, "step": 29531 }, { "epoch": 0.8622230007882982, "grad_norm": 0.6989673048867437, "learning_rate": 7.654501216545012e-07, "loss": 0.5877, "step": 29532 }, { "epoch": 0.8622521970161455, "grad_norm": 0.7299685860930637, "learning_rate": 7.652879156528792e-07, "loss": 0.7087, "step": 29533 }, { "epoch": 0.8622813932439929, "grad_norm": 0.6963213793708255, "learning_rate": 7.651257096512571e-07, "loss": 0.5645, "step": 29534 }, { "epoch": 0.8623105894718402, "grad_norm": 0.6878070205110713, "learning_rate": 7.649635036496351e-07, "loss": 0.5711, "step": 29535 }, { "epoch": 0.8623397856996876, "grad_norm": 0.8689081014540204, "learning_rate": 7.64801297648013e-07, "loss": 0.7091, "step": 29536 }, { "epoch": 0.862368981927535, "grad_norm": 0.7816724314909334, "learning_rate": 7.646390916463909e-07, "loss": 0.709, "step": 29537 }, { "epoch": 0.8623981781553823, "grad_norm": 0.7600305737918382, "learning_rate": 7.644768856447688e-07, "loss": 0.6652, "step": 29538 }, { "epoch": 0.8624273743832297, "grad_norm": 0.6894192702091348, "learning_rate": 7.643146796431467e-07, "loss": 0.6359, "step": 29539 }, { "epoch": 0.8624565706110771, "grad_norm": 0.7427534930971837, "learning_rate": 7.641524736415249e-07, "loss": 0.6571, "step": 29540 }, { "epoch": 0.8624857668389244, "grad_norm": 0.7343247827096454, "learning_rate": 7.639902676399028e-07, "loss": 0.6694, "step": 29541 }, { "epoch": 0.8625149630667718, "grad_norm": 0.736935370234192, "learning_rate": 7.638280616382807e-07, "loss": 0.6396, "step": 29542 }, { "epoch": 0.8625441592946191, "grad_norm": 0.6790894685870278, "learning_rate": 7.636658556366586e-07, "loss": 0.5453, "step": 29543 }, { "epoch": 0.8625733555224665, "grad_norm": 0.7295787300693999, "learning_rate": 7.635036496350367e-07, "loss": 0.6132, "step": 29544 }, { "epoch": 0.8626025517503139, "grad_norm": 0.7197282984719456, "learning_rate": 7.633414436334146e-07, "loss": 0.6634, "step": 29545 }, { "epoch": 0.8626317479781612, "grad_norm": 0.7974899273980198, "learning_rate": 7.631792376317925e-07, "loss": 0.685, "step": 29546 }, { "epoch": 0.8626609442060086, "grad_norm": 0.6450377738212928, "learning_rate": 7.630170316301704e-07, "loss": 0.5283, "step": 29547 }, { "epoch": 0.862690140433856, "grad_norm": 0.701279018363749, "learning_rate": 7.628548256285483e-07, "loss": 0.6221, "step": 29548 }, { "epoch": 0.8627193366617033, "grad_norm": 0.6987730231056702, "learning_rate": 7.626926196269263e-07, "loss": 0.6053, "step": 29549 }, { "epoch": 0.8627485328895507, "grad_norm": 0.6831098289291295, "learning_rate": 7.625304136253042e-07, "loss": 0.5676, "step": 29550 }, { "epoch": 0.862777729117398, "grad_norm": 0.7002169767101497, "learning_rate": 7.623682076236821e-07, "loss": 0.6643, "step": 29551 }, { "epoch": 0.8628069253452454, "grad_norm": 0.7165982308804928, "learning_rate": 7.622060016220601e-07, "loss": 0.6191, "step": 29552 }, { "epoch": 0.8628361215730928, "grad_norm": 0.7091065633849231, "learning_rate": 7.62043795620438e-07, "loss": 0.6639, "step": 29553 }, { "epoch": 0.8628653178009401, "grad_norm": 0.8057055046580953, "learning_rate": 7.61881589618816e-07, "loss": 0.6441, "step": 29554 }, { "epoch": 0.8628945140287875, "grad_norm": 0.7143031835597369, "learning_rate": 7.617193836171939e-07, "loss": 0.6478, "step": 29555 }, { "epoch": 0.8629237102566348, "grad_norm": 0.7144823370718282, "learning_rate": 7.615571776155718e-07, "loss": 0.6428, "step": 29556 }, { "epoch": 0.8629529064844822, "grad_norm": 0.7543112981536572, "learning_rate": 7.613949716139497e-07, "loss": 0.6651, "step": 29557 }, { "epoch": 0.8629821027123296, "grad_norm": 0.7122257949144083, "learning_rate": 7.612327656123278e-07, "loss": 0.651, "step": 29558 }, { "epoch": 0.8630112989401769, "grad_norm": 0.6736102188461315, "learning_rate": 7.610705596107057e-07, "loss": 0.5868, "step": 29559 }, { "epoch": 0.8630404951680243, "grad_norm": 0.7832652694020955, "learning_rate": 7.609083536090836e-07, "loss": 0.7088, "step": 29560 }, { "epoch": 0.8630696913958716, "grad_norm": 0.7633134431506565, "learning_rate": 7.607461476074615e-07, "loss": 0.6679, "step": 29561 }, { "epoch": 0.863098887623719, "grad_norm": 0.6651279609252061, "learning_rate": 7.605839416058394e-07, "loss": 0.5434, "step": 29562 }, { "epoch": 0.8631280838515664, "grad_norm": 0.7781847441103179, "learning_rate": 7.604217356042174e-07, "loss": 0.6769, "step": 29563 }, { "epoch": 0.8631572800794137, "grad_norm": 0.6179952175424285, "learning_rate": 7.602595296025954e-07, "loss": 0.526, "step": 29564 }, { "epoch": 0.8631864763072611, "grad_norm": 0.6828339880579388, "learning_rate": 7.600973236009733e-07, "loss": 0.5864, "step": 29565 }, { "epoch": 0.8632156725351084, "grad_norm": 0.7241273142259501, "learning_rate": 7.599351175993512e-07, "loss": 0.6441, "step": 29566 }, { "epoch": 0.8632448687629558, "grad_norm": 0.6907218834040032, "learning_rate": 7.597729115977291e-07, "loss": 0.5267, "step": 29567 }, { "epoch": 0.8632740649908032, "grad_norm": 0.7675758178777006, "learning_rate": 7.596107055961071e-07, "loss": 0.7127, "step": 29568 }, { "epoch": 0.8633032612186505, "grad_norm": 0.6831867592446604, "learning_rate": 7.59448499594485e-07, "loss": 0.5213, "step": 29569 }, { "epoch": 0.8633324574464979, "grad_norm": 0.6391954035022086, "learning_rate": 7.592862935928629e-07, "loss": 0.5308, "step": 29570 }, { "epoch": 0.8633616536743453, "grad_norm": 0.6902959399728548, "learning_rate": 7.59124087591241e-07, "loss": 0.5414, "step": 29571 }, { "epoch": 0.8633908499021926, "grad_norm": 0.6936926564560499, "learning_rate": 7.589618815896188e-07, "loss": 0.6056, "step": 29572 }, { "epoch": 0.86342004613004, "grad_norm": 0.6492905077387423, "learning_rate": 7.587996755879969e-07, "loss": 0.5264, "step": 29573 }, { "epoch": 0.8634492423578873, "grad_norm": 0.7218569899517546, "learning_rate": 7.586374695863748e-07, "loss": 0.5689, "step": 29574 }, { "epoch": 0.8634784385857347, "grad_norm": 0.7686677800591549, "learning_rate": 7.584752635847527e-07, "loss": 0.7433, "step": 29575 }, { "epoch": 0.8635076348135821, "grad_norm": 0.6825361413584936, "learning_rate": 7.583130575831306e-07, "loss": 0.575, "step": 29576 }, { "epoch": 0.8635368310414294, "grad_norm": 0.6967653918590119, "learning_rate": 7.581508515815087e-07, "loss": 0.6239, "step": 29577 }, { "epoch": 0.8635660272692768, "grad_norm": 0.6778445600494406, "learning_rate": 7.579886455798866e-07, "loss": 0.5814, "step": 29578 }, { "epoch": 0.8635952234971241, "grad_norm": 0.773478681964982, "learning_rate": 7.578264395782645e-07, "loss": 0.6936, "step": 29579 }, { "epoch": 0.8636244197249715, "grad_norm": 0.7402658479782341, "learning_rate": 7.576642335766424e-07, "loss": 0.6805, "step": 29580 }, { "epoch": 0.8636536159528189, "grad_norm": 0.7915752263819027, "learning_rate": 7.575020275750203e-07, "loss": 0.647, "step": 29581 }, { "epoch": 0.8636828121806662, "grad_norm": 0.6991520906449027, "learning_rate": 7.573398215733983e-07, "loss": 0.6053, "step": 29582 }, { "epoch": 0.8637120084085136, "grad_norm": 0.6903117104211043, "learning_rate": 7.571776155717763e-07, "loss": 0.5866, "step": 29583 }, { "epoch": 0.863741204636361, "grad_norm": 0.6759289193946167, "learning_rate": 7.570154095701542e-07, "loss": 0.5665, "step": 29584 }, { "epoch": 0.8637704008642083, "grad_norm": 0.7035701915124619, "learning_rate": 7.568532035685321e-07, "loss": 0.6033, "step": 29585 }, { "epoch": 0.8637995970920557, "grad_norm": 0.7500948118975347, "learning_rate": 7.5669099756691e-07, "loss": 0.6714, "step": 29586 }, { "epoch": 0.863828793319903, "grad_norm": 0.6918922720712585, "learning_rate": 7.56528791565288e-07, "loss": 0.6284, "step": 29587 }, { "epoch": 0.8638579895477504, "grad_norm": 0.7186487450641728, "learning_rate": 7.563665855636659e-07, "loss": 0.5803, "step": 29588 }, { "epoch": 0.8638871857755978, "grad_norm": 0.6982509273940178, "learning_rate": 7.562043795620438e-07, "loss": 0.5778, "step": 29589 }, { "epoch": 0.8639163820034451, "grad_norm": 0.7826069929284852, "learning_rate": 7.560421735604218e-07, "loss": 0.6468, "step": 29590 }, { "epoch": 0.8639455782312925, "grad_norm": 0.7552191672073831, "learning_rate": 7.558799675587998e-07, "loss": 0.627, "step": 29591 }, { "epoch": 0.8639747744591398, "grad_norm": 0.6712354964908407, "learning_rate": 7.557177615571777e-07, "loss": 0.5488, "step": 29592 }, { "epoch": 0.8640039706869872, "grad_norm": 0.6921680797201608, "learning_rate": 7.555555555555556e-07, "loss": 0.5834, "step": 29593 }, { "epoch": 0.8640331669148346, "grad_norm": 0.746518301809665, "learning_rate": 7.553933495539335e-07, "loss": 0.6564, "step": 29594 }, { "epoch": 0.8640623631426819, "grad_norm": 0.7415182375821863, "learning_rate": 7.552311435523114e-07, "loss": 0.6477, "step": 29595 }, { "epoch": 0.8640915593705293, "grad_norm": 0.8170444866906216, "learning_rate": 7.550689375506895e-07, "loss": 0.6925, "step": 29596 }, { "epoch": 0.8641207555983766, "grad_norm": 0.674143330335616, "learning_rate": 7.549067315490674e-07, "loss": 0.5673, "step": 29597 }, { "epoch": 0.864149951826224, "grad_norm": 0.766613878023444, "learning_rate": 7.547445255474453e-07, "loss": 0.6886, "step": 29598 }, { "epoch": 0.8641791480540714, "grad_norm": 0.6675095625745552, "learning_rate": 7.545823195458232e-07, "loss": 0.543, "step": 29599 }, { "epoch": 0.8642083442819187, "grad_norm": 0.7523738338044015, "learning_rate": 7.544201135442011e-07, "loss": 0.6844, "step": 29600 }, { "epoch": 0.8642375405097661, "grad_norm": 0.7052211105035915, "learning_rate": 7.542579075425791e-07, "loss": 0.6238, "step": 29601 }, { "epoch": 0.8642667367376135, "grad_norm": 0.6982344416986215, "learning_rate": 7.540957015409571e-07, "loss": 0.6182, "step": 29602 }, { "epoch": 0.8642959329654608, "grad_norm": 0.7217722910123596, "learning_rate": 7.53933495539335e-07, "loss": 0.6289, "step": 29603 }, { "epoch": 0.8643251291933082, "grad_norm": 0.7628443871316892, "learning_rate": 7.53771289537713e-07, "loss": 0.7272, "step": 29604 }, { "epoch": 0.8643543254211555, "grad_norm": 0.6965576155638885, "learning_rate": 7.536090835360909e-07, "loss": 0.5864, "step": 29605 }, { "epoch": 0.8643835216490029, "grad_norm": 0.7140333995291687, "learning_rate": 7.534468775344689e-07, "loss": 0.5513, "step": 29606 }, { "epoch": 0.8644127178768503, "grad_norm": 0.7155457777382007, "learning_rate": 7.532846715328468e-07, "loss": 0.6256, "step": 29607 }, { "epoch": 0.8644419141046977, "grad_norm": 0.6499102752795196, "learning_rate": 7.531224655312247e-07, "loss": 0.5371, "step": 29608 }, { "epoch": 0.8644711103325451, "grad_norm": 0.6686609191558115, "learning_rate": 7.529602595296026e-07, "loss": 0.5466, "step": 29609 }, { "epoch": 0.8645003065603925, "grad_norm": 0.7040172097746311, "learning_rate": 7.527980535279807e-07, "loss": 0.6546, "step": 29610 }, { "epoch": 0.8645295027882398, "grad_norm": 0.7001011195806349, "learning_rate": 7.526358475263586e-07, "loss": 0.5575, "step": 29611 }, { "epoch": 0.8645586990160872, "grad_norm": 0.6956411068657548, "learning_rate": 7.524736415247365e-07, "loss": 0.589, "step": 29612 }, { "epoch": 0.8645878952439345, "grad_norm": 0.7403133781945122, "learning_rate": 7.523114355231144e-07, "loss": 0.6688, "step": 29613 }, { "epoch": 0.8646170914717819, "grad_norm": 0.7891550752810781, "learning_rate": 7.521492295214923e-07, "loss": 0.6729, "step": 29614 }, { "epoch": 0.8646462876996293, "grad_norm": 0.7484082421387374, "learning_rate": 7.519870235198704e-07, "loss": 0.6996, "step": 29615 }, { "epoch": 0.8646754839274766, "grad_norm": 0.7084711825191301, "learning_rate": 7.518248175182483e-07, "loss": 0.6554, "step": 29616 }, { "epoch": 0.864704680155324, "grad_norm": 0.7370196108053602, "learning_rate": 7.516626115166262e-07, "loss": 0.6616, "step": 29617 }, { "epoch": 0.8647338763831713, "grad_norm": 0.6760671709061116, "learning_rate": 7.515004055150041e-07, "loss": 0.5425, "step": 29618 }, { "epoch": 0.8647630726110187, "grad_norm": 0.7265000362001434, "learning_rate": 7.51338199513382e-07, "loss": 0.6064, "step": 29619 }, { "epoch": 0.8647922688388661, "grad_norm": 0.7530698061051675, "learning_rate": 7.5117599351176e-07, "loss": 0.699, "step": 29620 }, { "epoch": 0.8648214650667134, "grad_norm": 0.7499774162358916, "learning_rate": 7.51013787510138e-07, "loss": 0.6771, "step": 29621 }, { "epoch": 0.8648506612945608, "grad_norm": 0.8014678313394417, "learning_rate": 7.508515815085159e-07, "loss": 0.6968, "step": 29622 }, { "epoch": 0.8648798575224081, "grad_norm": 0.7399269368134561, "learning_rate": 7.506893755068938e-07, "loss": 0.6904, "step": 29623 }, { "epoch": 0.8649090537502555, "grad_norm": 0.698723966008672, "learning_rate": 7.505271695052718e-07, "loss": 0.6278, "step": 29624 }, { "epoch": 0.8649382499781029, "grad_norm": 0.7158329046822766, "learning_rate": 7.503649635036497e-07, "loss": 0.6029, "step": 29625 }, { "epoch": 0.8649674462059502, "grad_norm": 0.7167814229330006, "learning_rate": 7.502027575020276e-07, "loss": 0.6444, "step": 29626 }, { "epoch": 0.8649966424337976, "grad_norm": 0.754914366419566, "learning_rate": 7.500405515004055e-07, "loss": 0.5966, "step": 29627 }, { "epoch": 0.865025838661645, "grad_norm": 0.6982095182068951, "learning_rate": 7.498783454987834e-07, "loss": 0.6044, "step": 29628 }, { "epoch": 0.8650550348894923, "grad_norm": 0.7823648596036052, "learning_rate": 7.497161394971615e-07, "loss": 0.7316, "step": 29629 }, { "epoch": 0.8650842311173397, "grad_norm": 0.7152782966637115, "learning_rate": 7.495539334955394e-07, "loss": 0.6525, "step": 29630 }, { "epoch": 0.865113427345187, "grad_norm": 0.7276755456217584, "learning_rate": 7.493917274939173e-07, "loss": 0.6427, "step": 29631 }, { "epoch": 0.8651426235730344, "grad_norm": 0.7087426381578608, "learning_rate": 7.492295214922952e-07, "loss": 0.6288, "step": 29632 }, { "epoch": 0.8651718198008818, "grad_norm": 0.7481292989606813, "learning_rate": 7.490673154906731e-07, "loss": 0.6705, "step": 29633 }, { "epoch": 0.8652010160287291, "grad_norm": 0.7726492284753615, "learning_rate": 7.489051094890512e-07, "loss": 0.6693, "step": 29634 }, { "epoch": 0.8652302122565765, "grad_norm": 0.782286544978432, "learning_rate": 7.487429034874291e-07, "loss": 0.7019, "step": 29635 }, { "epoch": 0.8652594084844238, "grad_norm": 0.7564008721298788, "learning_rate": 7.48580697485807e-07, "loss": 0.688, "step": 29636 }, { "epoch": 0.8652886047122712, "grad_norm": 0.7372515806932677, "learning_rate": 7.48418491484185e-07, "loss": 0.6665, "step": 29637 }, { "epoch": 0.8653178009401186, "grad_norm": 0.6924568478279367, "learning_rate": 7.482562854825629e-07, "loss": 0.6417, "step": 29638 }, { "epoch": 0.8653469971679659, "grad_norm": 0.8046909716072073, "learning_rate": 7.480940794809409e-07, "loss": 0.5936, "step": 29639 }, { "epoch": 0.8653761933958133, "grad_norm": 0.6839892226520298, "learning_rate": 7.479318734793188e-07, "loss": 0.5914, "step": 29640 }, { "epoch": 0.8654053896236606, "grad_norm": 0.6810476355666963, "learning_rate": 7.477696674776968e-07, "loss": 0.5745, "step": 29641 }, { "epoch": 0.865434585851508, "grad_norm": 0.6744873373782383, "learning_rate": 7.476074614760747e-07, "loss": 0.5398, "step": 29642 }, { "epoch": 0.8654637820793554, "grad_norm": 0.7625080078092628, "learning_rate": 7.474452554744527e-07, "loss": 0.624, "step": 29643 }, { "epoch": 0.8654929783072027, "grad_norm": 0.791595794119262, "learning_rate": 7.472830494728306e-07, "loss": 0.7702, "step": 29644 }, { "epoch": 0.8655221745350501, "grad_norm": 0.6952448866826572, "learning_rate": 7.471208434712085e-07, "loss": 0.5985, "step": 29645 }, { "epoch": 0.8655513707628975, "grad_norm": 0.838861683446881, "learning_rate": 7.469586374695864e-07, "loss": 0.6033, "step": 29646 }, { "epoch": 0.8655805669907448, "grad_norm": 0.7194517825744713, "learning_rate": 7.467964314679643e-07, "loss": 0.6215, "step": 29647 }, { "epoch": 0.8656097632185922, "grad_norm": 0.7273081286816803, "learning_rate": 7.466342254663424e-07, "loss": 0.6595, "step": 29648 }, { "epoch": 0.8656389594464395, "grad_norm": 0.6921428243287675, "learning_rate": 7.464720194647203e-07, "loss": 0.5849, "step": 29649 }, { "epoch": 0.8656681556742869, "grad_norm": 0.6919402320502621, "learning_rate": 7.463098134630982e-07, "loss": 0.5575, "step": 29650 }, { "epoch": 0.8656973519021343, "grad_norm": 0.7391316904172737, "learning_rate": 7.461476074614761e-07, "loss": 0.6296, "step": 29651 }, { "epoch": 0.8657265481299816, "grad_norm": 0.6564273813806388, "learning_rate": 7.45985401459854e-07, "loss": 0.5709, "step": 29652 }, { "epoch": 0.865755744357829, "grad_norm": 0.7197612816195689, "learning_rate": 7.458231954582321e-07, "loss": 0.6652, "step": 29653 }, { "epoch": 0.8657849405856763, "grad_norm": 0.7268443706487902, "learning_rate": 7.4566098945661e-07, "loss": 0.5781, "step": 29654 }, { "epoch": 0.8658141368135237, "grad_norm": 0.6954073144944286, "learning_rate": 7.454987834549879e-07, "loss": 0.5624, "step": 29655 }, { "epoch": 0.8658433330413711, "grad_norm": 0.6776191644645176, "learning_rate": 7.453365774533658e-07, "loss": 0.5447, "step": 29656 }, { "epoch": 0.8658725292692184, "grad_norm": 0.6888899057646579, "learning_rate": 7.451743714517438e-07, "loss": 0.6022, "step": 29657 }, { "epoch": 0.8659017254970658, "grad_norm": 0.6634288427271889, "learning_rate": 7.450121654501217e-07, "loss": 0.5617, "step": 29658 }, { "epoch": 0.8659309217249131, "grad_norm": 0.7112586796447545, "learning_rate": 7.448499594484996e-07, "loss": 0.6114, "step": 29659 }, { "epoch": 0.8659601179527605, "grad_norm": 0.6872288971372192, "learning_rate": 7.446877534468776e-07, "loss": 0.5612, "step": 29660 }, { "epoch": 0.8659893141806079, "grad_norm": 0.6911158369297866, "learning_rate": 7.445255474452555e-07, "loss": 0.5826, "step": 29661 }, { "epoch": 0.8660185104084552, "grad_norm": 0.742786007937499, "learning_rate": 7.443633414436335e-07, "loss": 0.7118, "step": 29662 }, { "epoch": 0.8660477066363026, "grad_norm": 0.7769757330005483, "learning_rate": 7.442011354420114e-07, "loss": 0.6444, "step": 29663 }, { "epoch": 0.86607690286415, "grad_norm": 0.7374896293591084, "learning_rate": 7.440389294403893e-07, "loss": 0.6088, "step": 29664 }, { "epoch": 0.8661060990919973, "grad_norm": 0.6976562014308308, "learning_rate": 7.438767234387672e-07, "loss": 0.578, "step": 29665 }, { "epoch": 0.8661352953198447, "grad_norm": 0.6634915476363131, "learning_rate": 7.437145174371451e-07, "loss": 0.5118, "step": 29666 }, { "epoch": 0.866164491547692, "grad_norm": 0.8788228237499425, "learning_rate": 7.435523114355232e-07, "loss": 0.6449, "step": 29667 }, { "epoch": 0.8661936877755394, "grad_norm": 0.7275493334747697, "learning_rate": 7.433901054339011e-07, "loss": 0.6036, "step": 29668 }, { "epoch": 0.8662228840033868, "grad_norm": 0.727540929545443, "learning_rate": 7.43227899432279e-07, "loss": 0.6483, "step": 29669 }, { "epoch": 0.8662520802312341, "grad_norm": 0.6845990784751198, "learning_rate": 7.43065693430657e-07, "loss": 0.5613, "step": 29670 }, { "epoch": 0.8662812764590815, "grad_norm": 0.7303439720572562, "learning_rate": 7.429034874290349e-07, "loss": 0.6535, "step": 29671 }, { "epoch": 0.8663104726869288, "grad_norm": 0.7602423695297809, "learning_rate": 7.42741281427413e-07, "loss": 0.6374, "step": 29672 }, { "epoch": 0.8663396689147762, "grad_norm": 0.7271227435474487, "learning_rate": 7.425790754257909e-07, "loss": 0.6558, "step": 29673 }, { "epoch": 0.8663688651426236, "grad_norm": 0.8285858305362586, "learning_rate": 7.424168694241688e-07, "loss": 0.6697, "step": 29674 }, { "epoch": 0.8663980613704709, "grad_norm": 0.7244268743053996, "learning_rate": 7.422546634225467e-07, "loss": 0.6691, "step": 29675 }, { "epoch": 0.8664272575983183, "grad_norm": 0.7673179352799927, "learning_rate": 7.420924574209247e-07, "loss": 0.7409, "step": 29676 }, { "epoch": 0.8664564538261657, "grad_norm": 0.7215339374098768, "learning_rate": 7.419302514193026e-07, "loss": 0.6715, "step": 29677 }, { "epoch": 0.866485650054013, "grad_norm": 0.6995979421100439, "learning_rate": 7.417680454176805e-07, "loss": 0.6372, "step": 29678 }, { "epoch": 0.8665148462818604, "grad_norm": 0.6693741789208024, "learning_rate": 7.416058394160585e-07, "loss": 0.5322, "step": 29679 }, { "epoch": 0.8665440425097077, "grad_norm": 0.714068380453807, "learning_rate": 7.414436334144364e-07, "loss": 0.6606, "step": 29680 }, { "epoch": 0.8665732387375551, "grad_norm": 0.7073994773647838, "learning_rate": 7.412814274128144e-07, "loss": 0.5976, "step": 29681 }, { "epoch": 0.8666024349654025, "grad_norm": 0.7216122833490194, "learning_rate": 7.411192214111923e-07, "loss": 0.6406, "step": 29682 }, { "epoch": 0.8666316311932498, "grad_norm": 0.8323502536249137, "learning_rate": 7.409570154095702e-07, "loss": 0.7381, "step": 29683 }, { "epoch": 0.8666608274210972, "grad_norm": 0.6622314691102802, "learning_rate": 7.407948094079481e-07, "loss": 0.5397, "step": 29684 }, { "epoch": 0.8666900236489445, "grad_norm": 0.6988024373545363, "learning_rate": 7.40632603406326e-07, "loss": 0.6392, "step": 29685 }, { "epoch": 0.8667192198767919, "grad_norm": 0.7115830789106536, "learning_rate": 7.404703974047041e-07, "loss": 0.6284, "step": 29686 }, { "epoch": 0.8667484161046393, "grad_norm": 0.6958534793125348, "learning_rate": 7.40308191403082e-07, "loss": 0.6406, "step": 29687 }, { "epoch": 0.8667776123324866, "grad_norm": 0.6949491951706822, "learning_rate": 7.401459854014599e-07, "loss": 0.5618, "step": 29688 }, { "epoch": 0.866806808560334, "grad_norm": 0.6994703437494644, "learning_rate": 7.399837793998378e-07, "loss": 0.5661, "step": 29689 }, { "epoch": 0.8668360047881813, "grad_norm": 0.6371419067765378, "learning_rate": 7.398215733982157e-07, "loss": 0.5279, "step": 29690 }, { "epoch": 0.8668652010160287, "grad_norm": 0.7396683753196404, "learning_rate": 7.396593673965938e-07, "loss": 0.6673, "step": 29691 }, { "epoch": 0.8668943972438761, "grad_norm": 0.7855676740536761, "learning_rate": 7.394971613949717e-07, "loss": 0.7695, "step": 29692 }, { "epoch": 0.8669235934717234, "grad_norm": 0.6848599472039, "learning_rate": 7.393349553933496e-07, "loss": 0.5986, "step": 29693 }, { "epoch": 0.8669527896995708, "grad_norm": 0.7609490233093438, "learning_rate": 7.391727493917275e-07, "loss": 0.6849, "step": 29694 }, { "epoch": 0.8669819859274182, "grad_norm": 0.7250400603697704, "learning_rate": 7.390105433901055e-07, "loss": 0.5895, "step": 29695 }, { "epoch": 0.8670111821552655, "grad_norm": 0.7149892078082228, "learning_rate": 7.388483373884834e-07, "loss": 0.6016, "step": 29696 }, { "epoch": 0.8670403783831129, "grad_norm": 0.7219123580203699, "learning_rate": 7.386861313868613e-07, "loss": 0.6506, "step": 29697 }, { "epoch": 0.8670695746109602, "grad_norm": 0.7618438281725228, "learning_rate": 7.385239253852392e-07, "loss": 0.6713, "step": 29698 }, { "epoch": 0.8670987708388076, "grad_norm": 0.7104820407269181, "learning_rate": 7.383617193836172e-07, "loss": 0.639, "step": 29699 }, { "epoch": 0.867127967066655, "grad_norm": 0.7447174943541774, "learning_rate": 7.381995133819952e-07, "loss": 0.6477, "step": 29700 }, { "epoch": 0.8671571632945023, "grad_norm": 0.6722337686420621, "learning_rate": 7.380373073803731e-07, "loss": 0.5534, "step": 29701 }, { "epoch": 0.8671863595223497, "grad_norm": 0.6641847522642454, "learning_rate": 7.37875101378751e-07, "loss": 0.566, "step": 29702 }, { "epoch": 0.867215555750197, "grad_norm": 0.7120769471092003, "learning_rate": 7.37712895377129e-07, "loss": 0.5838, "step": 29703 }, { "epoch": 0.8672447519780444, "grad_norm": 0.6833501403720613, "learning_rate": 7.375506893755069e-07, "loss": 0.6173, "step": 29704 }, { "epoch": 0.8672739482058918, "grad_norm": 1.0504465337216997, "learning_rate": 7.37388483373885e-07, "loss": 0.6258, "step": 29705 }, { "epoch": 0.8673031444337391, "grad_norm": 0.7212352725663472, "learning_rate": 7.372262773722629e-07, "loss": 0.6602, "step": 29706 }, { "epoch": 0.8673323406615865, "grad_norm": 0.7014924191141413, "learning_rate": 7.370640713706408e-07, "loss": 0.626, "step": 29707 }, { "epoch": 0.8673615368894338, "grad_norm": 0.7572198636420697, "learning_rate": 7.369018653690187e-07, "loss": 0.6341, "step": 29708 }, { "epoch": 0.8673907331172812, "grad_norm": 0.6721372147815871, "learning_rate": 7.367396593673967e-07, "loss": 0.591, "step": 29709 }, { "epoch": 0.8674199293451286, "grad_norm": 0.7514187332748835, "learning_rate": 7.365774533657747e-07, "loss": 0.677, "step": 29710 }, { "epoch": 0.8674491255729759, "grad_norm": 0.7200240129433622, "learning_rate": 7.364152473641526e-07, "loss": 0.6411, "step": 29711 }, { "epoch": 0.8674783218008233, "grad_norm": 0.7455255740775555, "learning_rate": 7.362530413625305e-07, "loss": 0.6667, "step": 29712 }, { "epoch": 0.8675075180286707, "grad_norm": 0.7356987387086266, "learning_rate": 7.360908353609084e-07, "loss": 0.6083, "step": 29713 }, { "epoch": 0.867536714256518, "grad_norm": 0.620854522341999, "learning_rate": 7.359286293592864e-07, "loss": 0.4968, "step": 29714 }, { "epoch": 0.8675659104843654, "grad_norm": 0.6988044839281468, "learning_rate": 7.357664233576643e-07, "loss": 0.5939, "step": 29715 }, { "epoch": 0.8675951067122127, "grad_norm": 0.6668682451773228, "learning_rate": 7.356042173560422e-07, "loss": 0.5199, "step": 29716 }, { "epoch": 0.8676243029400601, "grad_norm": 0.7618117948946697, "learning_rate": 7.354420113544201e-07, "loss": 0.6931, "step": 29717 }, { "epoch": 0.8676534991679075, "grad_norm": 0.7134254786620038, "learning_rate": 7.352798053527981e-07, "loss": 0.6124, "step": 29718 }, { "epoch": 0.8676826953957548, "grad_norm": 0.6696391095053214, "learning_rate": 7.351175993511761e-07, "loss": 0.5096, "step": 29719 }, { "epoch": 0.8677118916236022, "grad_norm": 0.7104655816564631, "learning_rate": 7.34955393349554e-07, "loss": 0.6335, "step": 29720 }, { "epoch": 0.8677410878514495, "grad_norm": 0.7060173576471037, "learning_rate": 7.347931873479319e-07, "loss": 0.6082, "step": 29721 }, { "epoch": 0.8677702840792969, "grad_norm": 0.6773111321549827, "learning_rate": 7.346309813463098e-07, "loss": 0.5701, "step": 29722 }, { "epoch": 0.8677994803071443, "grad_norm": 0.718067594906584, "learning_rate": 7.344687753446877e-07, "loss": 0.6226, "step": 29723 }, { "epoch": 0.8678286765349916, "grad_norm": 0.7476615562431642, "learning_rate": 7.343065693430658e-07, "loss": 0.694, "step": 29724 }, { "epoch": 0.867857872762839, "grad_norm": 0.811862939266851, "learning_rate": 7.341443633414437e-07, "loss": 0.7033, "step": 29725 }, { "epoch": 0.8678870689906864, "grad_norm": 0.9074229277958975, "learning_rate": 7.339821573398216e-07, "loss": 0.639, "step": 29726 }, { "epoch": 0.8679162652185337, "grad_norm": 0.7448534137632868, "learning_rate": 7.338199513381995e-07, "loss": 0.6095, "step": 29727 }, { "epoch": 0.8679454614463811, "grad_norm": 0.7087906719930857, "learning_rate": 7.336577453365775e-07, "loss": 0.6063, "step": 29728 }, { "epoch": 0.8679746576742285, "grad_norm": 0.6877425730280812, "learning_rate": 7.334955393349554e-07, "loss": 0.5818, "step": 29729 }, { "epoch": 0.8680038539020759, "grad_norm": 0.7453439424610601, "learning_rate": 7.333333333333334e-07, "loss": 0.6378, "step": 29730 }, { "epoch": 0.8680330501299233, "grad_norm": 0.7396104603053416, "learning_rate": 7.331711273317113e-07, "loss": 0.6719, "step": 29731 }, { "epoch": 0.8680622463577706, "grad_norm": 0.7242849233213059, "learning_rate": 7.330089213300892e-07, "loss": 0.6105, "step": 29732 }, { "epoch": 0.868091442585618, "grad_norm": 0.804441924026071, "learning_rate": 7.328467153284672e-07, "loss": 0.7063, "step": 29733 }, { "epoch": 0.8681206388134654, "grad_norm": 0.6988893631399639, "learning_rate": 7.326845093268452e-07, "loss": 0.5681, "step": 29734 }, { "epoch": 0.8681498350413127, "grad_norm": 0.7002066238352769, "learning_rate": 7.32522303325223e-07, "loss": 0.6135, "step": 29735 }, { "epoch": 0.8681790312691601, "grad_norm": 0.6815527037406202, "learning_rate": 7.32360097323601e-07, "loss": 0.5313, "step": 29736 }, { "epoch": 0.8682082274970074, "grad_norm": 0.7101990720553875, "learning_rate": 7.32197891321979e-07, "loss": 0.5417, "step": 29737 }, { "epoch": 0.8682374237248548, "grad_norm": 0.7454663017396282, "learning_rate": 7.32035685320357e-07, "loss": 0.6372, "step": 29738 }, { "epoch": 0.8682666199527022, "grad_norm": 0.7295523717335031, "learning_rate": 7.318734793187349e-07, "loss": 0.6404, "step": 29739 }, { "epoch": 0.8682958161805495, "grad_norm": 0.6823298452824021, "learning_rate": 7.317112733171128e-07, "loss": 0.5797, "step": 29740 }, { "epoch": 0.8683250124083969, "grad_norm": 0.7396202988822921, "learning_rate": 7.315490673154907e-07, "loss": 0.5947, "step": 29741 }, { "epoch": 0.8683542086362442, "grad_norm": 0.6683249168031069, "learning_rate": 7.313868613138688e-07, "loss": 0.5708, "step": 29742 }, { "epoch": 0.8683834048640916, "grad_norm": 0.7790604785172455, "learning_rate": 7.312246553122467e-07, "loss": 0.7173, "step": 29743 }, { "epoch": 0.868412601091939, "grad_norm": 0.7028596811769409, "learning_rate": 7.310624493106246e-07, "loss": 0.5997, "step": 29744 }, { "epoch": 0.8684417973197863, "grad_norm": 0.6772603077658691, "learning_rate": 7.309002433090025e-07, "loss": 0.5877, "step": 29745 }, { "epoch": 0.8684709935476337, "grad_norm": 0.6624961326292332, "learning_rate": 7.307380373073804e-07, "loss": 0.5345, "step": 29746 }, { "epoch": 0.868500189775481, "grad_norm": 0.7163615693664459, "learning_rate": 7.305758313057584e-07, "loss": 0.6456, "step": 29747 }, { "epoch": 0.8685293860033284, "grad_norm": 0.7049668088653939, "learning_rate": 7.304136253041363e-07, "loss": 0.6158, "step": 29748 }, { "epoch": 0.8685585822311758, "grad_norm": 0.6709155127705905, "learning_rate": 7.302514193025143e-07, "loss": 0.558, "step": 29749 }, { "epoch": 0.8685877784590231, "grad_norm": 0.6544848005232071, "learning_rate": 7.300892133008922e-07, "loss": 0.5732, "step": 29750 }, { "epoch": 0.8686169746868705, "grad_norm": 0.6780044080643507, "learning_rate": 7.299270072992701e-07, "loss": 0.5656, "step": 29751 }, { "epoch": 0.8686461709147179, "grad_norm": 0.7221202741289048, "learning_rate": 7.297648012976481e-07, "loss": 0.7012, "step": 29752 }, { "epoch": 0.8686753671425652, "grad_norm": 0.7590843106468634, "learning_rate": 7.29602595296026e-07, "loss": 0.6262, "step": 29753 }, { "epoch": 0.8687045633704126, "grad_norm": 0.7521681092126252, "learning_rate": 7.294403892944039e-07, "loss": 0.6661, "step": 29754 }, { "epoch": 0.8687337595982599, "grad_norm": 0.7434203176672837, "learning_rate": 7.292781832927818e-07, "loss": 0.6886, "step": 29755 }, { "epoch": 0.8687629558261073, "grad_norm": 0.6935452107595629, "learning_rate": 7.291159772911598e-07, "loss": 0.5672, "step": 29756 }, { "epoch": 0.8687921520539547, "grad_norm": 0.7298801217990558, "learning_rate": 7.289537712895378e-07, "loss": 0.6289, "step": 29757 }, { "epoch": 0.868821348281802, "grad_norm": 0.7551270230219159, "learning_rate": 7.287915652879157e-07, "loss": 0.6875, "step": 29758 }, { "epoch": 0.8688505445096494, "grad_norm": 0.7470502921514711, "learning_rate": 7.286293592862936e-07, "loss": 0.6974, "step": 29759 }, { "epoch": 0.8688797407374967, "grad_norm": 0.6985725025232962, "learning_rate": 7.284671532846715e-07, "loss": 0.6405, "step": 29760 }, { "epoch": 0.8689089369653441, "grad_norm": 0.7730481158107688, "learning_rate": 7.283049472830496e-07, "loss": 0.747, "step": 29761 }, { "epoch": 0.8689381331931915, "grad_norm": 0.8043353738889617, "learning_rate": 7.281427412814275e-07, "loss": 0.7992, "step": 29762 }, { "epoch": 0.8689673294210388, "grad_norm": 0.7686588022097984, "learning_rate": 7.279805352798054e-07, "loss": 0.6619, "step": 29763 }, { "epoch": 0.8689965256488862, "grad_norm": 0.7406208067621368, "learning_rate": 7.278183292781833e-07, "loss": 0.679, "step": 29764 }, { "epoch": 0.8690257218767335, "grad_norm": 0.6985872178279716, "learning_rate": 7.276561232765612e-07, "loss": 0.5999, "step": 29765 }, { "epoch": 0.8690549181045809, "grad_norm": 0.6934504155252694, "learning_rate": 7.274939172749393e-07, "loss": 0.5581, "step": 29766 }, { "epoch": 0.8690841143324283, "grad_norm": 0.7155829390607205, "learning_rate": 7.273317112733172e-07, "loss": 0.6164, "step": 29767 }, { "epoch": 0.8691133105602756, "grad_norm": 0.6894197207778551, "learning_rate": 7.271695052716952e-07, "loss": 0.627, "step": 29768 }, { "epoch": 0.869142506788123, "grad_norm": 0.7662282337511162, "learning_rate": 7.270072992700731e-07, "loss": 0.695, "step": 29769 }, { "epoch": 0.8691717030159704, "grad_norm": 0.7806246592177027, "learning_rate": 7.26845093268451e-07, "loss": 0.6725, "step": 29770 }, { "epoch": 0.8692008992438177, "grad_norm": 0.7985035780184812, "learning_rate": 7.26682887266829e-07, "loss": 0.7481, "step": 29771 }, { "epoch": 0.8692300954716651, "grad_norm": 0.699512876888685, "learning_rate": 7.265206812652069e-07, "loss": 0.5776, "step": 29772 }, { "epoch": 0.8692592916995124, "grad_norm": 0.7201854083360739, "learning_rate": 7.263584752635848e-07, "loss": 0.6223, "step": 29773 }, { "epoch": 0.8692884879273598, "grad_norm": 0.7462648724971588, "learning_rate": 7.261962692619627e-07, "loss": 0.6694, "step": 29774 }, { "epoch": 0.8693176841552072, "grad_norm": 0.860940454811999, "learning_rate": 7.260340632603408e-07, "loss": 0.6451, "step": 29775 }, { "epoch": 0.8693468803830545, "grad_norm": 0.6926648971861084, "learning_rate": 7.258718572587187e-07, "loss": 0.595, "step": 29776 }, { "epoch": 0.8693760766109019, "grad_norm": 0.657757415945601, "learning_rate": 7.257096512570966e-07, "loss": 0.5142, "step": 29777 }, { "epoch": 0.8694052728387492, "grad_norm": 0.7536931215434641, "learning_rate": 7.255474452554745e-07, "loss": 0.6267, "step": 29778 }, { "epoch": 0.8694344690665966, "grad_norm": 0.7181432098490647, "learning_rate": 7.253852392538524e-07, "loss": 0.6172, "step": 29779 }, { "epoch": 0.869463665294444, "grad_norm": 0.6782860304499021, "learning_rate": 7.252230332522305e-07, "loss": 0.5771, "step": 29780 }, { "epoch": 0.8694928615222913, "grad_norm": 0.7174410981217393, "learning_rate": 7.250608272506084e-07, "loss": 0.5907, "step": 29781 }, { "epoch": 0.8695220577501387, "grad_norm": 0.770890705360093, "learning_rate": 7.248986212489863e-07, "loss": 0.6539, "step": 29782 }, { "epoch": 0.869551253977986, "grad_norm": 0.7497032330943414, "learning_rate": 7.247364152473642e-07, "loss": 0.6543, "step": 29783 }, { "epoch": 0.8695804502058334, "grad_norm": 0.7672199332349374, "learning_rate": 7.245742092457421e-07, "loss": 0.7245, "step": 29784 }, { "epoch": 0.8696096464336808, "grad_norm": 0.7737479021424483, "learning_rate": 7.244120032441201e-07, "loss": 0.6334, "step": 29785 }, { "epoch": 0.8696388426615281, "grad_norm": 0.767139746747464, "learning_rate": 7.24249797242498e-07, "loss": 0.7415, "step": 29786 }, { "epoch": 0.8696680388893755, "grad_norm": 0.7193944130084574, "learning_rate": 7.24087591240876e-07, "loss": 0.632, "step": 29787 }, { "epoch": 0.8696972351172229, "grad_norm": 0.7402326626855495, "learning_rate": 7.239253852392539e-07, "loss": 0.6904, "step": 29788 }, { "epoch": 0.8697264313450702, "grad_norm": 0.7815719028380369, "learning_rate": 7.237631792376318e-07, "loss": 0.7455, "step": 29789 }, { "epoch": 0.8697556275729176, "grad_norm": 0.7201837431495721, "learning_rate": 7.236009732360098e-07, "loss": 0.6064, "step": 29790 }, { "epoch": 0.8697848238007649, "grad_norm": 0.751446562635531, "learning_rate": 7.234387672343877e-07, "loss": 0.7143, "step": 29791 }, { "epoch": 0.8698140200286123, "grad_norm": 0.68795942502382, "learning_rate": 7.232765612327656e-07, "loss": 0.5908, "step": 29792 }, { "epoch": 0.8698432162564597, "grad_norm": 0.6709955137024496, "learning_rate": 7.231143552311435e-07, "loss": 0.5661, "step": 29793 }, { "epoch": 0.869872412484307, "grad_norm": 0.7258927736561002, "learning_rate": 7.229521492295216e-07, "loss": 0.6162, "step": 29794 }, { "epoch": 0.8699016087121544, "grad_norm": 0.678020253246644, "learning_rate": 7.227899432278995e-07, "loss": 0.6066, "step": 29795 }, { "epoch": 0.8699308049400017, "grad_norm": 0.7282740468683448, "learning_rate": 7.226277372262774e-07, "loss": 0.6228, "step": 29796 }, { "epoch": 0.8699600011678491, "grad_norm": 0.713685619315101, "learning_rate": 7.224655312246553e-07, "loss": 0.6404, "step": 29797 }, { "epoch": 0.8699891973956965, "grad_norm": 0.7485674510069332, "learning_rate": 7.223033252230332e-07, "loss": 0.6438, "step": 29798 }, { "epoch": 0.8700183936235438, "grad_norm": 0.7623541187151923, "learning_rate": 7.221411192214114e-07, "loss": 0.7362, "step": 29799 }, { "epoch": 0.8700475898513912, "grad_norm": 0.7153730538427828, "learning_rate": 7.219789132197893e-07, "loss": 0.6272, "step": 29800 }, { "epoch": 0.8700767860792386, "grad_norm": 0.7856435541744259, "learning_rate": 7.218167072181672e-07, "loss": 0.7246, "step": 29801 }, { "epoch": 0.8701059823070859, "grad_norm": 0.7171141505699143, "learning_rate": 7.216545012165451e-07, "loss": 0.6574, "step": 29802 }, { "epoch": 0.8701351785349333, "grad_norm": 0.6754832374853562, "learning_rate": 7.21492295214923e-07, "loss": 0.5334, "step": 29803 }, { "epoch": 0.8701643747627806, "grad_norm": 0.671806095428065, "learning_rate": 7.21330089213301e-07, "loss": 0.6033, "step": 29804 }, { "epoch": 0.870193570990628, "grad_norm": 0.7566145646422183, "learning_rate": 7.211678832116789e-07, "loss": 0.719, "step": 29805 }, { "epoch": 0.8702227672184754, "grad_norm": 0.7303966468491022, "learning_rate": 7.210056772100568e-07, "loss": 0.6452, "step": 29806 }, { "epoch": 0.8702519634463227, "grad_norm": 0.7381058321984747, "learning_rate": 7.208434712084348e-07, "loss": 0.7005, "step": 29807 }, { "epoch": 0.8702811596741701, "grad_norm": 0.6981705485660599, "learning_rate": 7.206812652068128e-07, "loss": 0.5854, "step": 29808 }, { "epoch": 0.8703103559020174, "grad_norm": 0.7482712610670749, "learning_rate": 7.205190592051907e-07, "loss": 0.6889, "step": 29809 }, { "epoch": 0.8703395521298648, "grad_norm": 0.751806392032164, "learning_rate": 7.203568532035686e-07, "loss": 0.6853, "step": 29810 }, { "epoch": 0.8703687483577122, "grad_norm": 0.7411572416904286, "learning_rate": 7.201946472019465e-07, "loss": 0.6586, "step": 29811 }, { "epoch": 0.8703979445855595, "grad_norm": 0.71762629331324, "learning_rate": 7.200324412003244e-07, "loss": 0.6525, "step": 29812 }, { "epoch": 0.8704271408134069, "grad_norm": 0.7149803192093535, "learning_rate": 7.198702351987025e-07, "loss": 0.6302, "step": 29813 }, { "epoch": 0.8704563370412542, "grad_norm": 0.6598044399734623, "learning_rate": 7.197080291970804e-07, "loss": 0.5427, "step": 29814 }, { "epoch": 0.8704855332691016, "grad_norm": 0.7378119972128574, "learning_rate": 7.195458231954583e-07, "loss": 0.6463, "step": 29815 }, { "epoch": 0.870514729496949, "grad_norm": 0.7631494836221375, "learning_rate": 7.193836171938362e-07, "loss": 0.652, "step": 29816 }, { "epoch": 0.8705439257247963, "grad_norm": 0.7365682195646184, "learning_rate": 7.192214111922141e-07, "loss": 0.5895, "step": 29817 }, { "epoch": 0.8705731219526437, "grad_norm": 0.7036706286064098, "learning_rate": 7.190592051905921e-07, "loss": 0.5817, "step": 29818 }, { "epoch": 0.870602318180491, "grad_norm": 0.7230261990079452, "learning_rate": 7.188969991889701e-07, "loss": 0.6192, "step": 29819 }, { "epoch": 0.8706315144083384, "grad_norm": 0.7146609603168707, "learning_rate": 7.18734793187348e-07, "loss": 0.6297, "step": 29820 }, { "epoch": 0.8706607106361858, "grad_norm": 0.7495947898541934, "learning_rate": 7.185725871857259e-07, "loss": 0.6357, "step": 29821 }, { "epoch": 0.8706899068640331, "grad_norm": 0.7225069924743145, "learning_rate": 7.184103811841038e-07, "loss": 0.6143, "step": 29822 }, { "epoch": 0.8707191030918805, "grad_norm": 0.729149998992132, "learning_rate": 7.182481751824818e-07, "loss": 0.6835, "step": 29823 }, { "epoch": 0.8707482993197279, "grad_norm": 0.6643513327769265, "learning_rate": 7.180859691808597e-07, "loss": 0.552, "step": 29824 }, { "epoch": 0.8707774955475752, "grad_norm": 0.7104105804045233, "learning_rate": 7.179237631792376e-07, "loss": 0.5392, "step": 29825 }, { "epoch": 0.8708066917754226, "grad_norm": 0.7597916116947196, "learning_rate": 7.177615571776156e-07, "loss": 0.5994, "step": 29826 }, { "epoch": 0.8708358880032699, "grad_norm": 0.7297924479774793, "learning_rate": 7.175993511759936e-07, "loss": 0.6286, "step": 29827 }, { "epoch": 0.8708650842311173, "grad_norm": 0.7263772777113905, "learning_rate": 7.174371451743715e-07, "loss": 0.629, "step": 29828 }, { "epoch": 0.8708942804589647, "grad_norm": 0.7518092929799306, "learning_rate": 7.172749391727494e-07, "loss": 0.6382, "step": 29829 }, { "epoch": 0.870923476686812, "grad_norm": 0.7348062470104523, "learning_rate": 7.171127331711273e-07, "loss": 0.6773, "step": 29830 }, { "epoch": 0.8709526729146594, "grad_norm": 0.7311003705468401, "learning_rate": 7.169505271695052e-07, "loss": 0.6488, "step": 29831 }, { "epoch": 0.8709818691425067, "grad_norm": 0.7311519148494213, "learning_rate": 7.167883211678834e-07, "loss": 0.6826, "step": 29832 }, { "epoch": 0.8710110653703541, "grad_norm": 0.727671754755017, "learning_rate": 7.166261151662613e-07, "loss": 0.5925, "step": 29833 }, { "epoch": 0.8710402615982015, "grad_norm": 0.6984968933699925, "learning_rate": 7.164639091646392e-07, "loss": 0.5753, "step": 29834 }, { "epoch": 0.8710694578260488, "grad_norm": 0.7003868699761588, "learning_rate": 7.163017031630171e-07, "loss": 0.619, "step": 29835 }, { "epoch": 0.8710986540538962, "grad_norm": 0.7164144741703873, "learning_rate": 7.16139497161395e-07, "loss": 0.6188, "step": 29836 }, { "epoch": 0.8711278502817436, "grad_norm": 0.644822234094961, "learning_rate": 7.15977291159773e-07, "loss": 0.5281, "step": 29837 }, { "epoch": 0.8711570465095909, "grad_norm": 0.7028837760216182, "learning_rate": 7.15815085158151e-07, "loss": 0.5788, "step": 29838 }, { "epoch": 0.8711862427374383, "grad_norm": 0.7385887355964719, "learning_rate": 7.156528791565289e-07, "loss": 0.6823, "step": 29839 }, { "epoch": 0.8712154389652856, "grad_norm": 0.7206745344056531, "learning_rate": 7.154906731549068e-07, "loss": 0.634, "step": 29840 }, { "epoch": 0.871244635193133, "grad_norm": 0.7489234571203313, "learning_rate": 7.153284671532847e-07, "loss": 0.6192, "step": 29841 }, { "epoch": 0.8712738314209804, "grad_norm": 0.7309549020987081, "learning_rate": 7.151662611516627e-07, "loss": 0.6326, "step": 29842 }, { "epoch": 0.8713030276488277, "grad_norm": 0.7291593358629382, "learning_rate": 7.150040551500406e-07, "loss": 0.6136, "step": 29843 }, { "epoch": 0.8713322238766751, "grad_norm": 0.7302764619106946, "learning_rate": 7.148418491484185e-07, "loss": 0.6122, "step": 29844 }, { "epoch": 0.8713614201045224, "grad_norm": 0.7359968694555709, "learning_rate": 7.146796431467965e-07, "loss": 0.686, "step": 29845 }, { "epoch": 0.8713906163323698, "grad_norm": 0.6747205334615364, "learning_rate": 7.145174371451745e-07, "loss": 0.5497, "step": 29846 }, { "epoch": 0.8714198125602172, "grad_norm": 0.7164387535357529, "learning_rate": 7.143552311435524e-07, "loss": 0.6027, "step": 29847 }, { "epoch": 0.8714490087880645, "grad_norm": 0.7012199629975052, "learning_rate": 7.141930251419303e-07, "loss": 0.5726, "step": 29848 }, { "epoch": 0.8714782050159119, "grad_norm": 0.7645987595505693, "learning_rate": 7.140308191403082e-07, "loss": 0.7157, "step": 29849 }, { "epoch": 0.8715074012437594, "grad_norm": 0.6880017489234721, "learning_rate": 7.138686131386861e-07, "loss": 0.5653, "step": 29850 }, { "epoch": 0.8715365974716067, "grad_norm": 0.7429631298715346, "learning_rate": 7.137064071370642e-07, "loss": 0.7233, "step": 29851 }, { "epoch": 0.8715657936994541, "grad_norm": 0.6808183696655701, "learning_rate": 7.135442011354421e-07, "loss": 0.5712, "step": 29852 }, { "epoch": 0.8715949899273014, "grad_norm": 0.6853728991471276, "learning_rate": 7.1338199513382e-07, "loss": 0.5903, "step": 29853 }, { "epoch": 0.8716241861551488, "grad_norm": 0.6742571809725476, "learning_rate": 7.132197891321979e-07, "loss": 0.5394, "step": 29854 }, { "epoch": 0.8716533823829962, "grad_norm": 0.7234109715101356, "learning_rate": 7.130575831305758e-07, "loss": 0.6709, "step": 29855 }, { "epoch": 0.8716825786108435, "grad_norm": 0.7755472555318832, "learning_rate": 7.128953771289538e-07, "loss": 0.7162, "step": 29856 }, { "epoch": 0.8717117748386909, "grad_norm": 0.711884032013454, "learning_rate": 7.127331711273318e-07, "loss": 0.6108, "step": 29857 }, { "epoch": 0.8717409710665383, "grad_norm": 0.7014378927201652, "learning_rate": 7.125709651257097e-07, "loss": 0.642, "step": 29858 }, { "epoch": 0.8717701672943856, "grad_norm": 0.7201949680142558, "learning_rate": 7.124087591240876e-07, "loss": 0.6109, "step": 29859 }, { "epoch": 0.871799363522233, "grad_norm": 0.7101737011932502, "learning_rate": 7.122465531224656e-07, "loss": 0.6373, "step": 29860 }, { "epoch": 0.8718285597500803, "grad_norm": 0.7807125130352671, "learning_rate": 7.120843471208435e-07, "loss": 0.6212, "step": 29861 }, { "epoch": 0.8718577559779277, "grad_norm": 0.7139223675736445, "learning_rate": 7.119221411192214e-07, "loss": 0.6268, "step": 29862 }, { "epoch": 0.8718869522057751, "grad_norm": 1.058661167800621, "learning_rate": 7.117599351175993e-07, "loss": 0.5443, "step": 29863 }, { "epoch": 0.8719161484336224, "grad_norm": 0.6355552785351533, "learning_rate": 7.115977291159772e-07, "loss": 0.5255, "step": 29864 }, { "epoch": 0.8719453446614698, "grad_norm": 0.7822512535322925, "learning_rate": 7.114355231143554e-07, "loss": 0.6747, "step": 29865 }, { "epoch": 0.8719745408893171, "grad_norm": 0.7187235334938469, "learning_rate": 7.112733171127333e-07, "loss": 0.6385, "step": 29866 }, { "epoch": 0.8720037371171645, "grad_norm": 0.6868418254416143, "learning_rate": 7.111111111111112e-07, "loss": 0.601, "step": 29867 }, { "epoch": 0.8720329333450119, "grad_norm": 0.7173172458402687, "learning_rate": 7.109489051094891e-07, "loss": 0.6084, "step": 29868 }, { "epoch": 0.8720621295728592, "grad_norm": 0.745965842191234, "learning_rate": 7.10786699107867e-07, "loss": 0.6245, "step": 29869 }, { "epoch": 0.8720913258007066, "grad_norm": 0.7780105379116377, "learning_rate": 7.106244931062451e-07, "loss": 0.7207, "step": 29870 }, { "epoch": 0.872120522028554, "grad_norm": 0.7160792364234941, "learning_rate": 7.10462287104623e-07, "loss": 0.6728, "step": 29871 }, { "epoch": 0.8721497182564013, "grad_norm": 0.7271411034421542, "learning_rate": 7.103000811030009e-07, "loss": 0.6727, "step": 29872 }, { "epoch": 0.8721789144842487, "grad_norm": 0.674204497784827, "learning_rate": 7.101378751013788e-07, "loss": 0.5876, "step": 29873 }, { "epoch": 0.872208110712096, "grad_norm": 0.713122143913198, "learning_rate": 7.099756690997567e-07, "loss": 0.5695, "step": 29874 }, { "epoch": 0.8722373069399434, "grad_norm": 0.716509778742848, "learning_rate": 7.098134630981347e-07, "loss": 0.6262, "step": 29875 }, { "epoch": 0.8722665031677908, "grad_norm": 0.6649307210153457, "learning_rate": 7.096512570965127e-07, "loss": 0.52, "step": 29876 }, { "epoch": 0.8722956993956381, "grad_norm": 0.7506307160734655, "learning_rate": 7.094890510948906e-07, "loss": 0.6388, "step": 29877 }, { "epoch": 0.8723248956234855, "grad_norm": 0.7214589659436762, "learning_rate": 7.093268450932685e-07, "loss": 0.5937, "step": 29878 }, { "epoch": 0.8723540918513328, "grad_norm": 0.7431538149681965, "learning_rate": 7.091646390916465e-07, "loss": 0.7072, "step": 29879 }, { "epoch": 0.8723832880791802, "grad_norm": 0.7564870310147014, "learning_rate": 7.090024330900244e-07, "loss": 0.6963, "step": 29880 }, { "epoch": 0.8724124843070276, "grad_norm": 0.7206245173765605, "learning_rate": 7.088402270884023e-07, "loss": 0.6102, "step": 29881 }, { "epoch": 0.8724416805348749, "grad_norm": 0.7681277068714232, "learning_rate": 7.086780210867802e-07, "loss": 0.7596, "step": 29882 }, { "epoch": 0.8724708767627223, "grad_norm": 1.6599966093748453, "learning_rate": 7.085158150851581e-07, "loss": 0.8232, "step": 29883 }, { "epoch": 0.8725000729905696, "grad_norm": 0.7252326078761752, "learning_rate": 7.083536090835362e-07, "loss": 0.6377, "step": 29884 }, { "epoch": 0.872529269218417, "grad_norm": 0.6982696452986165, "learning_rate": 7.081914030819141e-07, "loss": 0.6061, "step": 29885 }, { "epoch": 0.8725584654462644, "grad_norm": 0.702448889756877, "learning_rate": 7.08029197080292e-07, "loss": 0.6504, "step": 29886 }, { "epoch": 0.8725876616741117, "grad_norm": 0.6715737901614314, "learning_rate": 7.078669910786699e-07, "loss": 0.558, "step": 29887 }, { "epoch": 0.8726168579019591, "grad_norm": 0.7297675514388616, "learning_rate": 7.077047850770478e-07, "loss": 0.6227, "step": 29888 }, { "epoch": 0.8726460541298064, "grad_norm": 0.7397932029409334, "learning_rate": 7.075425790754259e-07, "loss": 0.6707, "step": 29889 }, { "epoch": 0.8726752503576538, "grad_norm": 0.7657174636684086, "learning_rate": 7.073803730738038e-07, "loss": 0.6627, "step": 29890 }, { "epoch": 0.8727044465855012, "grad_norm": 0.7670026989565168, "learning_rate": 7.072181670721817e-07, "loss": 0.727, "step": 29891 }, { "epoch": 0.8727336428133485, "grad_norm": 0.7190093680666826, "learning_rate": 7.070559610705596e-07, "loss": 0.6415, "step": 29892 }, { "epoch": 0.8727628390411959, "grad_norm": 0.7513286839555259, "learning_rate": 7.068937550689376e-07, "loss": 0.6462, "step": 29893 }, { "epoch": 0.8727920352690433, "grad_norm": 0.6831127324054254, "learning_rate": 7.067315490673155e-07, "loss": 0.598, "step": 29894 }, { "epoch": 0.8728212314968906, "grad_norm": 0.7803402037798206, "learning_rate": 7.065693430656934e-07, "loss": 0.7326, "step": 29895 }, { "epoch": 0.872850427724738, "grad_norm": 0.7487727077466447, "learning_rate": 7.064071370640715e-07, "loss": 0.638, "step": 29896 }, { "epoch": 0.8728796239525853, "grad_norm": 0.7501712111144679, "learning_rate": 7.062449310624494e-07, "loss": 0.614, "step": 29897 }, { "epoch": 0.8729088201804327, "grad_norm": 0.8158191632974575, "learning_rate": 7.060827250608274e-07, "loss": 0.6608, "step": 29898 }, { "epoch": 0.8729380164082801, "grad_norm": 0.6854439446769917, "learning_rate": 7.059205190592053e-07, "loss": 0.5503, "step": 29899 }, { "epoch": 0.8729672126361274, "grad_norm": 0.6963810425031509, "learning_rate": 7.057583130575832e-07, "loss": 0.5873, "step": 29900 }, { "epoch": 0.8729964088639748, "grad_norm": 0.7278111180178563, "learning_rate": 7.055961070559611e-07, "loss": 0.7064, "step": 29901 }, { "epoch": 0.8730256050918221, "grad_norm": 0.7586425353389947, "learning_rate": 7.05433901054339e-07, "loss": 0.7144, "step": 29902 }, { "epoch": 0.8730548013196695, "grad_norm": 0.7131342493001211, "learning_rate": 7.052716950527171e-07, "loss": 0.6071, "step": 29903 }, { "epoch": 0.8730839975475169, "grad_norm": 0.6659485395979267, "learning_rate": 7.05109489051095e-07, "loss": 0.5561, "step": 29904 }, { "epoch": 0.8731131937753642, "grad_norm": 0.7603224142202974, "learning_rate": 7.049472830494729e-07, "loss": 0.6021, "step": 29905 }, { "epoch": 0.8731423900032116, "grad_norm": 0.7463940515460208, "learning_rate": 7.047850770478508e-07, "loss": 0.7003, "step": 29906 }, { "epoch": 0.873171586231059, "grad_norm": 0.7212797074101447, "learning_rate": 7.046228710462287e-07, "loss": 0.594, "step": 29907 }, { "epoch": 0.8732007824589063, "grad_norm": 0.7686542565574803, "learning_rate": 7.044606650446068e-07, "loss": 0.7521, "step": 29908 }, { "epoch": 0.8732299786867537, "grad_norm": 0.7541030902262912, "learning_rate": 7.042984590429847e-07, "loss": 0.7086, "step": 29909 }, { "epoch": 0.873259174914601, "grad_norm": 0.7337415895319966, "learning_rate": 7.041362530413626e-07, "loss": 0.6609, "step": 29910 }, { "epoch": 0.8732883711424484, "grad_norm": 0.8098930292758202, "learning_rate": 7.039740470397405e-07, "loss": 0.7457, "step": 29911 }, { "epoch": 0.8733175673702958, "grad_norm": 0.7587895220666683, "learning_rate": 7.038118410381185e-07, "loss": 0.7035, "step": 29912 }, { "epoch": 0.8733467635981431, "grad_norm": 0.7298986451372176, "learning_rate": 7.036496350364964e-07, "loss": 0.6399, "step": 29913 }, { "epoch": 0.8733759598259905, "grad_norm": 0.7297167625232451, "learning_rate": 7.034874290348743e-07, "loss": 0.5888, "step": 29914 }, { "epoch": 0.8734051560538378, "grad_norm": 0.7250055815284429, "learning_rate": 7.033252230332523e-07, "loss": 0.6502, "step": 29915 }, { "epoch": 0.8734343522816852, "grad_norm": 0.7003558744295091, "learning_rate": 7.031630170316302e-07, "loss": 0.6016, "step": 29916 }, { "epoch": 0.8734635485095326, "grad_norm": 0.6823798701078381, "learning_rate": 7.030008110300082e-07, "loss": 0.6043, "step": 29917 }, { "epoch": 0.8734927447373799, "grad_norm": 0.6868298424319313, "learning_rate": 7.028386050283861e-07, "loss": 0.559, "step": 29918 }, { "epoch": 0.8735219409652273, "grad_norm": 0.7454405723751479, "learning_rate": 7.02676399026764e-07, "loss": 0.6314, "step": 29919 }, { "epoch": 0.8735511371930746, "grad_norm": 0.7257342519181321, "learning_rate": 7.025141930251419e-07, "loss": 0.6139, "step": 29920 }, { "epoch": 0.873580333420922, "grad_norm": 0.7034618501372705, "learning_rate": 7.023519870235198e-07, "loss": 0.601, "step": 29921 }, { "epoch": 0.8736095296487694, "grad_norm": 0.7263440777186012, "learning_rate": 7.021897810218979e-07, "loss": 0.6738, "step": 29922 }, { "epoch": 0.8736387258766167, "grad_norm": 0.7168117596783191, "learning_rate": 7.020275750202758e-07, "loss": 0.6337, "step": 29923 }, { "epoch": 0.8736679221044641, "grad_norm": 0.7076864148150336, "learning_rate": 7.018653690186537e-07, "loss": 0.5595, "step": 29924 }, { "epoch": 0.8736971183323115, "grad_norm": 0.7051730374408138, "learning_rate": 7.017031630170316e-07, "loss": 0.608, "step": 29925 }, { "epoch": 0.8737263145601588, "grad_norm": 0.6773800783579388, "learning_rate": 7.015409570154096e-07, "loss": 0.5838, "step": 29926 }, { "epoch": 0.8737555107880062, "grad_norm": 0.7571188859221071, "learning_rate": 7.013787510137877e-07, "loss": 0.5302, "step": 29927 }, { "epoch": 0.8737847070158535, "grad_norm": 0.747926333884189, "learning_rate": 7.012165450121656e-07, "loss": 0.5917, "step": 29928 }, { "epoch": 0.8738139032437009, "grad_norm": 0.6562049605126095, "learning_rate": 7.010543390105435e-07, "loss": 0.5315, "step": 29929 }, { "epoch": 0.8738430994715483, "grad_norm": 0.6877396290452007, "learning_rate": 7.008921330089214e-07, "loss": 0.5711, "step": 29930 }, { "epoch": 0.8738722956993956, "grad_norm": 0.710231857527551, "learning_rate": 7.007299270072994e-07, "loss": 0.6273, "step": 29931 }, { "epoch": 0.873901491927243, "grad_norm": 0.7365171884549152, "learning_rate": 7.005677210056773e-07, "loss": 0.6178, "step": 29932 }, { "epoch": 0.8739306881550903, "grad_norm": 0.6805760970454946, "learning_rate": 7.004055150040552e-07, "loss": 0.583, "step": 29933 }, { "epoch": 0.8739598843829377, "grad_norm": 0.7565168049952603, "learning_rate": 7.002433090024332e-07, "loss": 0.6867, "step": 29934 }, { "epoch": 0.8739890806107851, "grad_norm": 0.7071318437330237, "learning_rate": 7.000811030008111e-07, "loss": 0.6087, "step": 29935 }, { "epoch": 0.8740182768386324, "grad_norm": 0.6826873675737165, "learning_rate": 6.999188969991891e-07, "loss": 0.6244, "step": 29936 }, { "epoch": 0.8740474730664798, "grad_norm": 0.7336170362624687, "learning_rate": 6.99756690997567e-07, "loss": 0.6174, "step": 29937 }, { "epoch": 0.8740766692943271, "grad_norm": 0.7273723544628566, "learning_rate": 6.995944849959449e-07, "loss": 0.6425, "step": 29938 }, { "epoch": 0.8741058655221745, "grad_norm": 0.7585127893255952, "learning_rate": 6.994322789943228e-07, "loss": 0.7225, "step": 29939 }, { "epoch": 0.8741350617500219, "grad_norm": 0.7454967360962641, "learning_rate": 6.992700729927007e-07, "loss": 0.6626, "step": 29940 }, { "epoch": 0.8741642579778692, "grad_norm": 0.6450826593194632, "learning_rate": 6.991078669910788e-07, "loss": 0.5371, "step": 29941 }, { "epoch": 0.8741934542057166, "grad_norm": 0.646921046859384, "learning_rate": 6.989456609894567e-07, "loss": 0.531, "step": 29942 }, { "epoch": 0.874222650433564, "grad_norm": 0.6989450276520257, "learning_rate": 6.987834549878346e-07, "loss": 0.559, "step": 29943 }, { "epoch": 0.8742518466614113, "grad_norm": 0.6597863597178811, "learning_rate": 6.986212489862125e-07, "loss": 0.5402, "step": 29944 }, { "epoch": 0.8742810428892587, "grad_norm": 0.7314204603523332, "learning_rate": 6.984590429845905e-07, "loss": 0.6452, "step": 29945 }, { "epoch": 0.874310239117106, "grad_norm": 0.6704052648099776, "learning_rate": 6.982968369829685e-07, "loss": 0.6211, "step": 29946 }, { "epoch": 0.8743394353449534, "grad_norm": 0.6575131938938893, "learning_rate": 6.981346309813464e-07, "loss": 0.5407, "step": 29947 }, { "epoch": 0.8743686315728008, "grad_norm": 0.7133237370396839, "learning_rate": 6.979724249797243e-07, "loss": 0.6042, "step": 29948 }, { "epoch": 0.8743978278006481, "grad_norm": 0.6997715832842385, "learning_rate": 6.978102189781022e-07, "loss": 0.6186, "step": 29949 }, { "epoch": 0.8744270240284955, "grad_norm": 0.6876555885058914, "learning_rate": 6.976480129764802e-07, "loss": 0.5919, "step": 29950 }, { "epoch": 0.8744562202563428, "grad_norm": 0.7289821750690724, "learning_rate": 6.974858069748581e-07, "loss": 0.6254, "step": 29951 }, { "epoch": 0.8744854164841902, "grad_norm": 0.7187738234491579, "learning_rate": 6.97323600973236e-07, "loss": 0.6352, "step": 29952 }, { "epoch": 0.8745146127120376, "grad_norm": 0.7138732706904635, "learning_rate": 6.971613949716139e-07, "loss": 0.6225, "step": 29953 }, { "epoch": 0.8745438089398849, "grad_norm": 0.6973232297591075, "learning_rate": 6.969991889699919e-07, "loss": 0.613, "step": 29954 }, { "epoch": 0.8745730051677323, "grad_norm": 0.7617901174095929, "learning_rate": 6.968369829683699e-07, "loss": 0.6702, "step": 29955 }, { "epoch": 0.8746022013955796, "grad_norm": 0.749686186354156, "learning_rate": 6.966747769667478e-07, "loss": 0.6582, "step": 29956 }, { "epoch": 0.874631397623427, "grad_norm": 0.7441108924810249, "learning_rate": 6.965125709651257e-07, "loss": 0.6124, "step": 29957 }, { "epoch": 0.8746605938512744, "grad_norm": 0.7838025749532913, "learning_rate": 6.963503649635036e-07, "loss": 0.6381, "step": 29958 }, { "epoch": 0.8746897900791217, "grad_norm": 0.6867910543009299, "learning_rate": 6.961881589618815e-07, "loss": 0.5835, "step": 29959 }, { "epoch": 0.8747189863069691, "grad_norm": 0.7462502578578425, "learning_rate": 6.960259529602597e-07, "loss": 0.6547, "step": 29960 }, { "epoch": 0.8747481825348165, "grad_norm": 0.5862538979131132, "learning_rate": 6.958637469586376e-07, "loss": 0.4061, "step": 29961 }, { "epoch": 0.8747773787626638, "grad_norm": 0.7726901594940399, "learning_rate": 6.957015409570155e-07, "loss": 0.6214, "step": 29962 }, { "epoch": 0.8748065749905112, "grad_norm": 0.6998304639459969, "learning_rate": 6.955393349553934e-07, "loss": 0.6183, "step": 29963 }, { "epoch": 0.8748357712183585, "grad_norm": 0.7081545913725883, "learning_rate": 6.953771289537714e-07, "loss": 0.6315, "step": 29964 }, { "epoch": 0.8748649674462059, "grad_norm": 0.7526488919505907, "learning_rate": 6.952149229521494e-07, "loss": 0.6839, "step": 29965 }, { "epoch": 0.8748941636740533, "grad_norm": 0.7290083634404385, "learning_rate": 6.950527169505273e-07, "loss": 0.6418, "step": 29966 }, { "epoch": 0.8749233599019006, "grad_norm": 0.6664423229456868, "learning_rate": 6.948905109489052e-07, "loss": 0.5779, "step": 29967 }, { "epoch": 0.874952556129748, "grad_norm": 0.7547468905422009, "learning_rate": 6.947283049472831e-07, "loss": 0.6324, "step": 29968 }, { "epoch": 0.8749817523575953, "grad_norm": 0.7506713976487658, "learning_rate": 6.945660989456611e-07, "loss": 0.6472, "step": 29969 }, { "epoch": 0.8750109485854428, "grad_norm": 0.7483046875060508, "learning_rate": 6.94403892944039e-07, "loss": 0.6301, "step": 29970 }, { "epoch": 0.8750401448132902, "grad_norm": 0.7262019613434461, "learning_rate": 6.942416869424169e-07, "loss": 0.6489, "step": 29971 }, { "epoch": 0.8750693410411375, "grad_norm": 0.6963090786108999, "learning_rate": 6.940794809407948e-07, "loss": 0.6044, "step": 29972 }, { "epoch": 0.8750985372689849, "grad_norm": 0.7193415713840173, "learning_rate": 6.939172749391728e-07, "loss": 0.6488, "step": 29973 }, { "epoch": 0.8751277334968323, "grad_norm": 0.790860243164706, "learning_rate": 6.937550689375508e-07, "loss": 0.6957, "step": 29974 }, { "epoch": 0.8751569297246796, "grad_norm": 0.7413269257883085, "learning_rate": 6.935928629359287e-07, "loss": 0.6672, "step": 29975 }, { "epoch": 0.875186125952527, "grad_norm": 0.767666437063505, "learning_rate": 6.934306569343066e-07, "loss": 0.7085, "step": 29976 }, { "epoch": 0.8752153221803743, "grad_norm": 0.7510271521389982, "learning_rate": 6.932684509326845e-07, "loss": 0.6995, "step": 29977 }, { "epoch": 0.8752445184082217, "grad_norm": 0.7650046112271066, "learning_rate": 6.931062449310626e-07, "loss": 0.564, "step": 29978 }, { "epoch": 0.8752737146360691, "grad_norm": 0.6840770754048885, "learning_rate": 6.929440389294405e-07, "loss": 0.5761, "step": 29979 }, { "epoch": 0.8753029108639164, "grad_norm": 0.675828481109065, "learning_rate": 6.927818329278184e-07, "loss": 0.5542, "step": 29980 }, { "epoch": 0.8753321070917638, "grad_norm": 0.6779519592904871, "learning_rate": 6.926196269261963e-07, "loss": 0.5948, "step": 29981 }, { "epoch": 0.8753613033196112, "grad_norm": 0.7570569438030383, "learning_rate": 6.924574209245742e-07, "loss": 0.6513, "step": 29982 }, { "epoch": 0.8753904995474585, "grad_norm": 0.6901464196873134, "learning_rate": 6.922952149229522e-07, "loss": 0.6766, "step": 29983 }, { "epoch": 0.8754196957753059, "grad_norm": 0.728350380916006, "learning_rate": 6.921330089213301e-07, "loss": 0.6292, "step": 29984 }, { "epoch": 0.8754488920031532, "grad_norm": 0.76334378560935, "learning_rate": 6.919708029197081e-07, "loss": 0.712, "step": 29985 }, { "epoch": 0.8754780882310006, "grad_norm": 0.7736869648241693, "learning_rate": 6.91808596918086e-07, "loss": 0.6274, "step": 29986 }, { "epoch": 0.875507284458848, "grad_norm": 0.6819401282679444, "learning_rate": 6.916463909164639e-07, "loss": 0.5253, "step": 29987 }, { "epoch": 0.8755364806866953, "grad_norm": 0.7134236337384576, "learning_rate": 6.914841849148419e-07, "loss": 0.7008, "step": 29988 }, { "epoch": 0.8755656769145427, "grad_norm": 0.7009679285684896, "learning_rate": 6.913219789132198e-07, "loss": 0.5564, "step": 29989 }, { "epoch": 0.87559487314239, "grad_norm": 0.692500452245501, "learning_rate": 6.911597729115977e-07, "loss": 0.6358, "step": 29990 }, { "epoch": 0.8756240693702374, "grad_norm": 0.7056645193998393, "learning_rate": 6.909975669099756e-07, "loss": 0.6132, "step": 29991 }, { "epoch": 0.8756532655980848, "grad_norm": 0.7265879088986575, "learning_rate": 6.908353609083536e-07, "loss": 0.6445, "step": 29992 }, { "epoch": 0.8756824618259321, "grad_norm": 0.7200801992404507, "learning_rate": 6.906731549067317e-07, "loss": 0.6351, "step": 29993 }, { "epoch": 0.8757116580537795, "grad_norm": 0.6963154130817887, "learning_rate": 6.905109489051096e-07, "loss": 0.5834, "step": 29994 }, { "epoch": 0.8757408542816268, "grad_norm": 0.7562889398511169, "learning_rate": 6.903487429034875e-07, "loss": 0.6451, "step": 29995 }, { "epoch": 0.8757700505094742, "grad_norm": 0.6582569013695456, "learning_rate": 6.901865369018654e-07, "loss": 0.5395, "step": 29996 }, { "epoch": 0.8757992467373216, "grad_norm": 0.6949966163933871, "learning_rate": 6.900243309002435e-07, "loss": 0.5927, "step": 29997 }, { "epoch": 0.8758284429651689, "grad_norm": 0.7168932387595371, "learning_rate": 6.898621248986214e-07, "loss": 0.6501, "step": 29998 }, { "epoch": 0.8758576391930163, "grad_norm": 0.7153484884027398, "learning_rate": 6.896999188969993e-07, "loss": 0.6384, "step": 29999 }, { "epoch": 0.8758868354208637, "grad_norm": 1.613617789667783, "learning_rate": 6.895377128953772e-07, "loss": 0.6391, "step": 30000 }, { "epoch": 0.875916031648711, "grad_norm": 0.6892610146798474, "learning_rate": 6.893755068937551e-07, "loss": 0.5976, "step": 30001 }, { "epoch": 0.8759452278765584, "grad_norm": 0.7175606869493261, "learning_rate": 6.892133008921331e-07, "loss": 0.6506, "step": 30002 }, { "epoch": 0.8759744241044057, "grad_norm": 0.7332212107951243, "learning_rate": 6.89051094890511e-07, "loss": 0.6375, "step": 30003 }, { "epoch": 0.8760036203322531, "grad_norm": 0.7330458755383643, "learning_rate": 6.88888888888889e-07, "loss": 0.6232, "step": 30004 }, { "epoch": 0.8760328165601005, "grad_norm": 0.7416758965176676, "learning_rate": 6.887266828872669e-07, "loss": 0.6902, "step": 30005 }, { "epoch": 0.8760620127879478, "grad_norm": 0.7021928804823332, "learning_rate": 6.885644768856448e-07, "loss": 0.6355, "step": 30006 }, { "epoch": 0.8760912090157952, "grad_norm": 0.7334798416436742, "learning_rate": 6.884022708840228e-07, "loss": 0.645, "step": 30007 }, { "epoch": 0.8761204052436425, "grad_norm": 0.7740038152771707, "learning_rate": 6.882400648824007e-07, "loss": 0.7385, "step": 30008 }, { "epoch": 0.8761496014714899, "grad_norm": 0.7824129375451762, "learning_rate": 6.880778588807786e-07, "loss": 0.707, "step": 30009 }, { "epoch": 0.8761787976993373, "grad_norm": 0.7130398693789792, "learning_rate": 6.879156528791565e-07, "loss": 0.5968, "step": 30010 }, { "epoch": 0.8762079939271846, "grad_norm": 0.7805963687974734, "learning_rate": 6.877534468775346e-07, "loss": 0.722, "step": 30011 }, { "epoch": 0.876237190155032, "grad_norm": 0.7456629271687893, "learning_rate": 6.875912408759125e-07, "loss": 0.7051, "step": 30012 }, { "epoch": 0.8762663863828793, "grad_norm": 0.7180080595589304, "learning_rate": 6.874290348742904e-07, "loss": 0.6575, "step": 30013 }, { "epoch": 0.8762955826107267, "grad_norm": 0.7544114881825065, "learning_rate": 6.872668288726683e-07, "loss": 0.6518, "step": 30014 }, { "epoch": 0.8763247788385741, "grad_norm": 0.6367573798291465, "learning_rate": 6.871046228710462e-07, "loss": 0.5368, "step": 30015 }, { "epoch": 0.8763539750664214, "grad_norm": 0.7137711802187067, "learning_rate": 6.869424168694243e-07, "loss": 0.664, "step": 30016 }, { "epoch": 0.8763831712942688, "grad_norm": 0.745494193029511, "learning_rate": 6.867802108678022e-07, "loss": 0.6115, "step": 30017 }, { "epoch": 0.8764123675221162, "grad_norm": 0.7783728327552253, "learning_rate": 6.866180048661801e-07, "loss": 0.6909, "step": 30018 }, { "epoch": 0.8764415637499635, "grad_norm": 0.6732088482214753, "learning_rate": 6.86455798864558e-07, "loss": 0.526, "step": 30019 }, { "epoch": 0.8764707599778109, "grad_norm": 0.7864413858518118, "learning_rate": 6.862935928629359e-07, "loss": 0.7539, "step": 30020 }, { "epoch": 0.8764999562056582, "grad_norm": 0.7308298327621755, "learning_rate": 6.861313868613139e-07, "loss": 0.648, "step": 30021 }, { "epoch": 0.8765291524335056, "grad_norm": 0.7346405432618678, "learning_rate": 6.859691808596918e-07, "loss": 0.6748, "step": 30022 }, { "epoch": 0.876558348661353, "grad_norm": 0.7050927332611647, "learning_rate": 6.858069748580698e-07, "loss": 0.6027, "step": 30023 }, { "epoch": 0.8765875448892003, "grad_norm": 0.6614164723189275, "learning_rate": 6.856447688564477e-07, "loss": 0.5581, "step": 30024 }, { "epoch": 0.8766167411170477, "grad_norm": 0.7227994393303312, "learning_rate": 6.854825628548256e-07, "loss": 0.6231, "step": 30025 }, { "epoch": 0.876645937344895, "grad_norm": 0.7159335670561304, "learning_rate": 6.853203568532037e-07, "loss": 0.602, "step": 30026 }, { "epoch": 0.8766751335727424, "grad_norm": 0.6921199303430206, "learning_rate": 6.851581508515816e-07, "loss": 0.5742, "step": 30027 }, { "epoch": 0.8767043298005898, "grad_norm": 0.702423693059361, "learning_rate": 6.849959448499595e-07, "loss": 0.6213, "step": 30028 }, { "epoch": 0.8767335260284371, "grad_norm": 0.7545202297977737, "learning_rate": 6.848337388483374e-07, "loss": 0.6392, "step": 30029 }, { "epoch": 0.8767627222562845, "grad_norm": 0.7033950220388914, "learning_rate": 6.846715328467155e-07, "loss": 0.6377, "step": 30030 }, { "epoch": 0.8767919184841318, "grad_norm": 0.741284151761943, "learning_rate": 6.845093268450934e-07, "loss": 0.7232, "step": 30031 }, { "epoch": 0.8768211147119792, "grad_norm": 0.7779198658086636, "learning_rate": 6.843471208434713e-07, "loss": 0.7504, "step": 30032 }, { "epoch": 0.8768503109398266, "grad_norm": 0.7606008099274952, "learning_rate": 6.841849148418492e-07, "loss": 0.6649, "step": 30033 }, { "epoch": 0.8768795071676739, "grad_norm": 0.7189852704158282, "learning_rate": 6.840227088402271e-07, "loss": 0.6057, "step": 30034 }, { "epoch": 0.8769087033955213, "grad_norm": 0.6610115937498963, "learning_rate": 6.838605028386052e-07, "loss": 0.5531, "step": 30035 }, { "epoch": 0.8769378996233687, "grad_norm": 0.852022693765213, "learning_rate": 6.836982968369831e-07, "loss": 0.6136, "step": 30036 }, { "epoch": 0.876967095851216, "grad_norm": 0.7099629350086322, "learning_rate": 6.83536090835361e-07, "loss": 0.6545, "step": 30037 }, { "epoch": 0.8769962920790634, "grad_norm": 0.7008267430893247, "learning_rate": 6.833738848337389e-07, "loss": 0.6064, "step": 30038 }, { "epoch": 0.8770254883069107, "grad_norm": 0.6986427343696061, "learning_rate": 6.832116788321168e-07, "loss": 0.622, "step": 30039 }, { "epoch": 0.8770546845347581, "grad_norm": 0.6590653543530759, "learning_rate": 6.830494728304948e-07, "loss": 0.5139, "step": 30040 }, { "epoch": 0.8770838807626055, "grad_norm": 0.7415987738212119, "learning_rate": 6.828872668288727e-07, "loss": 0.6476, "step": 30041 }, { "epoch": 0.8771130769904528, "grad_norm": 0.6811105727393619, "learning_rate": 6.827250608272507e-07, "loss": 0.5927, "step": 30042 }, { "epoch": 0.8771422732183002, "grad_norm": 0.7454024217539578, "learning_rate": 6.825628548256286e-07, "loss": 0.6548, "step": 30043 }, { "epoch": 0.8771714694461475, "grad_norm": 0.6837729937618755, "learning_rate": 6.824006488240066e-07, "loss": 0.569, "step": 30044 }, { "epoch": 0.8772006656739949, "grad_norm": 0.7513060002987509, "learning_rate": 6.822384428223845e-07, "loss": 0.6789, "step": 30045 }, { "epoch": 0.8772298619018423, "grad_norm": 0.6778813258558956, "learning_rate": 6.820762368207624e-07, "loss": 0.58, "step": 30046 }, { "epoch": 0.8772590581296896, "grad_norm": 0.7218931248611289, "learning_rate": 6.819140308191403e-07, "loss": 0.5952, "step": 30047 }, { "epoch": 0.877288254357537, "grad_norm": 0.6939226198600407, "learning_rate": 6.817518248175182e-07, "loss": 0.6183, "step": 30048 }, { "epoch": 0.8773174505853844, "grad_norm": 0.727208894215859, "learning_rate": 6.815896188158963e-07, "loss": 0.7076, "step": 30049 }, { "epoch": 0.8773466468132317, "grad_norm": 0.7354781983367491, "learning_rate": 6.814274128142742e-07, "loss": 0.6396, "step": 30050 }, { "epoch": 0.8773758430410791, "grad_norm": 0.7069094454646786, "learning_rate": 6.812652068126521e-07, "loss": 0.5791, "step": 30051 }, { "epoch": 0.8774050392689264, "grad_norm": 0.724899761360871, "learning_rate": 6.8110300081103e-07, "loss": 0.6215, "step": 30052 }, { "epoch": 0.8774342354967738, "grad_norm": 0.7523925463022135, "learning_rate": 6.809407948094079e-07, "loss": 0.7306, "step": 30053 }, { "epoch": 0.8774634317246212, "grad_norm": 0.7193434675902334, "learning_rate": 6.80778588807786e-07, "loss": 0.6318, "step": 30054 }, { "epoch": 0.8774926279524685, "grad_norm": 0.6548087667711898, "learning_rate": 6.806163828061639e-07, "loss": 0.5617, "step": 30055 }, { "epoch": 0.8775218241803159, "grad_norm": 0.7196301932895324, "learning_rate": 6.804541768045418e-07, "loss": 0.6239, "step": 30056 }, { "epoch": 0.8775510204081632, "grad_norm": 0.7252830265386797, "learning_rate": 6.802919708029197e-07, "loss": 0.5933, "step": 30057 }, { "epoch": 0.8775802166360106, "grad_norm": 0.6971497523004007, "learning_rate": 6.801297648012976e-07, "loss": 0.6172, "step": 30058 }, { "epoch": 0.877609412863858, "grad_norm": 0.7907785614794971, "learning_rate": 6.799675587996757e-07, "loss": 0.5885, "step": 30059 }, { "epoch": 0.8776386090917053, "grad_norm": 0.7132642148734335, "learning_rate": 6.798053527980536e-07, "loss": 0.5829, "step": 30060 }, { "epoch": 0.8776678053195527, "grad_norm": 0.7700968732579325, "learning_rate": 6.796431467964315e-07, "loss": 0.7124, "step": 30061 }, { "epoch": 0.8776970015474, "grad_norm": 0.6940803576952874, "learning_rate": 6.794809407948095e-07, "loss": 0.6117, "step": 30062 }, { "epoch": 0.8777261977752474, "grad_norm": 0.7173771189038814, "learning_rate": 6.793187347931875e-07, "loss": 0.6215, "step": 30063 }, { "epoch": 0.8777553940030948, "grad_norm": 0.749909281887964, "learning_rate": 6.791565287915654e-07, "loss": 0.6439, "step": 30064 }, { "epoch": 0.8777845902309421, "grad_norm": 0.6887948246256617, "learning_rate": 6.789943227899433e-07, "loss": 0.6015, "step": 30065 }, { "epoch": 0.8778137864587895, "grad_norm": 0.7153727293272325, "learning_rate": 6.788321167883212e-07, "loss": 0.5816, "step": 30066 }, { "epoch": 0.8778429826866369, "grad_norm": 0.7617717166325269, "learning_rate": 6.786699107866991e-07, "loss": 0.7432, "step": 30067 }, { "epoch": 0.8778721789144842, "grad_norm": 0.730716538659017, "learning_rate": 6.785077047850772e-07, "loss": 0.6599, "step": 30068 }, { "epoch": 0.8779013751423316, "grad_norm": 0.6517280909822399, "learning_rate": 6.783454987834551e-07, "loss": 0.5601, "step": 30069 }, { "epoch": 0.8779305713701789, "grad_norm": 0.7113470311049588, "learning_rate": 6.78183292781833e-07, "loss": 0.6243, "step": 30070 }, { "epoch": 0.8779597675980263, "grad_norm": 0.7356965222972554, "learning_rate": 6.780210867802109e-07, "loss": 0.6753, "step": 30071 }, { "epoch": 0.8779889638258737, "grad_norm": 0.7996288025820015, "learning_rate": 6.778588807785888e-07, "loss": 0.7587, "step": 30072 }, { "epoch": 0.878018160053721, "grad_norm": 0.6888790785294382, "learning_rate": 6.776966747769669e-07, "loss": 0.5854, "step": 30073 }, { "epoch": 0.8780473562815684, "grad_norm": 0.6962556718028582, "learning_rate": 6.775344687753448e-07, "loss": 0.5752, "step": 30074 }, { "epoch": 0.8780765525094157, "grad_norm": 0.6963341873231008, "learning_rate": 6.773722627737227e-07, "loss": 0.6652, "step": 30075 }, { "epoch": 0.8781057487372631, "grad_norm": 0.7119399402558275, "learning_rate": 6.772100567721006e-07, "loss": 0.6165, "step": 30076 }, { "epoch": 0.8781349449651105, "grad_norm": 0.7371699789103119, "learning_rate": 6.770478507704786e-07, "loss": 0.6673, "step": 30077 }, { "epoch": 0.8781641411929578, "grad_norm": 0.7066524965299646, "learning_rate": 6.768856447688565e-07, "loss": 0.6352, "step": 30078 }, { "epoch": 0.8781933374208052, "grad_norm": 0.7193865354922373, "learning_rate": 6.767234387672344e-07, "loss": 0.6329, "step": 30079 }, { "epoch": 0.8782225336486525, "grad_norm": 0.7219596426340603, "learning_rate": 6.765612327656123e-07, "loss": 0.6174, "step": 30080 }, { "epoch": 0.8782517298764999, "grad_norm": 0.7250049290102643, "learning_rate": 6.763990267639903e-07, "loss": 0.6468, "step": 30081 }, { "epoch": 0.8782809261043473, "grad_norm": 0.6892684045268876, "learning_rate": 6.762368207623683e-07, "loss": 0.6095, "step": 30082 }, { "epoch": 0.8783101223321946, "grad_norm": 0.740394558515296, "learning_rate": 6.760746147607462e-07, "loss": 0.5342, "step": 30083 }, { "epoch": 0.878339318560042, "grad_norm": 0.6385262484387285, "learning_rate": 6.759124087591241e-07, "loss": 0.5487, "step": 30084 }, { "epoch": 0.8783685147878894, "grad_norm": 0.740246955019139, "learning_rate": 6.75750202757502e-07, "loss": 0.6857, "step": 30085 }, { "epoch": 0.8783977110157367, "grad_norm": 0.8451056321395105, "learning_rate": 6.755879967558799e-07, "loss": 0.6565, "step": 30086 }, { "epoch": 0.8784269072435841, "grad_norm": 0.7072858731458975, "learning_rate": 6.75425790754258e-07, "loss": 0.6016, "step": 30087 }, { "epoch": 0.8784561034714314, "grad_norm": 0.7373408488645012, "learning_rate": 6.752635847526359e-07, "loss": 0.6515, "step": 30088 }, { "epoch": 0.8784852996992788, "grad_norm": 0.7222670166406873, "learning_rate": 6.751013787510138e-07, "loss": 0.5982, "step": 30089 }, { "epoch": 0.8785144959271262, "grad_norm": 0.7029212845762467, "learning_rate": 6.749391727493917e-07, "loss": 0.5666, "step": 30090 }, { "epoch": 0.8785436921549736, "grad_norm": 0.7454613026518472, "learning_rate": 6.747769667477696e-07, "loss": 0.6699, "step": 30091 }, { "epoch": 0.878572888382821, "grad_norm": 0.6698793447987984, "learning_rate": 6.746147607461477e-07, "loss": 0.5768, "step": 30092 }, { "epoch": 0.8786020846106684, "grad_norm": 0.7028387912674429, "learning_rate": 6.744525547445257e-07, "loss": 0.5827, "step": 30093 }, { "epoch": 0.8786312808385157, "grad_norm": 0.7141632390969349, "learning_rate": 6.742903487429036e-07, "loss": 0.6174, "step": 30094 }, { "epoch": 0.8786604770663631, "grad_norm": 0.7475528843051139, "learning_rate": 6.741281427412815e-07, "loss": 0.664, "step": 30095 }, { "epoch": 0.8786896732942104, "grad_norm": 0.7821570660985855, "learning_rate": 6.739659367396595e-07, "loss": 0.7083, "step": 30096 }, { "epoch": 0.8787188695220578, "grad_norm": 0.703234572612578, "learning_rate": 6.738037307380374e-07, "loss": 0.6327, "step": 30097 }, { "epoch": 0.8787480657499052, "grad_norm": 0.699287222273888, "learning_rate": 6.736415247364153e-07, "loss": 0.6322, "step": 30098 }, { "epoch": 0.8787772619777525, "grad_norm": 0.6985454373047375, "learning_rate": 6.734793187347932e-07, "loss": 0.587, "step": 30099 }, { "epoch": 0.8788064582055999, "grad_norm": 0.6801175960270357, "learning_rate": 6.733171127331712e-07, "loss": 0.583, "step": 30100 }, { "epoch": 0.8788356544334472, "grad_norm": 0.7316329090454029, "learning_rate": 6.731549067315492e-07, "loss": 0.6005, "step": 30101 }, { "epoch": 0.8788648506612946, "grad_norm": 0.7436400739754254, "learning_rate": 6.729927007299271e-07, "loss": 0.6027, "step": 30102 }, { "epoch": 0.878894046889142, "grad_norm": 0.7398488330201817, "learning_rate": 6.72830494728305e-07, "loss": 0.6693, "step": 30103 }, { "epoch": 0.8789232431169893, "grad_norm": 0.7422860143650265, "learning_rate": 6.726682887266829e-07, "loss": 0.6427, "step": 30104 }, { "epoch": 0.8789524393448367, "grad_norm": 0.7368048661908274, "learning_rate": 6.725060827250608e-07, "loss": 0.6341, "step": 30105 }, { "epoch": 0.878981635572684, "grad_norm": 0.7433380631393738, "learning_rate": 6.723438767234389e-07, "loss": 0.6668, "step": 30106 }, { "epoch": 0.8790108318005314, "grad_norm": 0.7168691551514393, "learning_rate": 6.721816707218168e-07, "loss": 0.6391, "step": 30107 }, { "epoch": 0.8790400280283788, "grad_norm": 0.7413212821065088, "learning_rate": 6.720194647201947e-07, "loss": 0.5938, "step": 30108 }, { "epoch": 0.8790692242562261, "grad_norm": 0.7123753775024174, "learning_rate": 6.718572587185726e-07, "loss": 0.6032, "step": 30109 }, { "epoch": 0.8790984204840735, "grad_norm": 0.7300793325066022, "learning_rate": 6.716950527169505e-07, "loss": 0.6448, "step": 30110 }, { "epoch": 0.8791276167119209, "grad_norm": 0.6455765770431948, "learning_rate": 6.715328467153285e-07, "loss": 0.5144, "step": 30111 }, { "epoch": 0.8791568129397682, "grad_norm": 0.7355467010886767, "learning_rate": 6.713706407137065e-07, "loss": 0.6463, "step": 30112 }, { "epoch": 0.8791860091676156, "grad_norm": 0.7340812370793817, "learning_rate": 6.712084347120844e-07, "loss": 0.6403, "step": 30113 }, { "epoch": 0.8792152053954629, "grad_norm": 0.6753694060047746, "learning_rate": 6.710462287104623e-07, "loss": 0.5724, "step": 30114 }, { "epoch": 0.8792444016233103, "grad_norm": 0.7223588038110124, "learning_rate": 6.708840227088403e-07, "loss": 0.6244, "step": 30115 }, { "epoch": 0.8792735978511577, "grad_norm": 0.7893189252469318, "learning_rate": 6.707218167072182e-07, "loss": 0.8004, "step": 30116 }, { "epoch": 0.879302794079005, "grad_norm": 0.7403557761012417, "learning_rate": 6.705596107055961e-07, "loss": 0.7079, "step": 30117 }, { "epoch": 0.8793319903068524, "grad_norm": 0.718030244065766, "learning_rate": 6.70397404703974e-07, "loss": 0.6072, "step": 30118 }, { "epoch": 0.8793611865346997, "grad_norm": 0.8104354438718351, "learning_rate": 6.702351987023519e-07, "loss": 0.6455, "step": 30119 }, { "epoch": 0.8793903827625471, "grad_norm": 0.6720835864039112, "learning_rate": 6.7007299270073e-07, "loss": 0.5766, "step": 30120 }, { "epoch": 0.8794195789903945, "grad_norm": 0.6667469189729508, "learning_rate": 6.69910786699108e-07, "loss": 0.566, "step": 30121 }, { "epoch": 0.8794487752182418, "grad_norm": 0.6837956163885516, "learning_rate": 6.697485806974858e-07, "loss": 0.5742, "step": 30122 }, { "epoch": 0.8794779714460892, "grad_norm": 0.7257034457668943, "learning_rate": 6.695863746958637e-07, "loss": 0.6558, "step": 30123 }, { "epoch": 0.8795071676739366, "grad_norm": 0.8036259731102078, "learning_rate": 6.694241686942417e-07, "loss": 0.6979, "step": 30124 }, { "epoch": 0.8795363639017839, "grad_norm": 0.714699523037366, "learning_rate": 6.692619626926198e-07, "loss": 0.605, "step": 30125 }, { "epoch": 0.8795655601296313, "grad_norm": 0.7075442347774186, "learning_rate": 6.690997566909977e-07, "loss": 0.641, "step": 30126 }, { "epoch": 0.8795947563574786, "grad_norm": 0.6983880449908628, "learning_rate": 6.689375506893756e-07, "loss": 0.5855, "step": 30127 }, { "epoch": 0.879623952585326, "grad_norm": 0.7276401207003991, "learning_rate": 6.687753446877535e-07, "loss": 0.6883, "step": 30128 }, { "epoch": 0.8796531488131734, "grad_norm": 0.7350204369025349, "learning_rate": 6.686131386861315e-07, "loss": 0.7398, "step": 30129 }, { "epoch": 0.8796823450410207, "grad_norm": 0.7042283430000412, "learning_rate": 6.684509326845094e-07, "loss": 0.5889, "step": 30130 }, { "epoch": 0.8797115412688681, "grad_norm": 0.6801486602757582, "learning_rate": 6.682887266828874e-07, "loss": 0.5688, "step": 30131 }, { "epoch": 0.8797407374967154, "grad_norm": 0.6673416615019073, "learning_rate": 6.681265206812653e-07, "loss": 0.524, "step": 30132 }, { "epoch": 0.8797699337245628, "grad_norm": 0.7639893217761494, "learning_rate": 6.679643146796432e-07, "loss": 0.7144, "step": 30133 }, { "epoch": 0.8797991299524102, "grad_norm": 0.731185810748653, "learning_rate": 6.678021086780212e-07, "loss": 0.6891, "step": 30134 }, { "epoch": 0.8798283261802575, "grad_norm": 0.7215329517479641, "learning_rate": 6.676399026763991e-07, "loss": 0.6575, "step": 30135 }, { "epoch": 0.8798575224081049, "grad_norm": 0.764018025616022, "learning_rate": 6.67477696674777e-07, "loss": 0.7447, "step": 30136 }, { "epoch": 0.8798867186359522, "grad_norm": 0.7219954856374806, "learning_rate": 6.673154906731549e-07, "loss": 0.6169, "step": 30137 }, { "epoch": 0.8799159148637996, "grad_norm": 0.6539682285836866, "learning_rate": 6.671532846715328e-07, "loss": 0.511, "step": 30138 }, { "epoch": 0.879945111091647, "grad_norm": 0.7166025365769515, "learning_rate": 6.669910786699109e-07, "loss": 0.6172, "step": 30139 }, { "epoch": 0.8799743073194943, "grad_norm": 0.7300852228251973, "learning_rate": 6.668288726682888e-07, "loss": 0.6483, "step": 30140 }, { "epoch": 0.8800035035473417, "grad_norm": 0.6930934889929314, "learning_rate": 6.666666666666667e-07, "loss": 0.5669, "step": 30141 }, { "epoch": 0.880032699775189, "grad_norm": 0.7474006890816827, "learning_rate": 6.665044606650446e-07, "loss": 0.6893, "step": 30142 }, { "epoch": 0.8800618960030364, "grad_norm": 0.7817277515208609, "learning_rate": 6.663422546634225e-07, "loss": 0.6602, "step": 30143 }, { "epoch": 0.8800910922308838, "grad_norm": 0.7190444866535028, "learning_rate": 6.661800486618006e-07, "loss": 0.6083, "step": 30144 }, { "epoch": 0.8801202884587311, "grad_norm": 0.7106003960511061, "learning_rate": 6.660178426601785e-07, "loss": 0.6527, "step": 30145 }, { "epoch": 0.8801494846865785, "grad_norm": 0.7178960500216782, "learning_rate": 6.658556366585564e-07, "loss": 0.6027, "step": 30146 }, { "epoch": 0.8801786809144259, "grad_norm": 0.7535635178150148, "learning_rate": 6.656934306569343e-07, "loss": 0.6514, "step": 30147 }, { "epoch": 0.8802078771422732, "grad_norm": 0.6913953883763082, "learning_rate": 6.655312246553123e-07, "loss": 0.5921, "step": 30148 }, { "epoch": 0.8802370733701206, "grad_norm": 0.7113375184560237, "learning_rate": 6.653690186536902e-07, "loss": 0.6241, "step": 30149 }, { "epoch": 0.8802662695979679, "grad_norm": 0.7399123557364318, "learning_rate": 6.652068126520681e-07, "loss": 0.6873, "step": 30150 }, { "epoch": 0.8802954658258153, "grad_norm": 0.7446356751543611, "learning_rate": 6.650446066504461e-07, "loss": 0.6679, "step": 30151 }, { "epoch": 0.8803246620536627, "grad_norm": 0.6696562095078606, "learning_rate": 6.64882400648824e-07, "loss": 0.5467, "step": 30152 }, { "epoch": 0.88035385828151, "grad_norm": 0.6926849172101696, "learning_rate": 6.64720194647202e-07, "loss": 0.587, "step": 30153 }, { "epoch": 0.8803830545093574, "grad_norm": 0.7921311518740675, "learning_rate": 6.6455798864558e-07, "loss": 0.629, "step": 30154 }, { "epoch": 0.8804122507372047, "grad_norm": 0.6110871685933725, "learning_rate": 6.643957826439578e-07, "loss": 0.4923, "step": 30155 }, { "epoch": 0.8804414469650521, "grad_norm": 0.7186946662501544, "learning_rate": 6.642335766423358e-07, "loss": 0.6392, "step": 30156 }, { "epoch": 0.8804706431928995, "grad_norm": 0.6944948328452942, "learning_rate": 6.640713706407137e-07, "loss": 0.6323, "step": 30157 }, { "epoch": 0.8804998394207468, "grad_norm": 0.7233506670207184, "learning_rate": 6.639091646390918e-07, "loss": 0.6232, "step": 30158 }, { "epoch": 0.8805290356485942, "grad_norm": 0.7042971648973116, "learning_rate": 6.637469586374697e-07, "loss": 0.5824, "step": 30159 }, { "epoch": 0.8805582318764416, "grad_norm": 0.6730587416246296, "learning_rate": 6.635847526358476e-07, "loss": 0.5328, "step": 30160 }, { "epoch": 0.8805874281042889, "grad_norm": 0.7908721050102695, "learning_rate": 6.634225466342255e-07, "loss": 0.7624, "step": 30161 }, { "epoch": 0.8806166243321363, "grad_norm": 0.7341689520824917, "learning_rate": 6.632603406326036e-07, "loss": 0.6845, "step": 30162 }, { "epoch": 0.8806458205599836, "grad_norm": 0.7263459742574739, "learning_rate": 6.630981346309815e-07, "loss": 0.6919, "step": 30163 }, { "epoch": 0.880675016787831, "grad_norm": 0.7331956975964066, "learning_rate": 6.629359286293594e-07, "loss": 0.6403, "step": 30164 }, { "epoch": 0.8807042130156784, "grad_norm": 0.630407307925468, "learning_rate": 6.627737226277373e-07, "loss": 0.4907, "step": 30165 }, { "epoch": 0.8807334092435257, "grad_norm": 0.6718419826889401, "learning_rate": 6.626115166261152e-07, "loss": 0.5795, "step": 30166 }, { "epoch": 0.8807626054713731, "grad_norm": 0.7082278268478442, "learning_rate": 6.624493106244932e-07, "loss": 0.6248, "step": 30167 }, { "epoch": 0.8807918016992204, "grad_norm": 0.7024626206497606, "learning_rate": 6.622871046228711e-07, "loss": 0.6257, "step": 30168 }, { "epoch": 0.8808209979270678, "grad_norm": 0.7010751047842588, "learning_rate": 6.62124898621249e-07, "loss": 0.5697, "step": 30169 }, { "epoch": 0.8808501941549152, "grad_norm": 0.753094094389714, "learning_rate": 6.61962692619627e-07, "loss": 0.674, "step": 30170 }, { "epoch": 0.8808793903827625, "grad_norm": 0.7072190000464172, "learning_rate": 6.618004866180049e-07, "loss": 0.6956, "step": 30171 }, { "epoch": 0.8809085866106099, "grad_norm": 1.0002817731251936, "learning_rate": 6.616382806163829e-07, "loss": 0.7629, "step": 30172 }, { "epoch": 0.8809377828384573, "grad_norm": 0.711824034356753, "learning_rate": 6.614760746147608e-07, "loss": 0.5681, "step": 30173 }, { "epoch": 0.8809669790663046, "grad_norm": 0.6803530432968578, "learning_rate": 6.613138686131387e-07, "loss": 0.5599, "step": 30174 }, { "epoch": 0.880996175294152, "grad_norm": 0.6975154579290607, "learning_rate": 6.611516626115166e-07, "loss": 0.6065, "step": 30175 }, { "epoch": 0.8810253715219993, "grad_norm": 0.7578574959738168, "learning_rate": 6.609894566098945e-07, "loss": 0.651, "step": 30176 }, { "epoch": 0.8810545677498467, "grad_norm": 0.7054385244822552, "learning_rate": 6.608272506082726e-07, "loss": 0.6195, "step": 30177 }, { "epoch": 0.8810837639776941, "grad_norm": 0.7674049943138479, "learning_rate": 6.606650446066505e-07, "loss": 0.6595, "step": 30178 }, { "epoch": 0.8811129602055414, "grad_norm": 0.7430958026317412, "learning_rate": 6.605028386050284e-07, "loss": 0.6326, "step": 30179 }, { "epoch": 0.8811421564333888, "grad_norm": 0.6834642905363064, "learning_rate": 6.603406326034063e-07, "loss": 0.5909, "step": 30180 }, { "epoch": 0.8811713526612361, "grad_norm": 0.7368923654082701, "learning_rate": 6.601784266017843e-07, "loss": 0.6617, "step": 30181 }, { "epoch": 0.8812005488890835, "grad_norm": 0.7485969131188114, "learning_rate": 6.600162206001623e-07, "loss": 0.6726, "step": 30182 }, { "epoch": 0.8812297451169309, "grad_norm": 0.7251289215621993, "learning_rate": 6.598540145985402e-07, "loss": 0.6341, "step": 30183 }, { "epoch": 0.8812589413447782, "grad_norm": 0.7166621667803772, "learning_rate": 6.596918085969181e-07, "loss": 0.6304, "step": 30184 }, { "epoch": 0.8812881375726256, "grad_norm": 0.7440724654236329, "learning_rate": 6.59529602595296e-07, "loss": 0.6735, "step": 30185 }, { "epoch": 0.881317333800473, "grad_norm": 0.7492780037387587, "learning_rate": 6.59367396593674e-07, "loss": 0.6537, "step": 30186 }, { "epoch": 0.8813465300283203, "grad_norm": 0.7279369657877229, "learning_rate": 6.59205190592052e-07, "loss": 0.6512, "step": 30187 }, { "epoch": 0.8813757262561677, "grad_norm": 0.7270616763800409, "learning_rate": 6.590429845904299e-07, "loss": 0.6313, "step": 30188 }, { "epoch": 0.881404922484015, "grad_norm": 1.0124596803693966, "learning_rate": 6.588807785888079e-07, "loss": 0.7305, "step": 30189 }, { "epoch": 0.8814341187118624, "grad_norm": 0.7186816550612043, "learning_rate": 6.587185725871858e-07, "loss": 0.6416, "step": 30190 }, { "epoch": 0.8814633149397098, "grad_norm": 0.7574873251824612, "learning_rate": 6.585563665855638e-07, "loss": 0.6506, "step": 30191 }, { "epoch": 0.8814925111675571, "grad_norm": 0.7822144474117964, "learning_rate": 6.583941605839417e-07, "loss": 0.7398, "step": 30192 }, { "epoch": 0.8815217073954045, "grad_norm": 0.7507109112249233, "learning_rate": 6.582319545823196e-07, "loss": 0.656, "step": 30193 }, { "epoch": 0.8815509036232518, "grad_norm": 0.7796860995156047, "learning_rate": 6.580697485806975e-07, "loss": 0.646, "step": 30194 }, { "epoch": 0.8815800998510992, "grad_norm": 0.7567506645010803, "learning_rate": 6.579075425790756e-07, "loss": 0.6302, "step": 30195 }, { "epoch": 0.8816092960789466, "grad_norm": 0.730974600538255, "learning_rate": 6.577453365774535e-07, "loss": 0.637, "step": 30196 }, { "epoch": 0.8816384923067939, "grad_norm": 0.708703928465633, "learning_rate": 6.575831305758314e-07, "loss": 0.636, "step": 30197 }, { "epoch": 0.8816676885346413, "grad_norm": 0.783545855496004, "learning_rate": 6.574209245742093e-07, "loss": 0.6034, "step": 30198 }, { "epoch": 0.8816968847624886, "grad_norm": 0.6994313654871608, "learning_rate": 6.572587185725872e-07, "loss": 0.5499, "step": 30199 }, { "epoch": 0.881726080990336, "grad_norm": 0.7613357746226609, "learning_rate": 6.570965125709652e-07, "loss": 0.6962, "step": 30200 }, { "epoch": 0.8817552772181834, "grad_norm": 0.7208998115666789, "learning_rate": 6.569343065693432e-07, "loss": 0.6089, "step": 30201 }, { "epoch": 0.8817844734460307, "grad_norm": 0.7597459699039395, "learning_rate": 6.567721005677211e-07, "loss": 0.7569, "step": 30202 }, { "epoch": 0.8818136696738781, "grad_norm": 0.8182371767149053, "learning_rate": 6.56609894566099e-07, "loss": 0.7085, "step": 30203 }, { "epoch": 0.8818428659017254, "grad_norm": 0.693349604828886, "learning_rate": 6.564476885644769e-07, "loss": 0.6156, "step": 30204 }, { "epoch": 0.8818720621295728, "grad_norm": 0.747403520252859, "learning_rate": 6.562854825628549e-07, "loss": 0.6301, "step": 30205 }, { "epoch": 0.8819012583574202, "grad_norm": 0.7055991449155611, "learning_rate": 6.561232765612328e-07, "loss": 0.6467, "step": 30206 }, { "epoch": 0.8819304545852675, "grad_norm": 0.7103294155736888, "learning_rate": 6.559610705596107e-07, "loss": 0.5857, "step": 30207 }, { "epoch": 0.8819596508131149, "grad_norm": 0.7424645657690689, "learning_rate": 6.557988645579886e-07, "loss": 0.627, "step": 30208 }, { "epoch": 0.8819888470409623, "grad_norm": 0.7448040632015347, "learning_rate": 6.556366585563666e-07, "loss": 0.6802, "step": 30209 }, { "epoch": 0.8820180432688096, "grad_norm": 0.7634099927543474, "learning_rate": 6.554744525547446e-07, "loss": 0.6888, "step": 30210 }, { "epoch": 0.8820472394966571, "grad_norm": 0.6985146368921369, "learning_rate": 6.553122465531225e-07, "loss": 0.5907, "step": 30211 }, { "epoch": 0.8820764357245044, "grad_norm": 0.7357640196833509, "learning_rate": 6.551500405515004e-07, "loss": 0.6568, "step": 30212 }, { "epoch": 0.8821056319523518, "grad_norm": 0.7036046400864007, "learning_rate": 6.549878345498783e-07, "loss": 0.6666, "step": 30213 }, { "epoch": 0.8821348281801992, "grad_norm": 0.7527712834607576, "learning_rate": 6.548256285482564e-07, "loss": 0.6613, "step": 30214 }, { "epoch": 0.8821640244080465, "grad_norm": 0.774835818742533, "learning_rate": 6.546634225466343e-07, "loss": 0.7046, "step": 30215 }, { "epoch": 0.8821932206358939, "grad_norm": 0.6912460117108875, "learning_rate": 6.545012165450122e-07, "loss": 0.6276, "step": 30216 }, { "epoch": 0.8822224168637413, "grad_norm": 0.7825987807465135, "learning_rate": 6.543390105433901e-07, "loss": 0.7108, "step": 30217 }, { "epoch": 0.8822516130915886, "grad_norm": 0.6831383928255831, "learning_rate": 6.54176804541768e-07, "loss": 0.6168, "step": 30218 }, { "epoch": 0.882280809319436, "grad_norm": 0.7653925519872178, "learning_rate": 6.54014598540146e-07, "loss": 0.6605, "step": 30219 }, { "epoch": 0.8823100055472833, "grad_norm": 0.8267938857811461, "learning_rate": 6.538523925385241e-07, "loss": 0.6488, "step": 30220 }, { "epoch": 0.8823392017751307, "grad_norm": 0.6918859631811443, "learning_rate": 6.53690186536902e-07, "loss": 0.6113, "step": 30221 }, { "epoch": 0.8823683980029781, "grad_norm": 0.7272151433334458, "learning_rate": 6.535279805352799e-07, "loss": 0.6103, "step": 30222 }, { "epoch": 0.8823975942308254, "grad_norm": 0.7127256259560539, "learning_rate": 6.533657745336578e-07, "loss": 0.6127, "step": 30223 }, { "epoch": 0.8824267904586728, "grad_norm": 0.7567796571679138, "learning_rate": 6.532035685320358e-07, "loss": 0.6936, "step": 30224 }, { "epoch": 0.8824559866865201, "grad_norm": 0.7860764995996029, "learning_rate": 6.530413625304137e-07, "loss": 0.6696, "step": 30225 }, { "epoch": 0.8824851829143675, "grad_norm": 0.7499705928861748, "learning_rate": 6.528791565287916e-07, "loss": 0.5862, "step": 30226 }, { "epoch": 0.8825143791422149, "grad_norm": 0.8614007529843496, "learning_rate": 6.527169505271695e-07, "loss": 0.6831, "step": 30227 }, { "epoch": 0.8825435753700622, "grad_norm": 0.6518804650689204, "learning_rate": 6.525547445255476e-07, "loss": 0.5166, "step": 30228 }, { "epoch": 0.8825727715979096, "grad_norm": 0.7473430479180059, "learning_rate": 6.523925385239255e-07, "loss": 0.6514, "step": 30229 }, { "epoch": 0.882601967825757, "grad_norm": 0.6877415209784294, "learning_rate": 6.522303325223034e-07, "loss": 0.5744, "step": 30230 }, { "epoch": 0.8826311640536043, "grad_norm": 0.7668455713616905, "learning_rate": 6.520681265206813e-07, "loss": 0.7105, "step": 30231 }, { "epoch": 0.8826603602814517, "grad_norm": 0.7385553197526119, "learning_rate": 6.519059205190592e-07, "loss": 0.6988, "step": 30232 }, { "epoch": 0.882689556509299, "grad_norm": 0.7432140618527924, "learning_rate": 6.517437145174373e-07, "loss": 0.6762, "step": 30233 }, { "epoch": 0.8827187527371464, "grad_norm": 0.7226462000221687, "learning_rate": 6.515815085158152e-07, "loss": 0.5973, "step": 30234 }, { "epoch": 0.8827479489649938, "grad_norm": 0.726404421185444, "learning_rate": 6.514193025141931e-07, "loss": 0.6762, "step": 30235 }, { "epoch": 0.8827771451928411, "grad_norm": 0.7408554892606628, "learning_rate": 6.51257096512571e-07, "loss": 0.6633, "step": 30236 }, { "epoch": 0.8828063414206885, "grad_norm": 0.837199035635889, "learning_rate": 6.510948905109489e-07, "loss": 0.7118, "step": 30237 }, { "epoch": 0.8828355376485358, "grad_norm": 0.7150628095367142, "learning_rate": 6.509326845093269e-07, "loss": 0.6177, "step": 30238 }, { "epoch": 0.8828647338763832, "grad_norm": 0.6695346517808474, "learning_rate": 6.507704785077048e-07, "loss": 0.5857, "step": 30239 }, { "epoch": 0.8828939301042306, "grad_norm": 0.67795799416229, "learning_rate": 6.506082725060828e-07, "loss": 0.5757, "step": 30240 }, { "epoch": 0.8829231263320779, "grad_norm": 0.6596300204901018, "learning_rate": 6.504460665044607e-07, "loss": 0.5291, "step": 30241 }, { "epoch": 0.8829523225599253, "grad_norm": 0.7118256072178636, "learning_rate": 6.502838605028386e-07, "loss": 0.6226, "step": 30242 }, { "epoch": 0.8829815187877726, "grad_norm": 0.7327518845924554, "learning_rate": 6.501216545012166e-07, "loss": 0.7229, "step": 30243 }, { "epoch": 0.88301071501562, "grad_norm": 0.7343653103576534, "learning_rate": 6.499594484995945e-07, "loss": 0.642, "step": 30244 }, { "epoch": 0.8830399112434674, "grad_norm": 0.738608081906324, "learning_rate": 6.497972424979724e-07, "loss": 0.6778, "step": 30245 }, { "epoch": 0.8830691074713147, "grad_norm": 0.7519734522360243, "learning_rate": 6.496350364963503e-07, "loss": 0.6575, "step": 30246 }, { "epoch": 0.8830983036991621, "grad_norm": 0.7443275126428838, "learning_rate": 6.494728304947284e-07, "loss": 0.7353, "step": 30247 }, { "epoch": 0.8831274999270095, "grad_norm": 0.7229150723075977, "learning_rate": 6.493106244931063e-07, "loss": 0.5985, "step": 30248 }, { "epoch": 0.8831566961548568, "grad_norm": 0.7368209428337394, "learning_rate": 6.491484184914842e-07, "loss": 0.7038, "step": 30249 }, { "epoch": 0.8831858923827042, "grad_norm": 0.7166740591420608, "learning_rate": 6.489862124898621e-07, "loss": 0.6228, "step": 30250 }, { "epoch": 0.8832150886105515, "grad_norm": 0.6494382230590461, "learning_rate": 6.4882400648824e-07, "loss": 0.5486, "step": 30251 }, { "epoch": 0.8832442848383989, "grad_norm": 0.6973668149063246, "learning_rate": 6.486618004866182e-07, "loss": 0.6411, "step": 30252 }, { "epoch": 0.8832734810662463, "grad_norm": 0.7041340956858834, "learning_rate": 6.484995944849961e-07, "loss": 0.6078, "step": 30253 }, { "epoch": 0.8833026772940936, "grad_norm": 0.7590536835823224, "learning_rate": 6.48337388483374e-07, "loss": 0.7014, "step": 30254 }, { "epoch": 0.883331873521941, "grad_norm": 0.7743948012101024, "learning_rate": 6.481751824817519e-07, "loss": 0.7305, "step": 30255 }, { "epoch": 0.8833610697497883, "grad_norm": 0.7100170053734378, "learning_rate": 6.480129764801298e-07, "loss": 0.6083, "step": 30256 }, { "epoch": 0.8833902659776357, "grad_norm": 0.6843423864004675, "learning_rate": 6.478507704785078e-07, "loss": 0.5721, "step": 30257 }, { "epoch": 0.8834194622054831, "grad_norm": 0.7292750064311494, "learning_rate": 6.476885644768857e-07, "loss": 0.6405, "step": 30258 }, { "epoch": 0.8834486584333304, "grad_norm": 0.6640290313384367, "learning_rate": 6.475263584752637e-07, "loss": 0.5675, "step": 30259 }, { "epoch": 0.8834778546611778, "grad_norm": 0.7361672707176276, "learning_rate": 6.473641524736416e-07, "loss": 0.6853, "step": 30260 }, { "epoch": 0.8835070508890251, "grad_norm": 0.7488355898153531, "learning_rate": 6.472019464720195e-07, "loss": 0.6728, "step": 30261 }, { "epoch": 0.8835362471168725, "grad_norm": 0.710802732562225, "learning_rate": 6.470397404703975e-07, "loss": 0.6154, "step": 30262 }, { "epoch": 0.8835654433447199, "grad_norm": 0.7311135369706465, "learning_rate": 6.468775344687754e-07, "loss": 0.666, "step": 30263 }, { "epoch": 0.8835946395725672, "grad_norm": 0.728447293772287, "learning_rate": 6.467153284671533e-07, "loss": 0.6686, "step": 30264 }, { "epoch": 0.8836238358004146, "grad_norm": 0.7420414280208149, "learning_rate": 6.465531224655312e-07, "loss": 0.6982, "step": 30265 }, { "epoch": 0.883653032028262, "grad_norm": 0.7972914517244308, "learning_rate": 6.463909164639093e-07, "loss": 0.7202, "step": 30266 }, { "epoch": 0.8836822282561093, "grad_norm": 0.8251094733692395, "learning_rate": 6.462287104622872e-07, "loss": 0.6391, "step": 30267 }, { "epoch": 0.8837114244839567, "grad_norm": 0.6994410858978893, "learning_rate": 6.460665044606651e-07, "loss": 0.6017, "step": 30268 }, { "epoch": 0.883740620711804, "grad_norm": 0.7698194086831935, "learning_rate": 6.45904298459043e-07, "loss": 0.7029, "step": 30269 }, { "epoch": 0.8837698169396514, "grad_norm": 0.7203977608985069, "learning_rate": 6.457420924574209e-07, "loss": 0.6342, "step": 30270 }, { "epoch": 0.8837990131674988, "grad_norm": 0.6971222259352482, "learning_rate": 6.45579886455799e-07, "loss": 0.612, "step": 30271 }, { "epoch": 0.8838282093953461, "grad_norm": 0.8246286410771255, "learning_rate": 6.454176804541769e-07, "loss": 0.667, "step": 30272 }, { "epoch": 0.8838574056231935, "grad_norm": 0.7056197084033462, "learning_rate": 6.452554744525548e-07, "loss": 0.635, "step": 30273 }, { "epoch": 0.8838866018510408, "grad_norm": 0.6758148706937643, "learning_rate": 6.450932684509327e-07, "loss": 0.5814, "step": 30274 }, { "epoch": 0.8839157980788882, "grad_norm": 0.7284882441369842, "learning_rate": 6.449310624493106e-07, "loss": 0.6831, "step": 30275 }, { "epoch": 0.8839449943067356, "grad_norm": 0.6842956355880079, "learning_rate": 6.447688564476886e-07, "loss": 0.5955, "step": 30276 }, { "epoch": 0.8839741905345829, "grad_norm": 0.7172634189527859, "learning_rate": 6.446066504460665e-07, "loss": 0.6458, "step": 30277 }, { "epoch": 0.8840033867624303, "grad_norm": 0.7195610005331157, "learning_rate": 6.444444444444445e-07, "loss": 0.6346, "step": 30278 }, { "epoch": 0.8840325829902776, "grad_norm": 0.7642350175747293, "learning_rate": 6.442822384428224e-07, "loss": 0.7197, "step": 30279 }, { "epoch": 0.884061779218125, "grad_norm": 0.7050811317608812, "learning_rate": 6.441200324412004e-07, "loss": 0.5473, "step": 30280 }, { "epoch": 0.8840909754459724, "grad_norm": 0.7242711110859711, "learning_rate": 6.439578264395783e-07, "loss": 0.6565, "step": 30281 }, { "epoch": 0.8841201716738197, "grad_norm": 0.6954576373239572, "learning_rate": 6.437956204379562e-07, "loss": 0.58, "step": 30282 }, { "epoch": 0.8841493679016671, "grad_norm": 0.7057788579030787, "learning_rate": 6.436334144363341e-07, "loss": 0.5576, "step": 30283 }, { "epoch": 0.8841785641295145, "grad_norm": 0.7346836351908087, "learning_rate": 6.43471208434712e-07, "loss": 0.6741, "step": 30284 }, { "epoch": 0.8842077603573618, "grad_norm": 0.7145464612608151, "learning_rate": 6.433090024330902e-07, "loss": 0.6088, "step": 30285 }, { "epoch": 0.8842369565852092, "grad_norm": 0.7746467169454843, "learning_rate": 6.431467964314681e-07, "loss": 0.7264, "step": 30286 }, { "epoch": 0.8842661528130565, "grad_norm": 0.7143669815106343, "learning_rate": 6.42984590429846e-07, "loss": 0.633, "step": 30287 }, { "epoch": 0.8842953490409039, "grad_norm": 0.6791438582126499, "learning_rate": 6.428223844282239e-07, "loss": 0.5643, "step": 30288 }, { "epoch": 0.8843245452687513, "grad_norm": 0.6577784291761771, "learning_rate": 6.426601784266018e-07, "loss": 0.5454, "step": 30289 }, { "epoch": 0.8843537414965986, "grad_norm": 0.728777951788979, "learning_rate": 6.424979724249799e-07, "loss": 0.6231, "step": 30290 }, { "epoch": 0.884382937724446, "grad_norm": 0.7222305650062808, "learning_rate": 6.423357664233578e-07, "loss": 0.6532, "step": 30291 }, { "epoch": 0.8844121339522933, "grad_norm": 0.7512085713828175, "learning_rate": 6.421735604217357e-07, "loss": 0.6715, "step": 30292 }, { "epoch": 0.8844413301801407, "grad_norm": 0.7320096556901045, "learning_rate": 6.420113544201136e-07, "loss": 0.6621, "step": 30293 }, { "epoch": 0.8844705264079881, "grad_norm": 0.6915232701740394, "learning_rate": 6.418491484184915e-07, "loss": 0.5998, "step": 30294 }, { "epoch": 0.8844997226358354, "grad_norm": 0.7158199770959139, "learning_rate": 6.416869424168695e-07, "loss": 0.6067, "step": 30295 }, { "epoch": 0.8845289188636828, "grad_norm": 0.6707731349616691, "learning_rate": 6.415247364152474e-07, "loss": 0.5843, "step": 30296 }, { "epoch": 0.8845581150915302, "grad_norm": 0.6891599552379529, "learning_rate": 6.413625304136254e-07, "loss": 0.6142, "step": 30297 }, { "epoch": 0.8845873113193775, "grad_norm": 0.6842609503000175, "learning_rate": 6.412003244120033e-07, "loss": 0.5837, "step": 30298 }, { "epoch": 0.8846165075472249, "grad_norm": 0.7226243547827205, "learning_rate": 6.410381184103813e-07, "loss": 0.6516, "step": 30299 }, { "epoch": 0.8846457037750722, "grad_norm": 0.7128835074934753, "learning_rate": 6.408759124087592e-07, "loss": 0.6208, "step": 30300 }, { "epoch": 0.8846749000029196, "grad_norm": 0.7355105891508824, "learning_rate": 6.407137064071371e-07, "loss": 0.6392, "step": 30301 }, { "epoch": 0.884704096230767, "grad_norm": 0.7971521011584864, "learning_rate": 6.40551500405515e-07, "loss": 0.7179, "step": 30302 }, { "epoch": 0.8847332924586143, "grad_norm": 0.7175601578096156, "learning_rate": 6.403892944038929e-07, "loss": 0.6312, "step": 30303 }, { "epoch": 0.8847624886864617, "grad_norm": 0.8516206862878481, "learning_rate": 6.40227088402271e-07, "loss": 0.6904, "step": 30304 }, { "epoch": 0.884791684914309, "grad_norm": 0.6890731899333121, "learning_rate": 6.400648824006489e-07, "loss": 0.6057, "step": 30305 }, { "epoch": 0.8848208811421564, "grad_norm": 0.6951902530453722, "learning_rate": 6.399026763990268e-07, "loss": 0.5957, "step": 30306 }, { "epoch": 0.8848500773700038, "grad_norm": 0.7685435067826164, "learning_rate": 6.397404703974047e-07, "loss": 0.6834, "step": 30307 }, { "epoch": 0.8848792735978511, "grad_norm": 0.7043987170227856, "learning_rate": 6.395782643957826e-07, "loss": 0.6323, "step": 30308 }, { "epoch": 0.8849084698256985, "grad_norm": 0.7770560710896245, "learning_rate": 6.394160583941607e-07, "loss": 0.7385, "step": 30309 }, { "epoch": 0.8849376660535458, "grad_norm": 0.7561973044410397, "learning_rate": 6.392538523925386e-07, "loss": 0.6472, "step": 30310 }, { "epoch": 0.8849668622813932, "grad_norm": 0.6939674122953033, "learning_rate": 6.390916463909165e-07, "loss": 0.5902, "step": 30311 }, { "epoch": 0.8849960585092406, "grad_norm": 0.718898678660866, "learning_rate": 6.389294403892944e-07, "loss": 0.6651, "step": 30312 }, { "epoch": 0.8850252547370879, "grad_norm": 0.7108940611737187, "learning_rate": 6.387672343876724e-07, "loss": 0.5762, "step": 30313 }, { "epoch": 0.8850544509649353, "grad_norm": 0.714077530238425, "learning_rate": 6.386050283860503e-07, "loss": 0.6311, "step": 30314 }, { "epoch": 0.8850836471927827, "grad_norm": 0.7444243768492453, "learning_rate": 6.384428223844282e-07, "loss": 0.6964, "step": 30315 }, { "epoch": 0.88511284342063, "grad_norm": 0.7004029713377617, "learning_rate": 6.382806163828061e-07, "loss": 0.6237, "step": 30316 }, { "epoch": 0.8851420396484774, "grad_norm": 0.7484502604762642, "learning_rate": 6.381184103811842e-07, "loss": 0.6255, "step": 30317 }, { "epoch": 0.8851712358763247, "grad_norm": 0.8149321716346347, "learning_rate": 6.379562043795622e-07, "loss": 0.5795, "step": 30318 }, { "epoch": 0.8852004321041721, "grad_norm": 0.7503561543634093, "learning_rate": 6.377939983779401e-07, "loss": 0.6587, "step": 30319 }, { "epoch": 0.8852296283320195, "grad_norm": 0.7428810301965579, "learning_rate": 6.37631792376318e-07, "loss": 0.7001, "step": 30320 }, { "epoch": 0.8852588245598668, "grad_norm": 0.6859428659311476, "learning_rate": 6.374695863746959e-07, "loss": 0.622, "step": 30321 }, { "epoch": 0.8852880207877142, "grad_norm": 0.7474776670156906, "learning_rate": 6.373073803730738e-07, "loss": 0.717, "step": 30322 }, { "epoch": 0.8853172170155615, "grad_norm": 0.6723257321882625, "learning_rate": 6.371451743714519e-07, "loss": 0.5876, "step": 30323 }, { "epoch": 0.8853464132434089, "grad_norm": 0.6789153210020231, "learning_rate": 6.369829683698298e-07, "loss": 0.5844, "step": 30324 }, { "epoch": 0.8853756094712563, "grad_norm": 0.6791408969589869, "learning_rate": 6.368207623682077e-07, "loss": 0.6204, "step": 30325 }, { "epoch": 0.8854048056991036, "grad_norm": 0.709071017934755, "learning_rate": 6.366585563665856e-07, "loss": 0.6099, "step": 30326 }, { "epoch": 0.885434001926951, "grad_norm": 0.7406199106327925, "learning_rate": 6.364963503649635e-07, "loss": 0.6588, "step": 30327 }, { "epoch": 0.8854631981547983, "grad_norm": 1.0068575439977943, "learning_rate": 6.363341443633416e-07, "loss": 0.5286, "step": 30328 }, { "epoch": 0.8854923943826457, "grad_norm": 0.685789894532222, "learning_rate": 6.361719383617195e-07, "loss": 0.586, "step": 30329 }, { "epoch": 0.8855215906104931, "grad_norm": 0.6721821899575257, "learning_rate": 6.360097323600974e-07, "loss": 0.5506, "step": 30330 }, { "epoch": 0.8855507868383404, "grad_norm": 0.717351496728677, "learning_rate": 6.358475263584753e-07, "loss": 0.6371, "step": 30331 }, { "epoch": 0.8855799830661879, "grad_norm": 0.6770971387786513, "learning_rate": 6.356853203568533e-07, "loss": 0.5631, "step": 30332 }, { "epoch": 0.8856091792940353, "grad_norm": 0.7163813681617266, "learning_rate": 6.355231143552312e-07, "loss": 0.6741, "step": 30333 }, { "epoch": 0.8856383755218826, "grad_norm": 0.8030025007706353, "learning_rate": 6.353609083536091e-07, "loss": 0.7237, "step": 30334 }, { "epoch": 0.88566757174973, "grad_norm": 0.7621526084639685, "learning_rate": 6.35198702351987e-07, "loss": 0.6846, "step": 30335 }, { "epoch": 0.8856967679775773, "grad_norm": 0.671400190065741, "learning_rate": 6.35036496350365e-07, "loss": 0.5624, "step": 30336 }, { "epoch": 0.8857259642054247, "grad_norm": 0.7504302785693935, "learning_rate": 6.34874290348743e-07, "loss": 0.7116, "step": 30337 }, { "epoch": 0.8857551604332721, "grad_norm": 0.7371936889803486, "learning_rate": 6.347120843471209e-07, "loss": 0.6319, "step": 30338 }, { "epoch": 0.8857843566611194, "grad_norm": 0.6760134076316686, "learning_rate": 6.345498783454988e-07, "loss": 0.5765, "step": 30339 }, { "epoch": 0.8858135528889668, "grad_norm": 0.7739464860358924, "learning_rate": 6.343876723438767e-07, "loss": 0.6886, "step": 30340 }, { "epoch": 0.8858427491168142, "grad_norm": 0.7184785529508826, "learning_rate": 6.342254663422546e-07, "loss": 0.6277, "step": 30341 }, { "epoch": 0.8858719453446615, "grad_norm": 0.7008544212728428, "learning_rate": 6.340632603406327e-07, "loss": 0.5757, "step": 30342 }, { "epoch": 0.8859011415725089, "grad_norm": 0.7567699351772273, "learning_rate": 6.339010543390106e-07, "loss": 0.668, "step": 30343 }, { "epoch": 0.8859303378003562, "grad_norm": 0.732718963868038, "learning_rate": 6.337388483373885e-07, "loss": 0.6592, "step": 30344 }, { "epoch": 0.8859595340282036, "grad_norm": 0.714185473233256, "learning_rate": 6.335766423357664e-07, "loss": 0.6309, "step": 30345 }, { "epoch": 0.885988730256051, "grad_norm": 0.7101276766457689, "learning_rate": 6.334144363341444e-07, "loss": 0.6294, "step": 30346 }, { "epoch": 0.8860179264838983, "grad_norm": 0.731974440291407, "learning_rate": 6.332522303325223e-07, "loss": 0.6316, "step": 30347 }, { "epoch": 0.8860471227117457, "grad_norm": 0.7341120471935543, "learning_rate": 6.330900243309004e-07, "loss": 0.6704, "step": 30348 }, { "epoch": 0.886076318939593, "grad_norm": 0.7276910494631742, "learning_rate": 6.329278183292783e-07, "loss": 0.6672, "step": 30349 }, { "epoch": 0.8861055151674404, "grad_norm": 0.758690936846442, "learning_rate": 6.327656123276562e-07, "loss": 0.7167, "step": 30350 }, { "epoch": 0.8861347113952878, "grad_norm": 0.7002146006667775, "learning_rate": 6.326034063260342e-07, "loss": 0.5962, "step": 30351 }, { "epoch": 0.8861639076231351, "grad_norm": 0.7300487776076129, "learning_rate": 6.324412003244121e-07, "loss": 0.6692, "step": 30352 }, { "epoch": 0.8861931038509825, "grad_norm": 0.7302543010165141, "learning_rate": 6.3227899432279e-07, "loss": 0.6538, "step": 30353 }, { "epoch": 0.8862223000788298, "grad_norm": 0.7409320711821857, "learning_rate": 6.321167883211679e-07, "loss": 0.6637, "step": 30354 }, { "epoch": 0.8862514963066772, "grad_norm": 0.7306267969994316, "learning_rate": 6.319545823195459e-07, "loss": 0.6322, "step": 30355 }, { "epoch": 0.8862806925345246, "grad_norm": 0.691995305665539, "learning_rate": 6.317923763179239e-07, "loss": 0.5575, "step": 30356 }, { "epoch": 0.8863098887623719, "grad_norm": 0.6847090125313974, "learning_rate": 6.316301703163018e-07, "loss": 0.58, "step": 30357 }, { "epoch": 0.8863390849902193, "grad_norm": 0.7875988627742996, "learning_rate": 6.314679643146797e-07, "loss": 0.6115, "step": 30358 }, { "epoch": 0.8863682812180667, "grad_norm": 0.6995221866110458, "learning_rate": 6.313057583130576e-07, "loss": 0.5849, "step": 30359 }, { "epoch": 0.886397477445914, "grad_norm": 0.7319036773687511, "learning_rate": 6.311435523114355e-07, "loss": 0.6826, "step": 30360 }, { "epoch": 0.8864266736737614, "grad_norm": 0.702493617646665, "learning_rate": 6.309813463098136e-07, "loss": 0.64, "step": 30361 }, { "epoch": 0.8864558699016087, "grad_norm": 0.7458553194753477, "learning_rate": 6.308191403081915e-07, "loss": 0.6556, "step": 30362 }, { "epoch": 0.8864850661294561, "grad_norm": 0.7331897765589126, "learning_rate": 6.306569343065694e-07, "loss": 0.6588, "step": 30363 }, { "epoch": 0.8865142623573035, "grad_norm": 0.6913964228305183, "learning_rate": 6.304947283049473e-07, "loss": 0.5717, "step": 30364 }, { "epoch": 0.8865434585851508, "grad_norm": 0.7074235402054694, "learning_rate": 6.303325223033253e-07, "loss": 0.5949, "step": 30365 }, { "epoch": 0.8865726548129982, "grad_norm": 0.7360875366974955, "learning_rate": 6.301703163017032e-07, "loss": 0.6602, "step": 30366 }, { "epoch": 0.8866018510408455, "grad_norm": 0.8676933468100895, "learning_rate": 6.300081103000812e-07, "loss": 0.8445, "step": 30367 }, { "epoch": 0.8866310472686929, "grad_norm": 0.7055044383907975, "learning_rate": 6.298459042984591e-07, "loss": 0.6803, "step": 30368 }, { "epoch": 0.8866602434965403, "grad_norm": 0.7282188896472, "learning_rate": 6.29683698296837e-07, "loss": 0.6138, "step": 30369 }, { "epoch": 0.8866894397243876, "grad_norm": 0.7124506934457381, "learning_rate": 6.29521492295215e-07, "loss": 0.699, "step": 30370 }, { "epoch": 0.886718635952235, "grad_norm": 0.6856032107797828, "learning_rate": 6.293592862935929e-07, "loss": 0.599, "step": 30371 }, { "epoch": 0.8867478321800824, "grad_norm": 0.7229984414651363, "learning_rate": 6.291970802919708e-07, "loss": 0.6766, "step": 30372 }, { "epoch": 0.8867770284079297, "grad_norm": 0.7583882774769726, "learning_rate": 6.290348742903487e-07, "loss": 0.7283, "step": 30373 }, { "epoch": 0.8868062246357771, "grad_norm": 0.6869712966976744, "learning_rate": 6.288726682887266e-07, "loss": 0.5769, "step": 30374 }, { "epoch": 0.8868354208636244, "grad_norm": 0.7642585683698057, "learning_rate": 6.287104622871047e-07, "loss": 0.6939, "step": 30375 }, { "epoch": 0.8868646170914718, "grad_norm": 0.7274739601160545, "learning_rate": 6.285482562854826e-07, "loss": 0.6387, "step": 30376 }, { "epoch": 0.8868938133193192, "grad_norm": 0.793727868715186, "learning_rate": 6.283860502838605e-07, "loss": 0.7153, "step": 30377 }, { "epoch": 0.8869230095471665, "grad_norm": 0.7101644799971296, "learning_rate": 6.282238442822384e-07, "loss": 0.6051, "step": 30378 }, { "epoch": 0.8869522057750139, "grad_norm": 0.705682032762325, "learning_rate": 6.280616382806165e-07, "loss": 0.5915, "step": 30379 }, { "epoch": 0.8869814020028612, "grad_norm": 0.7179253351512342, "learning_rate": 6.278994322789945e-07, "loss": 0.6131, "step": 30380 }, { "epoch": 0.8870105982307086, "grad_norm": 0.668147646565858, "learning_rate": 6.277372262773724e-07, "loss": 0.5863, "step": 30381 }, { "epoch": 0.887039794458556, "grad_norm": 0.7116443346064463, "learning_rate": 6.275750202757503e-07, "loss": 0.6459, "step": 30382 }, { "epoch": 0.8870689906864033, "grad_norm": 0.7654686388607224, "learning_rate": 6.274128142741282e-07, "loss": 0.6455, "step": 30383 }, { "epoch": 0.8870981869142507, "grad_norm": 0.723657080408497, "learning_rate": 6.272506082725062e-07, "loss": 0.5874, "step": 30384 }, { "epoch": 0.887127383142098, "grad_norm": 0.7497947917138673, "learning_rate": 6.270884022708841e-07, "loss": 0.6656, "step": 30385 }, { "epoch": 0.8871565793699454, "grad_norm": 0.7337482192732098, "learning_rate": 6.269261962692621e-07, "loss": 0.6132, "step": 30386 }, { "epoch": 0.8871857755977928, "grad_norm": 0.7477006265001264, "learning_rate": 6.2676399026764e-07, "loss": 0.6388, "step": 30387 }, { "epoch": 0.8872149718256401, "grad_norm": 0.7330226092574386, "learning_rate": 6.266017842660179e-07, "loss": 0.6646, "step": 30388 }, { "epoch": 0.8872441680534875, "grad_norm": 0.6759936146929378, "learning_rate": 6.264395782643959e-07, "loss": 0.5402, "step": 30389 }, { "epoch": 0.8872733642813349, "grad_norm": 0.7733091841115178, "learning_rate": 6.262773722627738e-07, "loss": 0.7488, "step": 30390 }, { "epoch": 0.8873025605091822, "grad_norm": 0.7423240903648037, "learning_rate": 6.261151662611517e-07, "loss": 0.6731, "step": 30391 }, { "epoch": 0.8873317567370296, "grad_norm": 0.7016915408554768, "learning_rate": 6.259529602595296e-07, "loss": 0.6235, "step": 30392 }, { "epoch": 0.8873609529648769, "grad_norm": 0.7147806502853462, "learning_rate": 6.257907542579075e-07, "loss": 0.6531, "step": 30393 }, { "epoch": 0.8873901491927243, "grad_norm": 0.7239430657355747, "learning_rate": 6.256285482562856e-07, "loss": 0.6943, "step": 30394 }, { "epoch": 0.8874193454205717, "grad_norm": 0.7203700658723817, "learning_rate": 6.254663422546635e-07, "loss": 0.5971, "step": 30395 }, { "epoch": 0.887448541648419, "grad_norm": 0.8024815518899567, "learning_rate": 6.253041362530414e-07, "loss": 0.6402, "step": 30396 }, { "epoch": 0.8874777378762664, "grad_norm": 0.6527382707902937, "learning_rate": 6.251419302514193e-07, "loss": 0.5421, "step": 30397 }, { "epoch": 0.8875069341041137, "grad_norm": 0.7081466351167367, "learning_rate": 6.249797242497973e-07, "loss": 0.612, "step": 30398 }, { "epoch": 0.8875361303319611, "grad_norm": 0.7546471479042797, "learning_rate": 6.248175182481752e-07, "loss": 0.6856, "step": 30399 }, { "epoch": 0.8875653265598085, "grad_norm": 0.7055447408009017, "learning_rate": 6.246553122465532e-07, "loss": 0.6354, "step": 30400 }, { "epoch": 0.8875945227876558, "grad_norm": 0.7435087570860007, "learning_rate": 6.244931062449311e-07, "loss": 0.6011, "step": 30401 }, { "epoch": 0.8876237190155032, "grad_norm": 0.750435131974993, "learning_rate": 6.243309002433091e-07, "loss": 0.619, "step": 30402 }, { "epoch": 0.8876529152433505, "grad_norm": 0.6419318386997206, "learning_rate": 6.24168694241687e-07, "loss": 0.5313, "step": 30403 }, { "epoch": 0.8876821114711979, "grad_norm": 0.6997714650330933, "learning_rate": 6.240064882400649e-07, "loss": 0.643, "step": 30404 }, { "epoch": 0.8877113076990453, "grad_norm": 0.6914738466372512, "learning_rate": 6.238442822384428e-07, "loss": 0.6085, "step": 30405 }, { "epoch": 0.8877405039268926, "grad_norm": 0.7257063851458387, "learning_rate": 6.236820762368208e-07, "loss": 0.6336, "step": 30406 }, { "epoch": 0.88776970015474, "grad_norm": 0.7125075893015467, "learning_rate": 6.235198702351987e-07, "loss": 0.6497, "step": 30407 }, { "epoch": 0.8877988963825874, "grad_norm": 0.7289186952288814, "learning_rate": 6.233576642335766e-07, "loss": 0.6588, "step": 30408 }, { "epoch": 0.8878280926104347, "grad_norm": 0.6976709002493879, "learning_rate": 6.231954582319546e-07, "loss": 0.5711, "step": 30409 }, { "epoch": 0.8878572888382821, "grad_norm": 0.8079565101101861, "learning_rate": 6.230332522303325e-07, "loss": 0.686, "step": 30410 }, { "epoch": 0.8878864850661294, "grad_norm": 0.6314004269307213, "learning_rate": 6.228710462287105e-07, "loss": 0.521, "step": 30411 }, { "epoch": 0.8879156812939768, "grad_norm": 0.7191293613715292, "learning_rate": 6.227088402270884e-07, "loss": 0.6001, "step": 30412 }, { "epoch": 0.8879448775218242, "grad_norm": 0.6943822398502132, "learning_rate": 6.225466342254663e-07, "loss": 0.6058, "step": 30413 }, { "epoch": 0.8879740737496715, "grad_norm": 0.6947791813737858, "learning_rate": 6.223844282238444e-07, "loss": 0.5659, "step": 30414 }, { "epoch": 0.8880032699775189, "grad_norm": 0.7361151592599711, "learning_rate": 6.222222222222223e-07, "loss": 0.6809, "step": 30415 }, { "epoch": 0.8880324662053662, "grad_norm": 0.7001273374721179, "learning_rate": 6.220600162206003e-07, "loss": 0.5869, "step": 30416 }, { "epoch": 0.8880616624332136, "grad_norm": 0.7476651089497066, "learning_rate": 6.218978102189782e-07, "loss": 0.7128, "step": 30417 }, { "epoch": 0.888090858661061, "grad_norm": 0.7233990795827273, "learning_rate": 6.217356042173562e-07, "loss": 0.6382, "step": 30418 }, { "epoch": 0.8881200548889083, "grad_norm": 0.71138557489746, "learning_rate": 6.215733982157341e-07, "loss": 0.6279, "step": 30419 }, { "epoch": 0.8881492511167557, "grad_norm": 0.7228568026517963, "learning_rate": 6.21411192214112e-07, "loss": 0.6208, "step": 30420 }, { "epoch": 0.888178447344603, "grad_norm": 0.7143042170522053, "learning_rate": 6.2124898621249e-07, "loss": 0.6434, "step": 30421 }, { "epoch": 0.8882076435724504, "grad_norm": 0.7054657646714919, "learning_rate": 6.210867802108679e-07, "loss": 0.5386, "step": 30422 }, { "epoch": 0.8882368398002978, "grad_norm": 0.7666009399582926, "learning_rate": 6.209245742092458e-07, "loss": 0.6804, "step": 30423 }, { "epoch": 0.8882660360281451, "grad_norm": 0.7117622415144219, "learning_rate": 6.207623682076237e-07, "loss": 0.5764, "step": 30424 }, { "epoch": 0.8882952322559925, "grad_norm": 0.7032067559479803, "learning_rate": 6.206001622060017e-07, "loss": 0.6096, "step": 30425 }, { "epoch": 0.8883244284838399, "grad_norm": 0.7074914708081825, "learning_rate": 6.204379562043796e-07, "loss": 0.5795, "step": 30426 }, { "epoch": 0.8883536247116872, "grad_norm": 0.6883008959479202, "learning_rate": 6.202757502027575e-07, "loss": 0.6036, "step": 30427 }, { "epoch": 0.8883828209395346, "grad_norm": 0.7481842979959035, "learning_rate": 6.201135442011355e-07, "loss": 0.6936, "step": 30428 }, { "epoch": 0.8884120171673819, "grad_norm": 0.7381814430303648, "learning_rate": 6.199513381995134e-07, "loss": 0.6606, "step": 30429 }, { "epoch": 0.8884412133952293, "grad_norm": 0.7183832341370863, "learning_rate": 6.197891321978914e-07, "loss": 0.6268, "step": 30430 }, { "epoch": 0.8884704096230767, "grad_norm": 0.7264206110981551, "learning_rate": 6.196269261962693e-07, "loss": 0.6394, "step": 30431 }, { "epoch": 0.888499605850924, "grad_norm": 0.6771791643304008, "learning_rate": 6.194647201946472e-07, "loss": 0.5768, "step": 30432 }, { "epoch": 0.8885288020787714, "grad_norm": 0.7902039144638293, "learning_rate": 6.193025141930252e-07, "loss": 0.7284, "step": 30433 }, { "epoch": 0.8885579983066187, "grad_norm": 0.706146908055746, "learning_rate": 6.191403081914031e-07, "loss": 0.6041, "step": 30434 }, { "epoch": 0.8885871945344661, "grad_norm": 0.6788573015878645, "learning_rate": 6.189781021897811e-07, "loss": 0.5652, "step": 30435 }, { "epoch": 0.8886163907623135, "grad_norm": 0.7596197375144746, "learning_rate": 6.18815896188159e-07, "loss": 0.7166, "step": 30436 }, { "epoch": 0.8886455869901608, "grad_norm": 0.7212525926285254, "learning_rate": 6.18653690186537e-07, "loss": 0.6519, "step": 30437 }, { "epoch": 0.8886747832180082, "grad_norm": 0.7005595147627252, "learning_rate": 6.184914841849149e-07, "loss": 0.6264, "step": 30438 }, { "epoch": 0.8887039794458556, "grad_norm": 0.7716899388114516, "learning_rate": 6.183292781832928e-07, "loss": 0.7489, "step": 30439 }, { "epoch": 0.8887331756737029, "grad_norm": 0.781868311069004, "learning_rate": 6.181670721816708e-07, "loss": 0.7767, "step": 30440 }, { "epoch": 0.8887623719015503, "grad_norm": 0.7248805070140384, "learning_rate": 6.180048661800487e-07, "loss": 0.635, "step": 30441 }, { "epoch": 0.8887915681293976, "grad_norm": 0.7235223265844126, "learning_rate": 6.178426601784266e-07, "loss": 0.5991, "step": 30442 }, { "epoch": 0.888820764357245, "grad_norm": 0.7585830225853377, "learning_rate": 6.176804541768045e-07, "loss": 0.71, "step": 30443 }, { "epoch": 0.8888499605850924, "grad_norm": 0.7170225172398906, "learning_rate": 6.175182481751825e-07, "loss": 0.6303, "step": 30444 }, { "epoch": 0.8888791568129397, "grad_norm": 0.7064395904238973, "learning_rate": 6.173560421735604e-07, "loss": 0.594, "step": 30445 }, { "epoch": 0.8889083530407871, "grad_norm": 0.730250274085331, "learning_rate": 6.171938361719383e-07, "loss": 0.6448, "step": 30446 }, { "epoch": 0.8889375492686344, "grad_norm": 0.6676443405058854, "learning_rate": 6.170316301703164e-07, "loss": 0.5445, "step": 30447 }, { "epoch": 0.8889667454964818, "grad_norm": 0.6970246338375969, "learning_rate": 6.168694241686943e-07, "loss": 0.6035, "step": 30448 }, { "epoch": 0.8889959417243292, "grad_norm": 0.7836254355761249, "learning_rate": 6.167072181670723e-07, "loss": 0.7349, "step": 30449 }, { "epoch": 0.8890251379521765, "grad_norm": 0.6369695995579947, "learning_rate": 6.165450121654502e-07, "loss": 0.5272, "step": 30450 }, { "epoch": 0.8890543341800239, "grad_norm": 0.7485378101173287, "learning_rate": 6.163828061638282e-07, "loss": 0.6782, "step": 30451 }, { "epoch": 0.8890835304078712, "grad_norm": 0.7147893156567631, "learning_rate": 6.162206001622061e-07, "loss": 0.6328, "step": 30452 }, { "epoch": 0.8891127266357187, "grad_norm": 0.8316701249566117, "learning_rate": 6.16058394160584e-07, "loss": 0.6804, "step": 30453 }, { "epoch": 0.8891419228635661, "grad_norm": 0.699999365437861, "learning_rate": 6.15896188158962e-07, "loss": 0.6356, "step": 30454 }, { "epoch": 0.8891711190914134, "grad_norm": 0.6973184076538315, "learning_rate": 6.157339821573399e-07, "loss": 0.6058, "step": 30455 }, { "epoch": 0.8892003153192608, "grad_norm": 0.7296106561386352, "learning_rate": 6.155717761557179e-07, "loss": 0.6771, "step": 30456 }, { "epoch": 0.8892295115471082, "grad_norm": 0.7649549396562894, "learning_rate": 6.154095701540958e-07, "loss": 0.7507, "step": 30457 }, { "epoch": 0.8892587077749555, "grad_norm": 0.7520240824872457, "learning_rate": 6.152473641524737e-07, "loss": 0.6692, "step": 30458 }, { "epoch": 0.8892879040028029, "grad_norm": 0.7867807416331429, "learning_rate": 6.150851581508516e-07, "loss": 0.6869, "step": 30459 }, { "epoch": 0.8893171002306502, "grad_norm": 0.6963571175698553, "learning_rate": 6.149229521492296e-07, "loss": 0.6487, "step": 30460 }, { "epoch": 0.8893462964584976, "grad_norm": 0.7538585294088799, "learning_rate": 6.147607461476075e-07, "loss": 0.6638, "step": 30461 }, { "epoch": 0.889375492686345, "grad_norm": 0.7103460371990896, "learning_rate": 6.145985401459854e-07, "loss": 0.6335, "step": 30462 }, { "epoch": 0.8894046889141923, "grad_norm": 0.7648621414934572, "learning_rate": 6.144363341443634e-07, "loss": 0.7281, "step": 30463 }, { "epoch": 0.8894338851420397, "grad_norm": 0.720480748916758, "learning_rate": 6.142741281427413e-07, "loss": 0.5878, "step": 30464 }, { "epoch": 0.889463081369887, "grad_norm": 0.7339468514088686, "learning_rate": 6.141119221411192e-07, "loss": 0.6258, "step": 30465 }, { "epoch": 0.8894922775977344, "grad_norm": 0.7151053472335623, "learning_rate": 6.139497161394972e-07, "loss": 0.609, "step": 30466 }, { "epoch": 0.8895214738255818, "grad_norm": 0.7706926545202237, "learning_rate": 6.137875101378751e-07, "loss": 0.7122, "step": 30467 }, { "epoch": 0.8895506700534291, "grad_norm": 0.7083084275672749, "learning_rate": 6.136253041362531e-07, "loss": 0.6336, "step": 30468 }, { "epoch": 0.8895798662812765, "grad_norm": 0.8716565191473125, "learning_rate": 6.13463098134631e-07, "loss": 0.7009, "step": 30469 }, { "epoch": 0.8896090625091239, "grad_norm": 0.6947604667280615, "learning_rate": 6.13300892133009e-07, "loss": 0.5655, "step": 30470 }, { "epoch": 0.8896382587369712, "grad_norm": 0.7978409098844637, "learning_rate": 6.131386861313869e-07, "loss": 0.6953, "step": 30471 }, { "epoch": 0.8896674549648186, "grad_norm": 0.7318470952974471, "learning_rate": 6.129764801297648e-07, "loss": 0.612, "step": 30472 }, { "epoch": 0.8896966511926659, "grad_norm": 0.6801010877493241, "learning_rate": 6.128142741281428e-07, "loss": 0.5937, "step": 30473 }, { "epoch": 0.8897258474205133, "grad_norm": 0.7372313727404617, "learning_rate": 6.126520681265207e-07, "loss": 0.655, "step": 30474 }, { "epoch": 0.8897550436483607, "grad_norm": 0.6652118211819175, "learning_rate": 6.124898621248987e-07, "loss": 0.5664, "step": 30475 }, { "epoch": 0.889784239876208, "grad_norm": 0.7187396388295981, "learning_rate": 6.123276561232766e-07, "loss": 0.5987, "step": 30476 }, { "epoch": 0.8898134361040554, "grad_norm": 0.7439125869015961, "learning_rate": 6.121654501216545e-07, "loss": 0.627, "step": 30477 }, { "epoch": 0.8898426323319027, "grad_norm": 0.699471022330009, "learning_rate": 6.120032441200324e-07, "loss": 0.6049, "step": 30478 }, { "epoch": 0.8898718285597501, "grad_norm": 0.757737420898251, "learning_rate": 6.118410381184105e-07, "loss": 0.672, "step": 30479 }, { "epoch": 0.8899010247875975, "grad_norm": 0.7173099791242614, "learning_rate": 6.116788321167884e-07, "loss": 0.6188, "step": 30480 }, { "epoch": 0.8899302210154448, "grad_norm": 0.7308299085465843, "learning_rate": 6.115166261151663e-07, "loss": 0.6666, "step": 30481 }, { "epoch": 0.8899594172432922, "grad_norm": 0.695234643584724, "learning_rate": 6.113544201135443e-07, "loss": 0.602, "step": 30482 }, { "epoch": 0.8899886134711396, "grad_norm": 0.7095325381994874, "learning_rate": 6.111922141119222e-07, "loss": 0.6498, "step": 30483 }, { "epoch": 0.8900178096989869, "grad_norm": 0.7014262830405779, "learning_rate": 6.110300081103002e-07, "loss": 0.6335, "step": 30484 }, { "epoch": 0.8900470059268343, "grad_norm": 0.7242655621443925, "learning_rate": 6.108678021086781e-07, "loss": 0.6636, "step": 30485 }, { "epoch": 0.8900762021546816, "grad_norm": 0.6871415814286198, "learning_rate": 6.10705596107056e-07, "loss": 0.5657, "step": 30486 }, { "epoch": 0.890105398382529, "grad_norm": 0.707598854126607, "learning_rate": 6.10543390105434e-07, "loss": 0.6467, "step": 30487 }, { "epoch": 0.8901345946103764, "grad_norm": 0.7075881012373617, "learning_rate": 6.103811841038119e-07, "loss": 0.5934, "step": 30488 }, { "epoch": 0.8901637908382237, "grad_norm": 0.7198517960162071, "learning_rate": 6.102189781021899e-07, "loss": 0.6681, "step": 30489 }, { "epoch": 0.8901929870660711, "grad_norm": 0.7134409152831453, "learning_rate": 6.100567721005678e-07, "loss": 0.6207, "step": 30490 }, { "epoch": 0.8902221832939184, "grad_norm": 0.6901780379655991, "learning_rate": 6.098945660989457e-07, "loss": 0.5689, "step": 30491 }, { "epoch": 0.8902513795217658, "grad_norm": 0.7110192035018663, "learning_rate": 6.097323600973237e-07, "loss": 0.6637, "step": 30492 }, { "epoch": 0.8902805757496132, "grad_norm": 0.7637413913092486, "learning_rate": 6.095701540957016e-07, "loss": 0.6903, "step": 30493 }, { "epoch": 0.8903097719774605, "grad_norm": 0.7710180954355165, "learning_rate": 6.094079480940795e-07, "loss": 0.7024, "step": 30494 }, { "epoch": 0.8903389682053079, "grad_norm": 0.6778323777650821, "learning_rate": 6.092457420924575e-07, "loss": 0.5322, "step": 30495 }, { "epoch": 0.8903681644331553, "grad_norm": 0.7977148596554237, "learning_rate": 6.090835360908354e-07, "loss": 0.7382, "step": 30496 }, { "epoch": 0.8903973606610026, "grad_norm": 0.7438172380532505, "learning_rate": 6.089213300892133e-07, "loss": 0.6023, "step": 30497 }, { "epoch": 0.89042655688885, "grad_norm": 0.746228308262803, "learning_rate": 6.087591240875913e-07, "loss": 0.6277, "step": 30498 }, { "epoch": 0.8904557531166973, "grad_norm": 0.7607801333670576, "learning_rate": 6.085969180859692e-07, "loss": 0.7035, "step": 30499 }, { "epoch": 0.8904849493445447, "grad_norm": 0.7628712474587563, "learning_rate": 6.084347120843471e-07, "loss": 0.7044, "step": 30500 }, { "epoch": 0.8905141455723921, "grad_norm": 0.7009505134123055, "learning_rate": 6.082725060827251e-07, "loss": 0.6164, "step": 30501 }, { "epoch": 0.8905433418002394, "grad_norm": 0.7444810993276209, "learning_rate": 6.08110300081103e-07, "loss": 0.6115, "step": 30502 }, { "epoch": 0.8905725380280868, "grad_norm": 0.6720779612604146, "learning_rate": 6.07948094079481e-07, "loss": 0.5396, "step": 30503 }, { "epoch": 0.8906017342559341, "grad_norm": 0.7022731071292322, "learning_rate": 6.077858880778589e-07, "loss": 0.6001, "step": 30504 }, { "epoch": 0.8906309304837815, "grad_norm": 0.7236015712554776, "learning_rate": 6.076236820762368e-07, "loss": 0.6247, "step": 30505 }, { "epoch": 0.8906601267116289, "grad_norm": 0.6896533067854127, "learning_rate": 6.074614760746148e-07, "loss": 0.5802, "step": 30506 }, { "epoch": 0.8906893229394762, "grad_norm": 0.7160671622398882, "learning_rate": 6.072992700729927e-07, "loss": 0.5956, "step": 30507 }, { "epoch": 0.8907185191673236, "grad_norm": 0.7046715697807979, "learning_rate": 6.071370640713707e-07, "loss": 0.6123, "step": 30508 }, { "epoch": 0.890747715395171, "grad_norm": 0.7343611162492502, "learning_rate": 6.069748580697486e-07, "loss": 0.6463, "step": 30509 }, { "epoch": 0.8907769116230183, "grad_norm": 0.7292766050935526, "learning_rate": 6.068126520681267e-07, "loss": 0.6088, "step": 30510 }, { "epoch": 0.8908061078508657, "grad_norm": 0.8438361002790058, "learning_rate": 6.066504460665046e-07, "loss": 0.6931, "step": 30511 }, { "epoch": 0.890835304078713, "grad_norm": 0.7181094462937319, "learning_rate": 6.064882400648825e-07, "loss": 0.6501, "step": 30512 }, { "epoch": 0.8908645003065604, "grad_norm": 0.654224529948733, "learning_rate": 6.063260340632604e-07, "loss": 0.5563, "step": 30513 }, { "epoch": 0.8908936965344078, "grad_norm": 0.6888850117507923, "learning_rate": 6.061638280616384e-07, "loss": 0.6058, "step": 30514 }, { "epoch": 0.8909228927622551, "grad_norm": 0.7205581277859922, "learning_rate": 6.060016220600163e-07, "loss": 0.6136, "step": 30515 }, { "epoch": 0.8909520889901025, "grad_norm": 0.7481079958577684, "learning_rate": 6.058394160583942e-07, "loss": 0.6534, "step": 30516 }, { "epoch": 0.8909812852179498, "grad_norm": 0.7191589949461781, "learning_rate": 6.056772100567721e-07, "loss": 0.6358, "step": 30517 }, { "epoch": 0.8910104814457972, "grad_norm": 0.7030762619968481, "learning_rate": 6.055150040551501e-07, "loss": 0.5554, "step": 30518 }, { "epoch": 0.8910396776736446, "grad_norm": 0.7289213162866383, "learning_rate": 6.05352798053528e-07, "loss": 0.6441, "step": 30519 }, { "epoch": 0.8910688739014919, "grad_norm": 0.7250136202735383, "learning_rate": 6.05190592051906e-07, "loss": 0.6541, "step": 30520 }, { "epoch": 0.8910980701293393, "grad_norm": 0.7466071864363052, "learning_rate": 6.050283860502839e-07, "loss": 0.6094, "step": 30521 }, { "epoch": 0.8911272663571866, "grad_norm": 0.7080497162168978, "learning_rate": 6.048661800486619e-07, "loss": 0.6405, "step": 30522 }, { "epoch": 0.891156462585034, "grad_norm": 0.7983980215091935, "learning_rate": 6.047039740470398e-07, "loss": 0.6456, "step": 30523 }, { "epoch": 0.8911856588128814, "grad_norm": 0.7557624156435878, "learning_rate": 6.045417680454177e-07, "loss": 0.766, "step": 30524 }, { "epoch": 0.8912148550407287, "grad_norm": 0.7761524389180948, "learning_rate": 6.043795620437957e-07, "loss": 0.7508, "step": 30525 }, { "epoch": 0.8912440512685761, "grad_norm": 0.6894076940637637, "learning_rate": 6.042173560421736e-07, "loss": 0.5725, "step": 30526 }, { "epoch": 0.8912732474964234, "grad_norm": 0.7848574451083054, "learning_rate": 6.040551500405516e-07, "loss": 0.7231, "step": 30527 }, { "epoch": 0.8913024437242708, "grad_norm": 0.7137216077809853, "learning_rate": 6.038929440389295e-07, "loss": 0.589, "step": 30528 }, { "epoch": 0.8913316399521182, "grad_norm": 0.7304912283328825, "learning_rate": 6.037307380373075e-07, "loss": 0.6542, "step": 30529 }, { "epoch": 0.8913608361799655, "grad_norm": 0.7535221967056982, "learning_rate": 6.035685320356854e-07, "loss": 0.6894, "step": 30530 }, { "epoch": 0.8913900324078129, "grad_norm": 0.7199019886706515, "learning_rate": 6.034063260340633e-07, "loss": 0.6303, "step": 30531 }, { "epoch": 0.8914192286356603, "grad_norm": 0.7285369891164191, "learning_rate": 6.032441200324412e-07, "loss": 0.6338, "step": 30532 }, { "epoch": 0.8914484248635076, "grad_norm": 0.7611227176286371, "learning_rate": 6.030819140308192e-07, "loss": 0.5936, "step": 30533 }, { "epoch": 0.891477621091355, "grad_norm": 0.7104287142526947, "learning_rate": 6.029197080291971e-07, "loss": 0.6106, "step": 30534 }, { "epoch": 0.8915068173192023, "grad_norm": 0.7294403626883905, "learning_rate": 6.02757502027575e-07, "loss": 0.5972, "step": 30535 }, { "epoch": 0.8915360135470497, "grad_norm": 0.7487676913157674, "learning_rate": 6.02595296025953e-07, "loss": 0.6955, "step": 30536 }, { "epoch": 0.8915652097748971, "grad_norm": 0.7715109671769234, "learning_rate": 6.024330900243309e-07, "loss": 0.6641, "step": 30537 }, { "epoch": 0.8915944060027444, "grad_norm": 0.6888919218782833, "learning_rate": 6.022708840227088e-07, "loss": 0.624, "step": 30538 }, { "epoch": 0.8916236022305918, "grad_norm": 0.7462670546523609, "learning_rate": 6.021086780210868e-07, "loss": 0.6491, "step": 30539 }, { "epoch": 0.8916527984584391, "grad_norm": 0.706871761942368, "learning_rate": 6.019464720194647e-07, "loss": 0.6105, "step": 30540 }, { "epoch": 0.8916819946862865, "grad_norm": 0.7552526276835483, "learning_rate": 6.017842660178427e-07, "loss": 0.7068, "step": 30541 }, { "epoch": 0.8917111909141339, "grad_norm": 0.6608307942772, "learning_rate": 6.016220600162206e-07, "loss": 0.5321, "step": 30542 }, { "epoch": 0.8917403871419812, "grad_norm": 0.6863484279816213, "learning_rate": 6.014598540145987e-07, "loss": 0.5784, "step": 30543 }, { "epoch": 0.8917695833698286, "grad_norm": 0.770603389224821, "learning_rate": 6.012976480129766e-07, "loss": 0.7028, "step": 30544 }, { "epoch": 0.891798779597676, "grad_norm": 0.6873295775809958, "learning_rate": 6.011354420113545e-07, "loss": 0.5868, "step": 30545 }, { "epoch": 0.8918279758255233, "grad_norm": 0.8099469988722446, "learning_rate": 6.009732360097325e-07, "loss": 0.733, "step": 30546 }, { "epoch": 0.8918571720533707, "grad_norm": 0.7347386640196629, "learning_rate": 6.008110300081104e-07, "loss": 0.6317, "step": 30547 }, { "epoch": 0.891886368281218, "grad_norm": 0.686245186157282, "learning_rate": 6.006488240064883e-07, "loss": 0.5685, "step": 30548 }, { "epoch": 0.8919155645090654, "grad_norm": 0.6630057814095635, "learning_rate": 6.004866180048663e-07, "loss": 0.5624, "step": 30549 }, { "epoch": 0.8919447607369128, "grad_norm": 0.727613962922094, "learning_rate": 6.003244120032442e-07, "loss": 0.6579, "step": 30550 }, { "epoch": 0.8919739569647601, "grad_norm": 0.7088786952180883, "learning_rate": 6.001622060016221e-07, "loss": 0.5745, "step": 30551 }, { "epoch": 0.8920031531926075, "grad_norm": 0.7631668804647365, "learning_rate": 6.000000000000001e-07, "loss": 0.7133, "step": 30552 }, { "epoch": 0.8920323494204548, "grad_norm": 0.6547847952301132, "learning_rate": 5.99837793998378e-07, "loss": 0.5332, "step": 30553 }, { "epoch": 0.8920615456483022, "grad_norm": 0.7077111235239381, "learning_rate": 5.996755879967559e-07, "loss": 0.6351, "step": 30554 }, { "epoch": 0.8920907418761496, "grad_norm": 0.785025817749676, "learning_rate": 5.995133819951339e-07, "loss": 0.6987, "step": 30555 }, { "epoch": 0.8921199381039969, "grad_norm": 0.6864483048613841, "learning_rate": 5.993511759935118e-07, "loss": 0.5425, "step": 30556 }, { "epoch": 0.8921491343318443, "grad_norm": 0.7416497248397713, "learning_rate": 5.991889699918897e-07, "loss": 0.659, "step": 30557 }, { "epoch": 0.8921783305596916, "grad_norm": 0.6562256923706, "learning_rate": 5.990267639902677e-07, "loss": 0.493, "step": 30558 }, { "epoch": 0.892207526787539, "grad_norm": 0.8136868389715086, "learning_rate": 5.988645579886456e-07, "loss": 0.7368, "step": 30559 }, { "epoch": 0.8922367230153864, "grad_norm": 0.7578012789478884, "learning_rate": 5.987023519870236e-07, "loss": 0.7037, "step": 30560 }, { "epoch": 0.8922659192432337, "grad_norm": 0.6777839165281426, "learning_rate": 5.985401459854015e-07, "loss": 0.5609, "step": 30561 }, { "epoch": 0.8922951154710811, "grad_norm": 0.7357265191610293, "learning_rate": 5.983779399837795e-07, "loss": 0.6945, "step": 30562 }, { "epoch": 0.8923243116989285, "grad_norm": 0.7177614943283364, "learning_rate": 5.982157339821574e-07, "loss": 0.6543, "step": 30563 }, { "epoch": 0.8923535079267758, "grad_norm": 0.7258735974656265, "learning_rate": 5.980535279805353e-07, "loss": 0.5789, "step": 30564 }, { "epoch": 0.8923827041546232, "grad_norm": 0.7193068450400296, "learning_rate": 5.978913219789133e-07, "loss": 0.6377, "step": 30565 }, { "epoch": 0.8924119003824705, "grad_norm": 0.8056332994796556, "learning_rate": 5.977291159772912e-07, "loss": 0.6973, "step": 30566 }, { "epoch": 0.8924410966103179, "grad_norm": 0.7830860546578785, "learning_rate": 5.975669099756691e-07, "loss": 0.7293, "step": 30567 }, { "epoch": 0.8924702928381653, "grad_norm": 0.7406276034878488, "learning_rate": 5.974047039740471e-07, "loss": 0.6531, "step": 30568 }, { "epoch": 0.8924994890660126, "grad_norm": 0.6693982267360294, "learning_rate": 5.97242497972425e-07, "loss": 0.5419, "step": 30569 }, { "epoch": 0.89252868529386, "grad_norm": 0.7121025912162064, "learning_rate": 5.970802919708029e-07, "loss": 0.5963, "step": 30570 }, { "epoch": 0.8925578815217073, "grad_norm": 0.7447466876074438, "learning_rate": 5.969180859691808e-07, "loss": 0.6295, "step": 30571 }, { "epoch": 0.8925870777495547, "grad_norm": 0.6799558936449206, "learning_rate": 5.967558799675588e-07, "loss": 0.5674, "step": 30572 }, { "epoch": 0.8926162739774022, "grad_norm": 0.7895505373977488, "learning_rate": 5.965936739659367e-07, "loss": 0.7885, "step": 30573 }, { "epoch": 0.8926454702052495, "grad_norm": 0.7287081414890577, "learning_rate": 5.964314679643147e-07, "loss": 0.5638, "step": 30574 }, { "epoch": 0.8926746664330969, "grad_norm": 0.7090537380275217, "learning_rate": 5.962692619626926e-07, "loss": 0.6307, "step": 30575 }, { "epoch": 0.8927038626609443, "grad_norm": 0.7466680082872621, "learning_rate": 5.961070559610705e-07, "loss": 0.6709, "step": 30576 }, { "epoch": 0.8927330588887916, "grad_norm": 0.721248053621322, "learning_rate": 5.959448499594486e-07, "loss": 0.6511, "step": 30577 }, { "epoch": 0.892762255116639, "grad_norm": 0.7050287478248406, "learning_rate": 5.957826439578265e-07, "loss": 0.6434, "step": 30578 }, { "epoch": 0.8927914513444863, "grad_norm": 0.6871227556548892, "learning_rate": 5.956204379562045e-07, "loss": 0.5954, "step": 30579 }, { "epoch": 0.8928206475723337, "grad_norm": 0.6877590567338077, "learning_rate": 5.954582319545824e-07, "loss": 0.5984, "step": 30580 }, { "epoch": 0.8928498438001811, "grad_norm": 0.6965802989063609, "learning_rate": 5.952960259529604e-07, "loss": 0.575, "step": 30581 }, { "epoch": 0.8928790400280284, "grad_norm": 0.7001713482704555, "learning_rate": 5.951338199513383e-07, "loss": 0.6063, "step": 30582 }, { "epoch": 0.8929082362558758, "grad_norm": 0.7309143367778392, "learning_rate": 5.949716139497162e-07, "loss": 0.621, "step": 30583 }, { "epoch": 0.8929374324837231, "grad_norm": 0.6591557890919989, "learning_rate": 5.948094079480942e-07, "loss": 0.5549, "step": 30584 }, { "epoch": 0.8929666287115705, "grad_norm": 0.7005790553947333, "learning_rate": 5.946472019464721e-07, "loss": 0.5669, "step": 30585 }, { "epoch": 0.8929958249394179, "grad_norm": 0.751612957495059, "learning_rate": 5.9448499594485e-07, "loss": 0.7309, "step": 30586 }, { "epoch": 0.8930250211672652, "grad_norm": 0.7120085928702812, "learning_rate": 5.94322789943228e-07, "loss": 0.6375, "step": 30587 }, { "epoch": 0.8930542173951126, "grad_norm": 0.7245088584146298, "learning_rate": 5.941605839416059e-07, "loss": 0.6504, "step": 30588 }, { "epoch": 0.89308341362296, "grad_norm": 0.7233786730477004, "learning_rate": 5.939983779399838e-07, "loss": 0.6126, "step": 30589 }, { "epoch": 0.8931126098508073, "grad_norm": 0.753774026383831, "learning_rate": 5.938361719383617e-07, "loss": 0.7141, "step": 30590 }, { "epoch": 0.8931418060786547, "grad_norm": 0.9433613395906917, "learning_rate": 5.936739659367397e-07, "loss": 0.7168, "step": 30591 }, { "epoch": 0.893171002306502, "grad_norm": 0.6818202446833865, "learning_rate": 5.935117599351176e-07, "loss": 0.5868, "step": 30592 }, { "epoch": 0.8932001985343494, "grad_norm": 0.7403570762957536, "learning_rate": 5.933495539334956e-07, "loss": 0.6466, "step": 30593 }, { "epoch": 0.8932293947621968, "grad_norm": 0.7230538574954734, "learning_rate": 5.931873479318735e-07, "loss": 0.6201, "step": 30594 }, { "epoch": 0.8932585909900441, "grad_norm": 0.6829343412816524, "learning_rate": 5.930251419302515e-07, "loss": 0.5665, "step": 30595 }, { "epoch": 0.8932877872178915, "grad_norm": 0.7569411393930492, "learning_rate": 5.928629359286294e-07, "loss": 0.5958, "step": 30596 }, { "epoch": 0.8933169834457388, "grad_norm": 0.7371995110717254, "learning_rate": 5.927007299270073e-07, "loss": 0.6647, "step": 30597 }, { "epoch": 0.8933461796735862, "grad_norm": 0.7603472565102514, "learning_rate": 5.925385239253853e-07, "loss": 0.6944, "step": 30598 }, { "epoch": 0.8933753759014336, "grad_norm": 0.7776608399241344, "learning_rate": 5.923763179237632e-07, "loss": 0.6828, "step": 30599 }, { "epoch": 0.8934045721292809, "grad_norm": 0.6482345702821857, "learning_rate": 5.922141119221412e-07, "loss": 0.5189, "step": 30600 }, { "epoch": 0.8934337683571283, "grad_norm": 0.7368257746606649, "learning_rate": 5.920519059205191e-07, "loss": 0.6828, "step": 30601 }, { "epoch": 0.8934629645849756, "grad_norm": 0.7120797757306512, "learning_rate": 5.91889699918897e-07, "loss": 0.6012, "step": 30602 }, { "epoch": 0.893492160812823, "grad_norm": 0.7353913379699772, "learning_rate": 5.91727493917275e-07, "loss": 0.6775, "step": 30603 }, { "epoch": 0.8935213570406704, "grad_norm": 0.7971315487763885, "learning_rate": 5.915652879156529e-07, "loss": 0.7595, "step": 30604 }, { "epoch": 0.8935505532685177, "grad_norm": 0.7608656682580871, "learning_rate": 5.914030819140308e-07, "loss": 0.7061, "step": 30605 }, { "epoch": 0.8935797494963651, "grad_norm": 0.7354901376027666, "learning_rate": 5.912408759124088e-07, "loss": 0.6334, "step": 30606 }, { "epoch": 0.8936089457242125, "grad_norm": 0.7594490047017718, "learning_rate": 5.910786699107867e-07, "loss": 0.6953, "step": 30607 }, { "epoch": 0.8936381419520598, "grad_norm": 0.7142817724905806, "learning_rate": 5.909164639091646e-07, "loss": 0.6564, "step": 30608 }, { "epoch": 0.8936673381799072, "grad_norm": 0.8443093225294507, "learning_rate": 5.907542579075425e-07, "loss": 0.6667, "step": 30609 }, { "epoch": 0.8936965344077545, "grad_norm": 0.9179991752223747, "learning_rate": 5.905920519059206e-07, "loss": 0.7367, "step": 30610 }, { "epoch": 0.8937257306356019, "grad_norm": 0.7023187544250884, "learning_rate": 5.904298459042985e-07, "loss": 0.6153, "step": 30611 }, { "epoch": 0.8937549268634493, "grad_norm": 0.7212632249761249, "learning_rate": 5.902676399026765e-07, "loss": 0.635, "step": 30612 }, { "epoch": 0.8937841230912966, "grad_norm": 0.6809294441749525, "learning_rate": 5.901054339010544e-07, "loss": 0.5832, "step": 30613 }, { "epoch": 0.893813319319144, "grad_norm": 0.7636736616812047, "learning_rate": 5.899432278994324e-07, "loss": 0.5899, "step": 30614 }, { "epoch": 0.8938425155469913, "grad_norm": 0.6794548827293969, "learning_rate": 5.897810218978103e-07, "loss": 0.5909, "step": 30615 }, { "epoch": 0.8938717117748387, "grad_norm": 0.7484519622698174, "learning_rate": 5.896188158961882e-07, "loss": 0.6844, "step": 30616 }, { "epoch": 0.8939009080026861, "grad_norm": 0.6835908020059592, "learning_rate": 5.894566098945662e-07, "loss": 0.5872, "step": 30617 }, { "epoch": 0.8939301042305334, "grad_norm": 0.7517265816956292, "learning_rate": 5.892944038929441e-07, "loss": 0.6851, "step": 30618 }, { "epoch": 0.8939593004583808, "grad_norm": 0.7157789778253514, "learning_rate": 5.891321978913221e-07, "loss": 0.6246, "step": 30619 }, { "epoch": 0.8939884966862282, "grad_norm": 0.6885718936099148, "learning_rate": 5.889699918897e-07, "loss": 0.5729, "step": 30620 }, { "epoch": 0.8940176929140755, "grad_norm": 0.732194701148942, "learning_rate": 5.888077858880779e-07, "loss": 0.6742, "step": 30621 }, { "epoch": 0.8940468891419229, "grad_norm": 0.7387288801670813, "learning_rate": 5.886455798864559e-07, "loss": 0.6256, "step": 30622 }, { "epoch": 0.8940760853697702, "grad_norm": 0.6811686159217335, "learning_rate": 5.884833738848338e-07, "loss": 0.6152, "step": 30623 }, { "epoch": 0.8941052815976176, "grad_norm": 0.7467703997306626, "learning_rate": 5.883211678832117e-07, "loss": 0.5811, "step": 30624 }, { "epoch": 0.894134477825465, "grad_norm": 0.6935170901349597, "learning_rate": 5.881589618815896e-07, "loss": 0.5704, "step": 30625 }, { "epoch": 0.8941636740533123, "grad_norm": 0.7637212277457751, "learning_rate": 5.879967558799676e-07, "loss": 0.6305, "step": 30626 }, { "epoch": 0.8941928702811597, "grad_norm": 0.6972095669284225, "learning_rate": 5.878345498783455e-07, "loss": 0.6367, "step": 30627 }, { "epoch": 0.894222066509007, "grad_norm": 0.6759150950911704, "learning_rate": 5.876723438767235e-07, "loss": 0.5724, "step": 30628 }, { "epoch": 0.8942512627368544, "grad_norm": 0.7699146113148432, "learning_rate": 5.875101378751014e-07, "loss": 0.6984, "step": 30629 }, { "epoch": 0.8942804589647018, "grad_norm": 0.7209196942713484, "learning_rate": 5.873479318734793e-07, "loss": 0.6267, "step": 30630 }, { "epoch": 0.8943096551925491, "grad_norm": 0.6751153307727288, "learning_rate": 5.871857258718573e-07, "loss": 0.561, "step": 30631 }, { "epoch": 0.8943388514203965, "grad_norm": 0.7342635198581946, "learning_rate": 5.870235198702352e-07, "loss": 0.6692, "step": 30632 }, { "epoch": 0.8943680476482438, "grad_norm": 0.7050227101473407, "learning_rate": 5.868613138686132e-07, "loss": 0.6211, "step": 30633 }, { "epoch": 0.8943972438760912, "grad_norm": 0.7254431286979403, "learning_rate": 5.866991078669911e-07, "loss": 0.6515, "step": 30634 }, { "epoch": 0.8944264401039386, "grad_norm": 0.7118950909025188, "learning_rate": 5.86536901865369e-07, "loss": 0.5726, "step": 30635 }, { "epoch": 0.8944556363317859, "grad_norm": 0.7904142867507791, "learning_rate": 5.86374695863747e-07, "loss": 0.6716, "step": 30636 }, { "epoch": 0.8944848325596333, "grad_norm": 0.7319692375002451, "learning_rate": 5.862124898621249e-07, "loss": 0.605, "step": 30637 }, { "epoch": 0.8945140287874807, "grad_norm": 0.7721213208884594, "learning_rate": 5.860502838605029e-07, "loss": 0.6969, "step": 30638 }, { "epoch": 0.894543225015328, "grad_norm": 0.7084729151994331, "learning_rate": 5.858880778588808e-07, "loss": 0.6114, "step": 30639 }, { "epoch": 0.8945724212431754, "grad_norm": 0.7195497652886894, "learning_rate": 5.857258718572587e-07, "loss": 0.6234, "step": 30640 }, { "epoch": 0.8946016174710227, "grad_norm": 0.7325962021094481, "learning_rate": 5.855636658556368e-07, "loss": 0.6355, "step": 30641 }, { "epoch": 0.8946308136988701, "grad_norm": 0.8041821170627231, "learning_rate": 5.854014598540147e-07, "loss": 0.6973, "step": 30642 }, { "epoch": 0.8946600099267175, "grad_norm": 0.7830731151055375, "learning_rate": 5.852392538523926e-07, "loss": 0.7179, "step": 30643 }, { "epoch": 0.8946892061545648, "grad_norm": 0.6945121054476628, "learning_rate": 5.850770478507705e-07, "loss": 0.5853, "step": 30644 }, { "epoch": 0.8947184023824122, "grad_norm": 0.7179273200832407, "learning_rate": 5.849148418491485e-07, "loss": 0.6265, "step": 30645 }, { "epoch": 0.8947475986102595, "grad_norm": 0.6848490106084802, "learning_rate": 5.847526358475264e-07, "loss": 0.5889, "step": 30646 }, { "epoch": 0.8947767948381069, "grad_norm": 0.7153984062248995, "learning_rate": 5.845904298459044e-07, "loss": 0.6308, "step": 30647 }, { "epoch": 0.8948059910659543, "grad_norm": 0.7058251784704399, "learning_rate": 5.844282238442823e-07, "loss": 0.5795, "step": 30648 }, { "epoch": 0.8948351872938016, "grad_norm": 0.7096947020946273, "learning_rate": 5.842660178426602e-07, "loss": 0.6516, "step": 30649 }, { "epoch": 0.894864383521649, "grad_norm": 0.7493920460834983, "learning_rate": 5.841038118410382e-07, "loss": 0.7003, "step": 30650 }, { "epoch": 0.8948935797494963, "grad_norm": 0.7906130074449732, "learning_rate": 5.839416058394161e-07, "loss": 0.6954, "step": 30651 }, { "epoch": 0.8949227759773437, "grad_norm": 0.709701278410031, "learning_rate": 5.837793998377941e-07, "loss": 0.6344, "step": 30652 }, { "epoch": 0.8949519722051911, "grad_norm": 0.8542153089411388, "learning_rate": 5.83617193836172e-07, "loss": 0.6903, "step": 30653 }, { "epoch": 0.8949811684330384, "grad_norm": 0.7042237986650229, "learning_rate": 5.8345498783455e-07, "loss": 0.672, "step": 30654 }, { "epoch": 0.8950103646608858, "grad_norm": 0.7149562864750988, "learning_rate": 5.832927818329279e-07, "loss": 0.5932, "step": 30655 }, { "epoch": 0.8950395608887332, "grad_norm": 0.7002682907714332, "learning_rate": 5.831305758313058e-07, "loss": 0.6347, "step": 30656 }, { "epoch": 0.8950687571165805, "grad_norm": 0.7809637034748069, "learning_rate": 5.829683698296838e-07, "loss": 0.7923, "step": 30657 }, { "epoch": 0.8950979533444279, "grad_norm": 0.7129361699435626, "learning_rate": 5.828061638280617e-07, "loss": 0.6089, "step": 30658 }, { "epoch": 0.8951271495722752, "grad_norm": 0.7236065467509764, "learning_rate": 5.826439578264396e-07, "loss": 0.6246, "step": 30659 }, { "epoch": 0.8951563458001226, "grad_norm": 0.7035335926594174, "learning_rate": 5.824817518248175e-07, "loss": 0.6258, "step": 30660 }, { "epoch": 0.89518554202797, "grad_norm": 0.7404899533947523, "learning_rate": 5.823195458231955e-07, "loss": 0.6194, "step": 30661 }, { "epoch": 0.8952147382558173, "grad_norm": 0.7330491716988854, "learning_rate": 5.821573398215734e-07, "loss": 0.6433, "step": 30662 }, { "epoch": 0.8952439344836647, "grad_norm": 0.7225524793468632, "learning_rate": 5.819951338199513e-07, "loss": 0.6143, "step": 30663 }, { "epoch": 0.895273130711512, "grad_norm": 0.7462722493981789, "learning_rate": 5.818329278183293e-07, "loss": 0.6544, "step": 30664 }, { "epoch": 0.8953023269393594, "grad_norm": 0.7292426052270089, "learning_rate": 5.816707218167072e-07, "loss": 0.6201, "step": 30665 }, { "epoch": 0.8953315231672068, "grad_norm": 0.7650509635013094, "learning_rate": 5.815085158150852e-07, "loss": 0.5774, "step": 30666 }, { "epoch": 0.8953607193950541, "grad_norm": 0.6928788635757024, "learning_rate": 5.813463098134631e-07, "loss": 0.5574, "step": 30667 }, { "epoch": 0.8953899156229015, "grad_norm": 0.7037864854899445, "learning_rate": 5.81184103811841e-07, "loss": 0.6097, "step": 30668 }, { "epoch": 0.8954191118507489, "grad_norm": 0.7366767836972034, "learning_rate": 5.81021897810219e-07, "loss": 0.6822, "step": 30669 }, { "epoch": 0.8954483080785962, "grad_norm": 0.7798928237911593, "learning_rate": 5.808596918085969e-07, "loss": 0.624, "step": 30670 }, { "epoch": 0.8954775043064436, "grad_norm": 0.6950772696698434, "learning_rate": 5.806974858069749e-07, "loss": 0.5882, "step": 30671 }, { "epoch": 0.8955067005342909, "grad_norm": 0.723513182870959, "learning_rate": 5.805352798053528e-07, "loss": 0.661, "step": 30672 }, { "epoch": 0.8955358967621383, "grad_norm": 0.7239716716265326, "learning_rate": 5.803730738037309e-07, "loss": 0.6274, "step": 30673 }, { "epoch": 0.8955650929899857, "grad_norm": 0.7683734260489721, "learning_rate": 5.802108678021088e-07, "loss": 0.7278, "step": 30674 }, { "epoch": 0.895594289217833, "grad_norm": 0.7296612396221906, "learning_rate": 5.800486618004867e-07, "loss": 0.6342, "step": 30675 }, { "epoch": 0.8956234854456804, "grad_norm": 0.7199275431625122, "learning_rate": 5.798864557988647e-07, "loss": 0.5825, "step": 30676 }, { "epoch": 0.8956526816735277, "grad_norm": 0.6608338882881373, "learning_rate": 5.797242497972426e-07, "loss": 0.5301, "step": 30677 }, { "epoch": 0.8956818779013751, "grad_norm": 0.7377877305647959, "learning_rate": 5.795620437956205e-07, "loss": 0.656, "step": 30678 }, { "epoch": 0.8957110741292225, "grad_norm": 0.6608788932124239, "learning_rate": 5.793998377939984e-07, "loss": 0.5701, "step": 30679 }, { "epoch": 0.8957402703570698, "grad_norm": 0.67746652129782, "learning_rate": 5.792376317923764e-07, "loss": 0.5448, "step": 30680 }, { "epoch": 0.8957694665849172, "grad_norm": 0.6663337460934916, "learning_rate": 5.790754257907543e-07, "loss": 0.5766, "step": 30681 }, { "epoch": 0.8957986628127645, "grad_norm": 0.7884583310160284, "learning_rate": 5.789132197891322e-07, "loss": 0.6878, "step": 30682 }, { "epoch": 0.8958278590406119, "grad_norm": 0.6987009013371227, "learning_rate": 5.787510137875102e-07, "loss": 0.5804, "step": 30683 }, { "epoch": 0.8958570552684593, "grad_norm": 0.7257904664059153, "learning_rate": 5.785888077858881e-07, "loss": 0.6787, "step": 30684 }, { "epoch": 0.8958862514963066, "grad_norm": 0.6883760318806035, "learning_rate": 5.784266017842661e-07, "loss": 0.5396, "step": 30685 }, { "epoch": 0.895915447724154, "grad_norm": 0.7443327999266579, "learning_rate": 5.78264395782644e-07, "loss": 0.668, "step": 30686 }, { "epoch": 0.8959446439520014, "grad_norm": 0.7555520608170991, "learning_rate": 5.78102189781022e-07, "loss": 0.6506, "step": 30687 }, { "epoch": 0.8959738401798487, "grad_norm": 0.6875878992924577, "learning_rate": 5.779399837793999e-07, "loss": 0.6049, "step": 30688 }, { "epoch": 0.8960030364076961, "grad_norm": 0.6552189552811657, "learning_rate": 5.777777777777778e-07, "loss": 0.5492, "step": 30689 }, { "epoch": 0.8960322326355434, "grad_norm": 0.7585790020619881, "learning_rate": 5.776155717761558e-07, "loss": 0.7391, "step": 30690 }, { "epoch": 0.8960614288633908, "grad_norm": 0.7862587394071882, "learning_rate": 5.774533657745337e-07, "loss": 0.7378, "step": 30691 }, { "epoch": 0.8960906250912382, "grad_norm": 0.7934493052763825, "learning_rate": 5.772911597729117e-07, "loss": 0.7156, "step": 30692 }, { "epoch": 0.8961198213190855, "grad_norm": 0.7572815602087906, "learning_rate": 5.771289537712896e-07, "loss": 0.6476, "step": 30693 }, { "epoch": 0.896149017546933, "grad_norm": 0.8157264698640753, "learning_rate": 5.769667477696675e-07, "loss": 0.7306, "step": 30694 }, { "epoch": 0.8961782137747804, "grad_norm": 0.732117786464597, "learning_rate": 5.768045417680455e-07, "loss": 0.6347, "step": 30695 }, { "epoch": 0.8962074100026277, "grad_norm": 0.7357841484712416, "learning_rate": 5.766423357664234e-07, "loss": 0.6635, "step": 30696 }, { "epoch": 0.8962366062304751, "grad_norm": 0.7266753511994624, "learning_rate": 5.764801297648013e-07, "loss": 0.6292, "step": 30697 }, { "epoch": 0.8962658024583224, "grad_norm": 0.7074639894114803, "learning_rate": 5.763179237631792e-07, "loss": 0.6101, "step": 30698 }, { "epoch": 0.8962949986861698, "grad_norm": 0.7462203880985736, "learning_rate": 5.761557177615572e-07, "loss": 0.6622, "step": 30699 }, { "epoch": 0.8963241949140172, "grad_norm": 0.6772854799336373, "learning_rate": 5.759935117599351e-07, "loss": 0.57, "step": 30700 }, { "epoch": 0.8963533911418645, "grad_norm": 0.7440577531122187, "learning_rate": 5.75831305758313e-07, "loss": 0.648, "step": 30701 }, { "epoch": 0.8963825873697119, "grad_norm": 0.7261001579997445, "learning_rate": 5.75669099756691e-07, "loss": 0.6545, "step": 30702 }, { "epoch": 0.8964117835975592, "grad_norm": 0.6867877357935868, "learning_rate": 5.755068937550689e-07, "loss": 0.5822, "step": 30703 }, { "epoch": 0.8964409798254066, "grad_norm": 0.7616072355521515, "learning_rate": 5.75344687753447e-07, "loss": 0.6979, "step": 30704 }, { "epoch": 0.896470176053254, "grad_norm": 0.6622536764644025, "learning_rate": 5.751824817518248e-07, "loss": 0.5421, "step": 30705 }, { "epoch": 0.8964993722811013, "grad_norm": 0.7526649723238776, "learning_rate": 5.750202757502029e-07, "loss": 0.6856, "step": 30706 }, { "epoch": 0.8965285685089487, "grad_norm": 0.7290612474971325, "learning_rate": 5.748580697485808e-07, "loss": 0.6413, "step": 30707 }, { "epoch": 0.896557764736796, "grad_norm": 0.7225697449459308, "learning_rate": 5.746958637469587e-07, "loss": 0.6526, "step": 30708 }, { "epoch": 0.8965869609646434, "grad_norm": 0.7276072056209267, "learning_rate": 5.745336577453367e-07, "loss": 0.6158, "step": 30709 }, { "epoch": 0.8966161571924908, "grad_norm": 0.7305053017507185, "learning_rate": 5.743714517437146e-07, "loss": 0.6351, "step": 30710 }, { "epoch": 0.8966453534203381, "grad_norm": 0.7472745342951506, "learning_rate": 5.742092457420926e-07, "loss": 0.6069, "step": 30711 }, { "epoch": 0.8966745496481855, "grad_norm": 0.6753295427032238, "learning_rate": 5.740470397404705e-07, "loss": 0.5812, "step": 30712 }, { "epoch": 0.8967037458760329, "grad_norm": 0.7290232978997221, "learning_rate": 5.738848337388484e-07, "loss": 0.63, "step": 30713 }, { "epoch": 0.8967329421038802, "grad_norm": 0.6953599930132243, "learning_rate": 5.737226277372263e-07, "loss": 0.5676, "step": 30714 }, { "epoch": 0.8967621383317276, "grad_norm": 0.7343902816249915, "learning_rate": 5.735604217356043e-07, "loss": 0.6331, "step": 30715 }, { "epoch": 0.8967913345595749, "grad_norm": 0.787803642722342, "learning_rate": 5.733982157339822e-07, "loss": 0.7243, "step": 30716 }, { "epoch": 0.8968205307874223, "grad_norm": 0.8043739360349158, "learning_rate": 5.732360097323601e-07, "loss": 0.731, "step": 30717 }, { "epoch": 0.8968497270152697, "grad_norm": 0.8009008124658525, "learning_rate": 5.730738037307381e-07, "loss": 0.7676, "step": 30718 }, { "epoch": 0.896878923243117, "grad_norm": 0.697713372205671, "learning_rate": 5.72911597729116e-07, "loss": 0.614, "step": 30719 }, { "epoch": 0.8969081194709644, "grad_norm": 0.9292174547721224, "learning_rate": 5.72749391727494e-07, "loss": 0.6362, "step": 30720 }, { "epoch": 0.8969373156988117, "grad_norm": 0.7453004998884342, "learning_rate": 5.725871857258719e-07, "loss": 0.7083, "step": 30721 }, { "epoch": 0.8969665119266591, "grad_norm": 0.7324876102575147, "learning_rate": 5.724249797242498e-07, "loss": 0.6687, "step": 30722 }, { "epoch": 0.8969957081545065, "grad_norm": 0.7333196079771843, "learning_rate": 5.722627737226278e-07, "loss": 0.6452, "step": 30723 }, { "epoch": 0.8970249043823538, "grad_norm": 0.6611295635656829, "learning_rate": 5.721005677210057e-07, "loss": 0.5516, "step": 30724 }, { "epoch": 0.8970541006102012, "grad_norm": 0.7081098406814451, "learning_rate": 5.719383617193837e-07, "loss": 0.6219, "step": 30725 }, { "epoch": 0.8970832968380485, "grad_norm": 0.680892855376451, "learning_rate": 5.717761557177616e-07, "loss": 0.5794, "step": 30726 }, { "epoch": 0.8971124930658959, "grad_norm": 0.7479767701325936, "learning_rate": 5.716139497161395e-07, "loss": 0.6622, "step": 30727 }, { "epoch": 0.8971416892937433, "grad_norm": 0.6780121794357646, "learning_rate": 5.714517437145175e-07, "loss": 0.576, "step": 30728 }, { "epoch": 0.8971708855215906, "grad_norm": 0.7246091645362706, "learning_rate": 5.712895377128954e-07, "loss": 0.638, "step": 30729 }, { "epoch": 0.897200081749438, "grad_norm": 0.7189195754108474, "learning_rate": 5.711273317112734e-07, "loss": 0.6111, "step": 30730 }, { "epoch": 0.8972292779772854, "grad_norm": 0.6797025177918515, "learning_rate": 5.709651257096513e-07, "loss": 0.6192, "step": 30731 }, { "epoch": 0.8972584742051327, "grad_norm": 0.7018611554810739, "learning_rate": 5.708029197080292e-07, "loss": 0.6096, "step": 30732 }, { "epoch": 0.8972876704329801, "grad_norm": 0.706087342154259, "learning_rate": 5.706407137064071e-07, "loss": 0.5823, "step": 30733 }, { "epoch": 0.8973168666608274, "grad_norm": 0.7069247466667932, "learning_rate": 5.704785077047851e-07, "loss": 0.635, "step": 30734 }, { "epoch": 0.8973460628886748, "grad_norm": 0.7778451534973432, "learning_rate": 5.70316301703163e-07, "loss": 0.7136, "step": 30735 }, { "epoch": 0.8973752591165222, "grad_norm": 0.7030342123677329, "learning_rate": 5.701540957015409e-07, "loss": 0.6327, "step": 30736 }, { "epoch": 0.8974044553443695, "grad_norm": 0.7379079969064052, "learning_rate": 5.69991889699919e-07, "loss": 0.6879, "step": 30737 }, { "epoch": 0.8974336515722169, "grad_norm": 0.7407296877345229, "learning_rate": 5.698296836982968e-07, "loss": 0.6334, "step": 30738 }, { "epoch": 0.8974628478000642, "grad_norm": 0.7035366928378757, "learning_rate": 5.696674776966749e-07, "loss": 0.6438, "step": 30739 }, { "epoch": 0.8974920440279116, "grad_norm": 0.6840482153821666, "learning_rate": 5.695052716950528e-07, "loss": 0.5871, "step": 30740 }, { "epoch": 0.897521240255759, "grad_norm": 0.6823234585768168, "learning_rate": 5.693430656934307e-07, "loss": 0.5988, "step": 30741 }, { "epoch": 0.8975504364836063, "grad_norm": 0.7056971057731961, "learning_rate": 5.691808596918087e-07, "loss": 0.6063, "step": 30742 }, { "epoch": 0.8975796327114537, "grad_norm": 0.735840525975947, "learning_rate": 5.690186536901866e-07, "loss": 0.6339, "step": 30743 }, { "epoch": 0.897608828939301, "grad_norm": 0.7140534993537239, "learning_rate": 5.688564476885646e-07, "loss": 0.604, "step": 30744 }, { "epoch": 0.8976380251671484, "grad_norm": 0.7225197944506055, "learning_rate": 5.686942416869425e-07, "loss": 0.6523, "step": 30745 }, { "epoch": 0.8976672213949958, "grad_norm": 0.6898245315465426, "learning_rate": 5.685320356853205e-07, "loss": 0.6083, "step": 30746 }, { "epoch": 0.8976964176228431, "grad_norm": 0.7143293458637847, "learning_rate": 5.683698296836984e-07, "loss": 0.5805, "step": 30747 }, { "epoch": 0.8977256138506905, "grad_norm": 0.7534434467937892, "learning_rate": 5.682076236820763e-07, "loss": 0.6706, "step": 30748 }, { "epoch": 0.8977548100785379, "grad_norm": 0.7397291699842855, "learning_rate": 5.680454176804543e-07, "loss": 0.6506, "step": 30749 }, { "epoch": 0.8977840063063852, "grad_norm": 0.6922847111870474, "learning_rate": 5.678832116788322e-07, "loss": 0.601, "step": 30750 }, { "epoch": 0.8978132025342326, "grad_norm": 0.7750024092960026, "learning_rate": 5.677210056772101e-07, "loss": 0.715, "step": 30751 }, { "epoch": 0.8978423987620799, "grad_norm": 0.7904476283150462, "learning_rate": 5.67558799675588e-07, "loss": 0.8009, "step": 30752 }, { "epoch": 0.8978715949899273, "grad_norm": 0.7880059846336195, "learning_rate": 5.67396593673966e-07, "loss": 0.6476, "step": 30753 }, { "epoch": 0.8979007912177747, "grad_norm": 0.7134622781712374, "learning_rate": 5.672343876723439e-07, "loss": 0.595, "step": 30754 }, { "epoch": 0.897929987445622, "grad_norm": 0.7311100558851095, "learning_rate": 5.670721816707218e-07, "loss": 0.6169, "step": 30755 }, { "epoch": 0.8979591836734694, "grad_norm": 0.7752022861673884, "learning_rate": 5.669099756690998e-07, "loss": 0.7233, "step": 30756 }, { "epoch": 0.8979883799013167, "grad_norm": 0.9436190848175371, "learning_rate": 5.667477696674777e-07, "loss": 0.6912, "step": 30757 }, { "epoch": 0.8980175761291641, "grad_norm": 0.7072626467600232, "learning_rate": 5.665855636658557e-07, "loss": 0.6414, "step": 30758 }, { "epoch": 0.8980467723570115, "grad_norm": 0.7526931948092281, "learning_rate": 5.664233576642336e-07, "loss": 0.6869, "step": 30759 }, { "epoch": 0.8980759685848588, "grad_norm": 0.7303784892756573, "learning_rate": 5.662611516626115e-07, "loss": 0.655, "step": 30760 }, { "epoch": 0.8981051648127062, "grad_norm": 0.6818292821336368, "learning_rate": 5.660989456609895e-07, "loss": 0.5694, "step": 30761 }, { "epoch": 0.8981343610405536, "grad_norm": 0.6832615626521301, "learning_rate": 5.659367396593674e-07, "loss": 0.5761, "step": 30762 }, { "epoch": 0.8981635572684009, "grad_norm": 0.6801475295887642, "learning_rate": 5.657745336577454e-07, "loss": 0.5833, "step": 30763 }, { "epoch": 0.8981927534962483, "grad_norm": 0.7117418623198196, "learning_rate": 5.656123276561233e-07, "loss": 0.6304, "step": 30764 }, { "epoch": 0.8982219497240956, "grad_norm": 0.6911009430566731, "learning_rate": 5.654501216545013e-07, "loss": 0.6067, "step": 30765 }, { "epoch": 0.898251145951943, "grad_norm": 0.6945891555898643, "learning_rate": 5.652879156528792e-07, "loss": 0.6083, "step": 30766 }, { "epoch": 0.8982803421797904, "grad_norm": 0.7532013342398002, "learning_rate": 5.651257096512571e-07, "loss": 0.7118, "step": 30767 }, { "epoch": 0.8983095384076377, "grad_norm": 0.7689081091273205, "learning_rate": 5.64963503649635e-07, "loss": 0.7216, "step": 30768 }, { "epoch": 0.8983387346354851, "grad_norm": 0.7568581475158763, "learning_rate": 5.64801297648013e-07, "loss": 0.702, "step": 30769 }, { "epoch": 0.8983679308633324, "grad_norm": 0.704846439082425, "learning_rate": 5.64639091646391e-07, "loss": 0.5786, "step": 30770 }, { "epoch": 0.8983971270911798, "grad_norm": 0.7706609860966688, "learning_rate": 5.644768856447689e-07, "loss": 0.6711, "step": 30771 }, { "epoch": 0.8984263233190272, "grad_norm": 0.699650449740796, "learning_rate": 5.643146796431469e-07, "loss": 0.6292, "step": 30772 }, { "epoch": 0.8984555195468745, "grad_norm": 0.7289936933468114, "learning_rate": 5.641524736415248e-07, "loss": 0.6608, "step": 30773 }, { "epoch": 0.8984847157747219, "grad_norm": 0.7453968792102947, "learning_rate": 5.639902676399027e-07, "loss": 0.636, "step": 30774 }, { "epoch": 0.8985139120025692, "grad_norm": 0.7376720627981291, "learning_rate": 5.638280616382807e-07, "loss": 0.6693, "step": 30775 }, { "epoch": 0.8985431082304166, "grad_norm": 0.6829175227914307, "learning_rate": 5.636658556366586e-07, "loss": 0.593, "step": 30776 }, { "epoch": 0.898572304458264, "grad_norm": 0.6856920746493829, "learning_rate": 5.635036496350366e-07, "loss": 0.5689, "step": 30777 }, { "epoch": 0.8986015006861113, "grad_norm": 0.7133029916098911, "learning_rate": 5.633414436334145e-07, "loss": 0.6513, "step": 30778 }, { "epoch": 0.8986306969139587, "grad_norm": 0.7384567660080485, "learning_rate": 5.631792376317925e-07, "loss": 0.6719, "step": 30779 }, { "epoch": 0.898659893141806, "grad_norm": 0.7185143583899073, "learning_rate": 5.630170316301704e-07, "loss": 0.6401, "step": 30780 }, { "epoch": 0.8986890893696534, "grad_norm": 0.7184044345753288, "learning_rate": 5.628548256285483e-07, "loss": 0.646, "step": 30781 }, { "epoch": 0.8987182855975008, "grad_norm": 0.7064135962734175, "learning_rate": 5.626926196269263e-07, "loss": 0.5998, "step": 30782 }, { "epoch": 0.8987474818253481, "grad_norm": 0.709392555876656, "learning_rate": 5.625304136253042e-07, "loss": 0.5773, "step": 30783 }, { "epoch": 0.8987766780531955, "grad_norm": 0.6624972653754615, "learning_rate": 5.623682076236822e-07, "loss": 0.5799, "step": 30784 }, { "epoch": 0.8988058742810429, "grad_norm": 0.7464168803141294, "learning_rate": 5.622060016220601e-07, "loss": 0.6532, "step": 30785 }, { "epoch": 0.8988350705088902, "grad_norm": 0.7265715582459149, "learning_rate": 5.62043795620438e-07, "loss": 0.6269, "step": 30786 }, { "epoch": 0.8988642667367376, "grad_norm": 0.726498371657431, "learning_rate": 5.618815896188159e-07, "loss": 0.6661, "step": 30787 }, { "epoch": 0.8988934629645849, "grad_norm": 0.6263718256361088, "learning_rate": 5.617193836171939e-07, "loss": 0.4926, "step": 30788 }, { "epoch": 0.8989226591924323, "grad_norm": 0.6698805499726399, "learning_rate": 5.615571776155718e-07, "loss": 0.5604, "step": 30789 }, { "epoch": 0.8989518554202797, "grad_norm": 0.8044120490028036, "learning_rate": 5.613949716139497e-07, "loss": 0.7519, "step": 30790 }, { "epoch": 0.898981051648127, "grad_norm": 0.7402782921681262, "learning_rate": 5.612327656123277e-07, "loss": 0.6682, "step": 30791 }, { "epoch": 0.8990102478759744, "grad_norm": 0.8653794965170907, "learning_rate": 5.610705596107056e-07, "loss": 0.7935, "step": 30792 }, { "epoch": 0.8990394441038217, "grad_norm": 0.6817308256328144, "learning_rate": 5.609083536090835e-07, "loss": 0.5476, "step": 30793 }, { "epoch": 0.8990686403316691, "grad_norm": 0.7174567397386317, "learning_rate": 5.607461476074615e-07, "loss": 0.6527, "step": 30794 }, { "epoch": 0.8990978365595165, "grad_norm": 0.6844927095260278, "learning_rate": 5.605839416058394e-07, "loss": 0.5442, "step": 30795 }, { "epoch": 0.8991270327873638, "grad_norm": 0.7165788733127848, "learning_rate": 5.604217356042174e-07, "loss": 0.6713, "step": 30796 }, { "epoch": 0.8991562290152112, "grad_norm": 0.6950896312887758, "learning_rate": 5.602595296025953e-07, "loss": 0.659, "step": 30797 }, { "epoch": 0.8991854252430586, "grad_norm": 0.6938158339702379, "learning_rate": 5.600973236009733e-07, "loss": 0.6148, "step": 30798 }, { "epoch": 0.8992146214709059, "grad_norm": 0.7063857306235884, "learning_rate": 5.599351175993512e-07, "loss": 0.6462, "step": 30799 }, { "epoch": 0.8992438176987533, "grad_norm": 0.7569857629249083, "learning_rate": 5.597729115977291e-07, "loss": 0.6813, "step": 30800 }, { "epoch": 0.8992730139266006, "grad_norm": 0.7720097575108517, "learning_rate": 5.596107055961071e-07, "loss": 0.7326, "step": 30801 }, { "epoch": 0.899302210154448, "grad_norm": 0.7135257389688269, "learning_rate": 5.59448499594485e-07, "loss": 0.6044, "step": 30802 }, { "epoch": 0.8993314063822954, "grad_norm": 0.7570705984454177, "learning_rate": 5.59286293592863e-07, "loss": 0.6542, "step": 30803 }, { "epoch": 0.8993606026101427, "grad_norm": 0.8075131129071305, "learning_rate": 5.59124087591241e-07, "loss": 0.7625, "step": 30804 }, { "epoch": 0.8993897988379901, "grad_norm": 0.7911788419116496, "learning_rate": 5.589618815896189e-07, "loss": 0.704, "step": 30805 }, { "epoch": 0.8994189950658374, "grad_norm": 0.7385962569293175, "learning_rate": 5.587996755879968e-07, "loss": 0.6287, "step": 30806 }, { "epoch": 0.8994481912936848, "grad_norm": 0.745515237287715, "learning_rate": 5.586374695863748e-07, "loss": 0.6396, "step": 30807 }, { "epoch": 0.8994773875215322, "grad_norm": 0.7354711130575259, "learning_rate": 5.584752635847527e-07, "loss": 0.6925, "step": 30808 }, { "epoch": 0.8995065837493795, "grad_norm": 0.7585668838937524, "learning_rate": 5.583130575831306e-07, "loss": 0.6604, "step": 30809 }, { "epoch": 0.8995357799772269, "grad_norm": 0.7555268094111064, "learning_rate": 5.581508515815086e-07, "loss": 0.6567, "step": 30810 }, { "epoch": 0.8995649762050743, "grad_norm": 0.7242911727375639, "learning_rate": 5.579886455798865e-07, "loss": 0.586, "step": 30811 }, { "epoch": 0.8995941724329216, "grad_norm": 0.7776318199353839, "learning_rate": 5.578264395782645e-07, "loss": 0.6785, "step": 30812 }, { "epoch": 0.899623368660769, "grad_norm": 0.7024405900535138, "learning_rate": 5.576642335766424e-07, "loss": 0.6028, "step": 30813 }, { "epoch": 0.8996525648886164, "grad_norm": 0.7288562626962126, "learning_rate": 5.575020275750203e-07, "loss": 0.6174, "step": 30814 }, { "epoch": 0.8996817611164638, "grad_norm": 0.7734696740641152, "learning_rate": 5.573398215733983e-07, "loss": 0.6988, "step": 30815 }, { "epoch": 0.8997109573443112, "grad_norm": 0.7435692287785372, "learning_rate": 5.571776155717762e-07, "loss": 0.6506, "step": 30816 }, { "epoch": 0.8997401535721585, "grad_norm": 0.7026294374851262, "learning_rate": 5.570154095701542e-07, "loss": 0.636, "step": 30817 }, { "epoch": 0.8997693498000059, "grad_norm": 0.7214090514618648, "learning_rate": 5.568532035685321e-07, "loss": 0.6267, "step": 30818 }, { "epoch": 0.8997985460278533, "grad_norm": 0.6985663310297109, "learning_rate": 5.5669099756691e-07, "loss": 0.5882, "step": 30819 }, { "epoch": 0.8998277422557006, "grad_norm": 0.6673067197315238, "learning_rate": 5.56528791565288e-07, "loss": 0.5308, "step": 30820 }, { "epoch": 0.899856938483548, "grad_norm": 0.6664453724974129, "learning_rate": 5.563665855636659e-07, "loss": 0.5324, "step": 30821 }, { "epoch": 0.8998861347113953, "grad_norm": 0.7518785237510114, "learning_rate": 5.562043795620438e-07, "loss": 0.6846, "step": 30822 }, { "epoch": 0.8999153309392427, "grad_norm": 0.6916784999748226, "learning_rate": 5.560421735604218e-07, "loss": 0.6384, "step": 30823 }, { "epoch": 0.8999445271670901, "grad_norm": 0.7442521328174677, "learning_rate": 5.558799675587997e-07, "loss": 0.6434, "step": 30824 }, { "epoch": 0.8999737233949374, "grad_norm": 0.758377229944897, "learning_rate": 5.557177615571776e-07, "loss": 0.6367, "step": 30825 }, { "epoch": 0.9000029196227848, "grad_norm": 0.727348900008018, "learning_rate": 5.555555555555555e-07, "loss": 0.6526, "step": 30826 }, { "epoch": 0.9000321158506321, "grad_norm": 0.7815771110306795, "learning_rate": 5.553933495539335e-07, "loss": 0.6714, "step": 30827 }, { "epoch": 0.9000613120784795, "grad_norm": 0.6772083773089369, "learning_rate": 5.552311435523114e-07, "loss": 0.5571, "step": 30828 }, { "epoch": 0.9000905083063269, "grad_norm": 0.7226336857989676, "learning_rate": 5.550689375506894e-07, "loss": 0.6285, "step": 30829 }, { "epoch": 0.9001197045341742, "grad_norm": 0.7289354710186623, "learning_rate": 5.549067315490673e-07, "loss": 0.6907, "step": 30830 }, { "epoch": 0.9001489007620216, "grad_norm": 0.7440451905022072, "learning_rate": 5.547445255474453e-07, "loss": 0.6598, "step": 30831 }, { "epoch": 0.900178096989869, "grad_norm": 0.7590479181501493, "learning_rate": 5.545823195458232e-07, "loss": 0.7061, "step": 30832 }, { "epoch": 0.9002072932177163, "grad_norm": 0.9482006789023483, "learning_rate": 5.544201135442011e-07, "loss": 0.6985, "step": 30833 }, { "epoch": 0.9002364894455637, "grad_norm": 0.7304802310483248, "learning_rate": 5.542579075425791e-07, "loss": 0.6407, "step": 30834 }, { "epoch": 0.900265685673411, "grad_norm": 0.7235820576069428, "learning_rate": 5.54095701540957e-07, "loss": 0.5873, "step": 30835 }, { "epoch": 0.9002948819012584, "grad_norm": 0.7125062736392099, "learning_rate": 5.539334955393351e-07, "loss": 0.6429, "step": 30836 }, { "epoch": 0.9003240781291058, "grad_norm": 0.7460484996872233, "learning_rate": 5.53771289537713e-07, "loss": 0.682, "step": 30837 }, { "epoch": 0.9003532743569531, "grad_norm": 0.674941909635244, "learning_rate": 5.53609083536091e-07, "loss": 0.5865, "step": 30838 }, { "epoch": 0.9003824705848005, "grad_norm": 0.7304953265157107, "learning_rate": 5.534468775344689e-07, "loss": 0.661, "step": 30839 }, { "epoch": 0.9004116668126478, "grad_norm": 0.742159484916578, "learning_rate": 5.532846715328468e-07, "loss": 0.5961, "step": 30840 }, { "epoch": 0.9004408630404952, "grad_norm": 0.7061291612682167, "learning_rate": 5.531224655312247e-07, "loss": 0.6568, "step": 30841 }, { "epoch": 0.9004700592683426, "grad_norm": 0.7552392521786985, "learning_rate": 5.529602595296027e-07, "loss": 0.674, "step": 30842 }, { "epoch": 0.9004992554961899, "grad_norm": 0.7303669556056736, "learning_rate": 5.527980535279806e-07, "loss": 0.6364, "step": 30843 }, { "epoch": 0.9005284517240373, "grad_norm": 0.7531846425858046, "learning_rate": 5.526358475263585e-07, "loss": 0.6164, "step": 30844 }, { "epoch": 0.9005576479518846, "grad_norm": 0.7099431998850672, "learning_rate": 5.524736415247364e-07, "loss": 0.581, "step": 30845 }, { "epoch": 0.900586844179732, "grad_norm": 0.700871651754452, "learning_rate": 5.523114355231144e-07, "loss": 0.6183, "step": 30846 }, { "epoch": 0.9006160404075794, "grad_norm": 0.718399431241766, "learning_rate": 5.521492295214923e-07, "loss": 0.6599, "step": 30847 }, { "epoch": 0.9006452366354267, "grad_norm": 0.6657684732760362, "learning_rate": 5.519870235198703e-07, "loss": 0.5153, "step": 30848 }, { "epoch": 0.9006744328632741, "grad_norm": 0.8234858216390714, "learning_rate": 5.518248175182482e-07, "loss": 0.7273, "step": 30849 }, { "epoch": 0.9007036290911214, "grad_norm": 0.7088339139480679, "learning_rate": 5.516626115166262e-07, "loss": 0.5652, "step": 30850 }, { "epoch": 0.9007328253189688, "grad_norm": 0.7589311591091233, "learning_rate": 5.515004055150041e-07, "loss": 0.732, "step": 30851 }, { "epoch": 0.9007620215468162, "grad_norm": 0.7422487787683326, "learning_rate": 5.51338199513382e-07, "loss": 0.6575, "step": 30852 }, { "epoch": 0.9007912177746635, "grad_norm": 0.769838899786251, "learning_rate": 5.5117599351176e-07, "loss": 0.6945, "step": 30853 }, { "epoch": 0.9008204140025109, "grad_norm": 0.6739558808111048, "learning_rate": 5.510137875101379e-07, "loss": 0.5906, "step": 30854 }, { "epoch": 0.9008496102303583, "grad_norm": 0.6771810979120428, "learning_rate": 5.508515815085159e-07, "loss": 0.5876, "step": 30855 }, { "epoch": 0.9008788064582056, "grad_norm": 0.7239546654698702, "learning_rate": 5.506893755068938e-07, "loss": 0.6572, "step": 30856 }, { "epoch": 0.900908002686053, "grad_norm": 0.7070289606954541, "learning_rate": 5.505271695052717e-07, "loss": 0.6313, "step": 30857 }, { "epoch": 0.9009371989139003, "grad_norm": 0.7137143117330994, "learning_rate": 5.503649635036497e-07, "loss": 0.6301, "step": 30858 }, { "epoch": 0.9009663951417477, "grad_norm": 0.8915417276104277, "learning_rate": 5.502027575020276e-07, "loss": 0.6483, "step": 30859 }, { "epoch": 0.9009955913695951, "grad_norm": 0.7060740516810144, "learning_rate": 5.500405515004055e-07, "loss": 0.6606, "step": 30860 }, { "epoch": 0.9010247875974424, "grad_norm": 0.7757353952414447, "learning_rate": 5.498783454987835e-07, "loss": 0.6703, "step": 30861 }, { "epoch": 0.9010539838252898, "grad_norm": 0.6696927221800866, "learning_rate": 5.497161394971614e-07, "loss": 0.5488, "step": 30862 }, { "epoch": 0.9010831800531371, "grad_norm": 0.7294605246116017, "learning_rate": 5.495539334955393e-07, "loss": 0.6504, "step": 30863 }, { "epoch": 0.9011123762809845, "grad_norm": 0.7247496677627853, "learning_rate": 5.493917274939173e-07, "loss": 0.6473, "step": 30864 }, { "epoch": 0.9011415725088319, "grad_norm": 0.7179978925149696, "learning_rate": 5.492295214922952e-07, "loss": 0.6435, "step": 30865 }, { "epoch": 0.9011707687366792, "grad_norm": 0.7208596075506115, "learning_rate": 5.490673154906731e-07, "loss": 0.6703, "step": 30866 }, { "epoch": 0.9011999649645266, "grad_norm": 0.7287243997249468, "learning_rate": 5.489051094890512e-07, "loss": 0.6648, "step": 30867 }, { "epoch": 0.901229161192374, "grad_norm": 0.7258111189177193, "learning_rate": 5.48742903487429e-07, "loss": 0.6753, "step": 30868 }, { "epoch": 0.9012583574202213, "grad_norm": 0.762218789534398, "learning_rate": 5.485806974858071e-07, "loss": 0.6848, "step": 30869 }, { "epoch": 0.9012875536480687, "grad_norm": 0.7047994168779244, "learning_rate": 5.48418491484185e-07, "loss": 0.5907, "step": 30870 }, { "epoch": 0.901316749875916, "grad_norm": 0.7432108041926084, "learning_rate": 5.48256285482563e-07, "loss": 0.6323, "step": 30871 }, { "epoch": 0.9013459461037634, "grad_norm": 0.6855068039004746, "learning_rate": 5.480940794809409e-07, "loss": 0.5772, "step": 30872 }, { "epoch": 0.9013751423316108, "grad_norm": 0.7277293771088321, "learning_rate": 5.479318734793188e-07, "loss": 0.7033, "step": 30873 }, { "epoch": 0.9014043385594581, "grad_norm": 0.7504598414507663, "learning_rate": 5.477696674776968e-07, "loss": 0.6274, "step": 30874 }, { "epoch": 0.9014335347873055, "grad_norm": 0.7587648713820441, "learning_rate": 5.476074614760747e-07, "loss": 0.6873, "step": 30875 }, { "epoch": 0.9014627310151528, "grad_norm": 0.7046709333187275, "learning_rate": 5.474452554744526e-07, "loss": 0.5908, "step": 30876 }, { "epoch": 0.9014919272430002, "grad_norm": 0.6957823322993568, "learning_rate": 5.472830494728306e-07, "loss": 0.5966, "step": 30877 }, { "epoch": 0.9015211234708476, "grad_norm": 0.6654550793496234, "learning_rate": 5.471208434712085e-07, "loss": 0.568, "step": 30878 }, { "epoch": 0.9015503196986949, "grad_norm": 0.6689335674904002, "learning_rate": 5.469586374695864e-07, "loss": 0.5339, "step": 30879 }, { "epoch": 0.9015795159265423, "grad_norm": 0.7072874874064533, "learning_rate": 5.467964314679643e-07, "loss": 0.611, "step": 30880 }, { "epoch": 0.9016087121543896, "grad_norm": 0.6657941372114564, "learning_rate": 5.466342254663423e-07, "loss": 0.5973, "step": 30881 }, { "epoch": 0.901637908382237, "grad_norm": 0.6793367952640575, "learning_rate": 5.464720194647202e-07, "loss": 0.6155, "step": 30882 }, { "epoch": 0.9016671046100844, "grad_norm": 0.7439059923188358, "learning_rate": 5.463098134630982e-07, "loss": 0.6652, "step": 30883 }, { "epoch": 0.9016963008379317, "grad_norm": 0.6716260990500961, "learning_rate": 5.461476074614761e-07, "loss": 0.5864, "step": 30884 }, { "epoch": 0.9017254970657791, "grad_norm": 0.7705625109088656, "learning_rate": 5.45985401459854e-07, "loss": 0.7014, "step": 30885 }, { "epoch": 0.9017546932936265, "grad_norm": 0.7066789860716947, "learning_rate": 5.45823195458232e-07, "loss": 0.6003, "step": 30886 }, { "epoch": 0.9017838895214738, "grad_norm": 0.7103489315142937, "learning_rate": 5.456609894566099e-07, "loss": 0.6592, "step": 30887 }, { "epoch": 0.9018130857493212, "grad_norm": 0.699840810198556, "learning_rate": 5.454987834549879e-07, "loss": 0.6133, "step": 30888 }, { "epoch": 0.9018422819771685, "grad_norm": 0.7778616462622417, "learning_rate": 5.453365774533658e-07, "loss": 0.6856, "step": 30889 }, { "epoch": 0.9018714782050159, "grad_norm": 0.7411967479755398, "learning_rate": 5.451743714517438e-07, "loss": 0.6781, "step": 30890 }, { "epoch": 0.9019006744328633, "grad_norm": 0.7606681169790888, "learning_rate": 5.450121654501217e-07, "loss": 0.6926, "step": 30891 }, { "epoch": 0.9019298706607106, "grad_norm": 0.7654284796951809, "learning_rate": 5.448499594484996e-07, "loss": 0.6921, "step": 30892 }, { "epoch": 0.901959066888558, "grad_norm": 0.6716393521007634, "learning_rate": 5.446877534468776e-07, "loss": 0.5476, "step": 30893 }, { "epoch": 0.9019882631164053, "grad_norm": 0.7887082632527315, "learning_rate": 5.445255474452555e-07, "loss": 0.6481, "step": 30894 }, { "epoch": 0.9020174593442527, "grad_norm": 0.7386406832282584, "learning_rate": 5.443633414436334e-07, "loss": 0.6363, "step": 30895 }, { "epoch": 0.9020466555721001, "grad_norm": 0.6679241601312106, "learning_rate": 5.442011354420114e-07, "loss": 0.5715, "step": 30896 }, { "epoch": 0.9020758517999474, "grad_norm": 0.7085164251211117, "learning_rate": 5.440389294403893e-07, "loss": 0.6016, "step": 30897 }, { "epoch": 0.9021050480277948, "grad_norm": 0.6517042686591243, "learning_rate": 5.438767234387672e-07, "loss": 0.5472, "step": 30898 }, { "epoch": 0.9021342442556421, "grad_norm": 0.7660609834605836, "learning_rate": 5.437145174371451e-07, "loss": 0.6879, "step": 30899 }, { "epoch": 0.9021634404834895, "grad_norm": 0.7579326663153789, "learning_rate": 5.435523114355232e-07, "loss": 0.6805, "step": 30900 }, { "epoch": 0.9021926367113369, "grad_norm": 0.7399778144372599, "learning_rate": 5.43390105433901e-07, "loss": 0.7211, "step": 30901 }, { "epoch": 0.9022218329391842, "grad_norm": 0.7261927880287586, "learning_rate": 5.432278994322791e-07, "loss": 0.6427, "step": 30902 }, { "epoch": 0.9022510291670316, "grad_norm": 0.7148309267480922, "learning_rate": 5.43065693430657e-07, "loss": 0.6077, "step": 30903 }, { "epoch": 0.902280225394879, "grad_norm": 0.7109160352624991, "learning_rate": 5.42903487429035e-07, "loss": 0.6538, "step": 30904 }, { "epoch": 0.9023094216227263, "grad_norm": 0.6866732366426017, "learning_rate": 5.427412814274129e-07, "loss": 0.5897, "step": 30905 }, { "epoch": 0.9023386178505737, "grad_norm": 0.6694977970246448, "learning_rate": 5.425790754257908e-07, "loss": 0.5227, "step": 30906 }, { "epoch": 0.902367814078421, "grad_norm": 0.7463174520203012, "learning_rate": 5.424168694241688e-07, "loss": 0.678, "step": 30907 }, { "epoch": 0.9023970103062684, "grad_norm": 0.724411686915614, "learning_rate": 5.422546634225467e-07, "loss": 0.6885, "step": 30908 }, { "epoch": 0.9024262065341158, "grad_norm": 0.7157827510319344, "learning_rate": 5.420924574209247e-07, "loss": 0.6654, "step": 30909 }, { "epoch": 0.9024554027619631, "grad_norm": 0.6877208714238009, "learning_rate": 5.419302514193026e-07, "loss": 0.6201, "step": 30910 }, { "epoch": 0.9024845989898105, "grad_norm": 0.7671591545212182, "learning_rate": 5.417680454176805e-07, "loss": 0.7399, "step": 30911 }, { "epoch": 0.9025137952176578, "grad_norm": 0.6925825699893156, "learning_rate": 5.416058394160585e-07, "loss": 0.5911, "step": 30912 }, { "epoch": 0.9025429914455052, "grad_norm": 0.6946171435361567, "learning_rate": 5.414436334144364e-07, "loss": 0.5785, "step": 30913 }, { "epoch": 0.9025721876733526, "grad_norm": 0.7279294252821153, "learning_rate": 5.412814274128143e-07, "loss": 0.6286, "step": 30914 }, { "epoch": 0.9026013839011999, "grad_norm": 0.6874193993878958, "learning_rate": 5.411192214111922e-07, "loss": 0.5993, "step": 30915 }, { "epoch": 0.9026305801290473, "grad_norm": 0.7050152786974708, "learning_rate": 5.409570154095702e-07, "loss": 0.6088, "step": 30916 }, { "epoch": 0.9026597763568946, "grad_norm": 0.6974153245999815, "learning_rate": 5.407948094079481e-07, "loss": 0.6214, "step": 30917 }, { "epoch": 0.902688972584742, "grad_norm": 0.6865329338404518, "learning_rate": 5.40632603406326e-07, "loss": 0.6012, "step": 30918 }, { "epoch": 0.9027181688125894, "grad_norm": 0.6970257288731377, "learning_rate": 5.40470397404704e-07, "loss": 0.5876, "step": 30919 }, { "epoch": 0.9027473650404367, "grad_norm": 0.6935088340054624, "learning_rate": 5.403081914030819e-07, "loss": 0.599, "step": 30920 }, { "epoch": 0.9027765612682841, "grad_norm": 0.7755911913584753, "learning_rate": 5.401459854014599e-07, "loss": 0.7338, "step": 30921 }, { "epoch": 0.9028057574961315, "grad_norm": 0.7273278254428348, "learning_rate": 5.399837793998378e-07, "loss": 0.5661, "step": 30922 }, { "epoch": 0.9028349537239788, "grad_norm": 0.7120980644562356, "learning_rate": 5.398215733982158e-07, "loss": 0.5856, "step": 30923 }, { "epoch": 0.9028641499518262, "grad_norm": 0.7122722183783016, "learning_rate": 5.396593673965937e-07, "loss": 0.6383, "step": 30924 }, { "epoch": 0.9028933461796735, "grad_norm": 0.7978916679011755, "learning_rate": 5.394971613949716e-07, "loss": 0.7352, "step": 30925 }, { "epoch": 0.9029225424075209, "grad_norm": 0.7074493011794978, "learning_rate": 5.393349553933496e-07, "loss": 0.6489, "step": 30926 }, { "epoch": 0.9029517386353683, "grad_norm": 0.6749085702814984, "learning_rate": 5.391727493917275e-07, "loss": 0.5418, "step": 30927 }, { "epoch": 0.9029809348632156, "grad_norm": 0.6715416425393326, "learning_rate": 5.390105433901055e-07, "loss": 0.5519, "step": 30928 }, { "epoch": 0.903010131091063, "grad_norm": 0.7249646491745738, "learning_rate": 5.388483373884834e-07, "loss": 0.6187, "step": 30929 }, { "epoch": 0.9030393273189103, "grad_norm": 0.678659375751881, "learning_rate": 5.386861313868613e-07, "loss": 0.5586, "step": 30930 }, { "epoch": 0.9030685235467577, "grad_norm": 0.7666109202136216, "learning_rate": 5.385239253852394e-07, "loss": 0.7427, "step": 30931 }, { "epoch": 0.9030977197746051, "grad_norm": 0.7007752318612648, "learning_rate": 5.383617193836173e-07, "loss": 0.6376, "step": 30932 }, { "epoch": 0.9031269160024524, "grad_norm": 0.6696783126722852, "learning_rate": 5.381995133819952e-07, "loss": 0.5873, "step": 30933 }, { "epoch": 0.9031561122302998, "grad_norm": 0.7066183962999878, "learning_rate": 5.380373073803731e-07, "loss": 0.6064, "step": 30934 }, { "epoch": 0.9031853084581473, "grad_norm": 0.7098489778841562, "learning_rate": 5.378751013787511e-07, "loss": 0.5927, "step": 30935 }, { "epoch": 0.9032145046859946, "grad_norm": 0.674357669102485, "learning_rate": 5.37712895377129e-07, "loss": 0.5314, "step": 30936 }, { "epoch": 0.903243700913842, "grad_norm": 0.7521984720851203, "learning_rate": 5.375506893755069e-07, "loss": 0.6625, "step": 30937 }, { "epoch": 0.9032728971416893, "grad_norm": 0.724829237886251, "learning_rate": 5.373884833738849e-07, "loss": 0.6327, "step": 30938 }, { "epoch": 0.9033020933695367, "grad_norm": 0.7069697231525095, "learning_rate": 5.372262773722628e-07, "loss": 0.5696, "step": 30939 }, { "epoch": 0.9033312895973841, "grad_norm": 0.723568868502361, "learning_rate": 5.370640713706408e-07, "loss": 0.6099, "step": 30940 }, { "epoch": 0.9033604858252314, "grad_norm": 0.730484366257884, "learning_rate": 5.369018653690187e-07, "loss": 0.6078, "step": 30941 }, { "epoch": 0.9033896820530788, "grad_norm": 0.7442641872700891, "learning_rate": 5.367396593673967e-07, "loss": 0.6668, "step": 30942 }, { "epoch": 0.9034188782809262, "grad_norm": 0.8014289602791261, "learning_rate": 5.365774533657746e-07, "loss": 0.6599, "step": 30943 }, { "epoch": 0.9034480745087735, "grad_norm": 0.6872330545274313, "learning_rate": 5.364152473641525e-07, "loss": 0.6226, "step": 30944 }, { "epoch": 0.9034772707366209, "grad_norm": 0.7547953606379034, "learning_rate": 5.362530413625305e-07, "loss": 0.6256, "step": 30945 }, { "epoch": 0.9035064669644682, "grad_norm": 0.6931522120612561, "learning_rate": 5.360908353609084e-07, "loss": 0.5777, "step": 30946 }, { "epoch": 0.9035356631923156, "grad_norm": 0.6948756797948724, "learning_rate": 5.359286293592864e-07, "loss": 0.5978, "step": 30947 }, { "epoch": 0.903564859420163, "grad_norm": 0.7025426860309079, "learning_rate": 5.357664233576643e-07, "loss": 0.644, "step": 30948 }, { "epoch": 0.9035940556480103, "grad_norm": 0.702991648573197, "learning_rate": 5.356042173560422e-07, "loss": 0.6075, "step": 30949 }, { "epoch": 0.9036232518758577, "grad_norm": 0.7571576187124943, "learning_rate": 5.354420113544202e-07, "loss": 0.7078, "step": 30950 }, { "epoch": 0.903652448103705, "grad_norm": 0.7055482890571677, "learning_rate": 5.352798053527981e-07, "loss": 0.6201, "step": 30951 }, { "epoch": 0.9036816443315524, "grad_norm": 0.7173646751332902, "learning_rate": 5.35117599351176e-07, "loss": 0.5662, "step": 30952 }, { "epoch": 0.9037108405593998, "grad_norm": 0.7530597350420345, "learning_rate": 5.349553933495539e-07, "loss": 0.7262, "step": 30953 }, { "epoch": 0.9037400367872471, "grad_norm": 0.6971964053004696, "learning_rate": 5.347931873479319e-07, "loss": 0.6403, "step": 30954 }, { "epoch": 0.9037692330150945, "grad_norm": 0.7038470254651248, "learning_rate": 5.346309813463098e-07, "loss": 0.6377, "step": 30955 }, { "epoch": 0.9037984292429418, "grad_norm": 0.6905272856428281, "learning_rate": 5.344687753446878e-07, "loss": 0.6538, "step": 30956 }, { "epoch": 0.9038276254707892, "grad_norm": 0.7208506828114148, "learning_rate": 5.343065693430657e-07, "loss": 0.6125, "step": 30957 }, { "epoch": 0.9038568216986366, "grad_norm": 0.717165125077695, "learning_rate": 5.341443633414436e-07, "loss": 0.6293, "step": 30958 }, { "epoch": 0.9038860179264839, "grad_norm": 0.6939996314725195, "learning_rate": 5.339821573398216e-07, "loss": 0.6016, "step": 30959 }, { "epoch": 0.9039152141543313, "grad_norm": 0.7468596513996917, "learning_rate": 5.338199513381995e-07, "loss": 0.7033, "step": 30960 }, { "epoch": 0.9039444103821787, "grad_norm": 0.7031067702243359, "learning_rate": 5.336577453365775e-07, "loss": 0.5611, "step": 30961 }, { "epoch": 0.903973606610026, "grad_norm": 0.7876747470706592, "learning_rate": 5.334955393349554e-07, "loss": 0.6969, "step": 30962 }, { "epoch": 0.9040028028378734, "grad_norm": 0.711940812243818, "learning_rate": 5.333333333333335e-07, "loss": 0.618, "step": 30963 }, { "epoch": 0.9040319990657207, "grad_norm": 0.718566938922928, "learning_rate": 5.331711273317114e-07, "loss": 0.635, "step": 30964 }, { "epoch": 0.9040611952935681, "grad_norm": 0.7111057006496302, "learning_rate": 5.330089213300893e-07, "loss": 0.617, "step": 30965 }, { "epoch": 0.9040903915214155, "grad_norm": 0.7186112961043303, "learning_rate": 5.328467153284673e-07, "loss": 0.5505, "step": 30966 }, { "epoch": 0.9041195877492628, "grad_norm": 0.7660909627793486, "learning_rate": 5.326845093268452e-07, "loss": 0.7017, "step": 30967 }, { "epoch": 0.9041487839771102, "grad_norm": 0.6872357398583178, "learning_rate": 5.325223033252231e-07, "loss": 0.6392, "step": 30968 }, { "epoch": 0.9041779802049575, "grad_norm": 0.7419946885883215, "learning_rate": 5.32360097323601e-07, "loss": 0.6483, "step": 30969 }, { "epoch": 0.9042071764328049, "grad_norm": 0.7078090245494004, "learning_rate": 5.32197891321979e-07, "loss": 0.5919, "step": 30970 }, { "epoch": 0.9042363726606523, "grad_norm": 0.671050802027708, "learning_rate": 5.320356853203569e-07, "loss": 0.5703, "step": 30971 }, { "epoch": 0.9042655688884996, "grad_norm": 0.7324489153005579, "learning_rate": 5.318734793187348e-07, "loss": 0.6712, "step": 30972 }, { "epoch": 0.904294765116347, "grad_norm": 0.7936490632714982, "learning_rate": 5.317112733171128e-07, "loss": 0.7122, "step": 30973 }, { "epoch": 0.9043239613441943, "grad_norm": 0.7258033274307093, "learning_rate": 5.315490673154907e-07, "loss": 0.6354, "step": 30974 }, { "epoch": 0.9043531575720417, "grad_norm": 0.7630674372606491, "learning_rate": 5.313868613138687e-07, "loss": 0.7203, "step": 30975 }, { "epoch": 0.9043823537998891, "grad_norm": 0.769173528746673, "learning_rate": 5.312246553122466e-07, "loss": 0.6185, "step": 30976 }, { "epoch": 0.9044115500277364, "grad_norm": 0.6649959608999643, "learning_rate": 5.310624493106245e-07, "loss": 0.6024, "step": 30977 }, { "epoch": 0.9044407462555838, "grad_norm": 0.758879342389134, "learning_rate": 5.309002433090025e-07, "loss": 0.6717, "step": 30978 }, { "epoch": 0.9044699424834312, "grad_norm": 0.7634207526039678, "learning_rate": 5.307380373073804e-07, "loss": 0.7067, "step": 30979 }, { "epoch": 0.9044991387112785, "grad_norm": 0.7514911600263028, "learning_rate": 5.305758313057584e-07, "loss": 0.7066, "step": 30980 }, { "epoch": 0.9045283349391259, "grad_norm": 0.7610495425595266, "learning_rate": 5.304136253041363e-07, "loss": 0.6871, "step": 30981 }, { "epoch": 0.9045575311669732, "grad_norm": 0.7130382141156529, "learning_rate": 5.302514193025143e-07, "loss": 0.6563, "step": 30982 }, { "epoch": 0.9045867273948206, "grad_norm": 0.7297331600471683, "learning_rate": 5.300892133008922e-07, "loss": 0.6204, "step": 30983 }, { "epoch": 0.904615923622668, "grad_norm": 0.6859805249724791, "learning_rate": 5.299270072992701e-07, "loss": 0.5926, "step": 30984 }, { "epoch": 0.9046451198505153, "grad_norm": 0.7545633136273822, "learning_rate": 5.297648012976481e-07, "loss": 0.6711, "step": 30985 }, { "epoch": 0.9046743160783627, "grad_norm": 0.7231036672371081, "learning_rate": 5.29602595296026e-07, "loss": 0.6311, "step": 30986 }, { "epoch": 0.90470351230621, "grad_norm": 0.6684834648390232, "learning_rate": 5.294403892944039e-07, "loss": 0.5767, "step": 30987 }, { "epoch": 0.9047327085340574, "grad_norm": 0.7413432082695554, "learning_rate": 5.292781832927818e-07, "loss": 0.6398, "step": 30988 }, { "epoch": 0.9047619047619048, "grad_norm": 0.7128107686289205, "learning_rate": 5.291159772911598e-07, "loss": 0.5883, "step": 30989 }, { "epoch": 0.9047911009897521, "grad_norm": 0.7575065191129902, "learning_rate": 5.289537712895377e-07, "loss": 0.7064, "step": 30990 }, { "epoch": 0.9048202972175995, "grad_norm": 0.8326703542562814, "learning_rate": 5.287915652879156e-07, "loss": 0.7483, "step": 30991 }, { "epoch": 0.9048494934454469, "grad_norm": 0.7354306997263638, "learning_rate": 5.286293592862936e-07, "loss": 0.5894, "step": 30992 }, { "epoch": 0.9048786896732942, "grad_norm": 0.7351987340938688, "learning_rate": 5.284671532846715e-07, "loss": 0.607, "step": 30993 }, { "epoch": 0.9049078859011416, "grad_norm": 0.6988311026918659, "learning_rate": 5.283049472830495e-07, "loss": 0.517, "step": 30994 }, { "epoch": 0.9049370821289889, "grad_norm": 0.7488412688907168, "learning_rate": 5.281427412814274e-07, "loss": 0.6385, "step": 30995 }, { "epoch": 0.9049662783568363, "grad_norm": 0.7697734128091986, "learning_rate": 5.279805352798053e-07, "loss": 0.6448, "step": 30996 }, { "epoch": 0.9049954745846837, "grad_norm": 0.7363179627541491, "learning_rate": 5.278183292781834e-07, "loss": 0.6354, "step": 30997 }, { "epoch": 0.905024670812531, "grad_norm": 0.7172824059194883, "learning_rate": 5.276561232765613e-07, "loss": 0.6285, "step": 30998 }, { "epoch": 0.9050538670403784, "grad_norm": 0.6948203157778625, "learning_rate": 5.274939172749393e-07, "loss": 0.5873, "step": 30999 }, { "epoch": 0.9050830632682257, "grad_norm": 0.7602940622994074, "learning_rate": 5.273317112733172e-07, "loss": 0.6793, "step": 31000 }, { "epoch": 0.9051122594960731, "grad_norm": 0.946356968294929, "learning_rate": 5.271695052716952e-07, "loss": 0.6703, "step": 31001 }, { "epoch": 0.9051414557239205, "grad_norm": 0.7395850740747857, "learning_rate": 5.270072992700731e-07, "loss": 0.6652, "step": 31002 }, { "epoch": 0.9051706519517678, "grad_norm": 0.7509450170957636, "learning_rate": 5.26845093268451e-07, "loss": 0.6408, "step": 31003 }, { "epoch": 0.9051998481796152, "grad_norm": 0.7404966083710465, "learning_rate": 5.26682887266829e-07, "loss": 0.6799, "step": 31004 }, { "epoch": 0.9052290444074625, "grad_norm": 0.713319151718132, "learning_rate": 5.265206812652069e-07, "loss": 0.6406, "step": 31005 }, { "epoch": 0.9052582406353099, "grad_norm": 0.7255599662311379, "learning_rate": 5.263584752635848e-07, "loss": 0.6911, "step": 31006 }, { "epoch": 0.9052874368631573, "grad_norm": 0.7594488550280553, "learning_rate": 5.261962692619627e-07, "loss": 0.6372, "step": 31007 }, { "epoch": 0.9053166330910046, "grad_norm": 0.7124868529043168, "learning_rate": 5.260340632603407e-07, "loss": 0.6012, "step": 31008 }, { "epoch": 0.905345829318852, "grad_norm": 0.6929729987328909, "learning_rate": 5.258718572587186e-07, "loss": 0.6205, "step": 31009 }, { "epoch": 0.9053750255466994, "grad_norm": 0.7950707394636237, "learning_rate": 5.257096512570965e-07, "loss": 0.6816, "step": 31010 }, { "epoch": 0.9054042217745467, "grad_norm": 0.759101427814762, "learning_rate": 5.255474452554745e-07, "loss": 0.6428, "step": 31011 }, { "epoch": 0.9054334180023941, "grad_norm": 0.7577056684650816, "learning_rate": 5.253852392538524e-07, "loss": 0.7249, "step": 31012 }, { "epoch": 0.9054626142302414, "grad_norm": 0.6895045502596147, "learning_rate": 5.252230332522304e-07, "loss": 0.6011, "step": 31013 }, { "epoch": 0.9054918104580888, "grad_norm": 0.7800136153306565, "learning_rate": 5.250608272506083e-07, "loss": 0.7217, "step": 31014 }, { "epoch": 0.9055210066859362, "grad_norm": 0.747451688148677, "learning_rate": 5.248986212489863e-07, "loss": 0.7387, "step": 31015 }, { "epoch": 0.9055502029137835, "grad_norm": 0.7059777162304114, "learning_rate": 5.247364152473642e-07, "loss": 0.6112, "step": 31016 }, { "epoch": 0.9055793991416309, "grad_norm": 0.6980324275348603, "learning_rate": 5.245742092457421e-07, "loss": 0.6226, "step": 31017 }, { "epoch": 0.9056085953694782, "grad_norm": 0.7296518838192686, "learning_rate": 5.244120032441201e-07, "loss": 0.6354, "step": 31018 }, { "epoch": 0.9056377915973256, "grad_norm": 0.7624990057393259, "learning_rate": 5.24249797242498e-07, "loss": 0.6557, "step": 31019 }, { "epoch": 0.905666987825173, "grad_norm": 0.6885678419147122, "learning_rate": 5.24087591240876e-07, "loss": 0.5411, "step": 31020 }, { "epoch": 0.9056961840530203, "grad_norm": 0.7395034598736596, "learning_rate": 5.239253852392539e-07, "loss": 0.6504, "step": 31021 }, { "epoch": 0.9057253802808677, "grad_norm": 0.760440648945054, "learning_rate": 5.237631792376318e-07, "loss": 0.7121, "step": 31022 }, { "epoch": 0.905754576508715, "grad_norm": 0.7982874879741721, "learning_rate": 5.236009732360097e-07, "loss": 0.7128, "step": 31023 }, { "epoch": 0.9057837727365624, "grad_norm": 0.6669524484256737, "learning_rate": 5.234387672343877e-07, "loss": 0.5981, "step": 31024 }, { "epoch": 0.9058129689644098, "grad_norm": 0.76031190761313, "learning_rate": 5.232765612327656e-07, "loss": 0.7806, "step": 31025 }, { "epoch": 0.9058421651922571, "grad_norm": 0.673858205837, "learning_rate": 5.231143552311435e-07, "loss": 0.5523, "step": 31026 }, { "epoch": 0.9058713614201045, "grad_norm": 0.6454175224222113, "learning_rate": 5.229521492295215e-07, "loss": 0.5621, "step": 31027 }, { "epoch": 0.9059005576479519, "grad_norm": 0.7246242206947074, "learning_rate": 5.227899432278994e-07, "loss": 0.6421, "step": 31028 }, { "epoch": 0.9059297538757992, "grad_norm": 0.7396329347818404, "learning_rate": 5.226277372262773e-07, "loss": 0.6371, "step": 31029 }, { "epoch": 0.9059589501036466, "grad_norm": 0.736462821490935, "learning_rate": 5.224655312246554e-07, "loss": 0.7025, "step": 31030 }, { "epoch": 0.9059881463314939, "grad_norm": 0.7472317243196656, "learning_rate": 5.223033252230333e-07, "loss": 0.6801, "step": 31031 }, { "epoch": 0.9060173425593413, "grad_norm": 0.7013211120026268, "learning_rate": 5.221411192214113e-07, "loss": 0.6836, "step": 31032 }, { "epoch": 0.9060465387871887, "grad_norm": 0.7056528835551784, "learning_rate": 5.219789132197892e-07, "loss": 0.6307, "step": 31033 }, { "epoch": 0.906075735015036, "grad_norm": 0.7179003153206759, "learning_rate": 5.218167072181672e-07, "loss": 0.6736, "step": 31034 }, { "epoch": 0.9061049312428834, "grad_norm": 0.7482780017828439, "learning_rate": 5.216545012165451e-07, "loss": 0.6419, "step": 31035 }, { "epoch": 0.9061341274707307, "grad_norm": 0.6992286139054226, "learning_rate": 5.21492295214923e-07, "loss": 0.5412, "step": 31036 }, { "epoch": 0.9061633236985781, "grad_norm": 0.7038132904538121, "learning_rate": 5.21330089213301e-07, "loss": 0.632, "step": 31037 }, { "epoch": 0.9061925199264255, "grad_norm": 0.6893110568199483, "learning_rate": 5.211678832116789e-07, "loss": 0.5798, "step": 31038 }, { "epoch": 0.9062217161542728, "grad_norm": 0.7253219733416187, "learning_rate": 5.210056772100569e-07, "loss": 0.6496, "step": 31039 }, { "epoch": 0.9062509123821202, "grad_norm": 0.7533102373753411, "learning_rate": 5.208434712084348e-07, "loss": 0.6998, "step": 31040 }, { "epoch": 0.9062801086099675, "grad_norm": 0.7329487878508784, "learning_rate": 5.206812652068127e-07, "loss": 0.677, "step": 31041 }, { "epoch": 0.9063093048378149, "grad_norm": 0.6581180986693544, "learning_rate": 5.205190592051906e-07, "loss": 0.5826, "step": 31042 }, { "epoch": 0.9063385010656623, "grad_norm": 0.7124189250445432, "learning_rate": 5.203568532035686e-07, "loss": 0.5947, "step": 31043 }, { "epoch": 0.9063676972935096, "grad_norm": 0.6678970991008069, "learning_rate": 5.201946472019465e-07, "loss": 0.5572, "step": 31044 }, { "epoch": 0.906396893521357, "grad_norm": 0.6683768277952121, "learning_rate": 5.200324412003244e-07, "loss": 0.5309, "step": 31045 }, { "epoch": 0.9064260897492044, "grad_norm": 0.7061790983449239, "learning_rate": 5.198702351987024e-07, "loss": 0.6197, "step": 31046 }, { "epoch": 0.9064552859770517, "grad_norm": 0.7520481463824195, "learning_rate": 5.197080291970803e-07, "loss": 0.6348, "step": 31047 }, { "epoch": 0.9064844822048991, "grad_norm": 0.6800315533889516, "learning_rate": 5.195458231954583e-07, "loss": 0.58, "step": 31048 }, { "epoch": 0.9065136784327464, "grad_norm": 0.7340119571575343, "learning_rate": 5.193836171938362e-07, "loss": 0.6512, "step": 31049 }, { "epoch": 0.9065428746605938, "grad_norm": 0.7088325889111647, "learning_rate": 5.192214111922141e-07, "loss": 0.612, "step": 31050 }, { "epoch": 0.9065720708884412, "grad_norm": 0.6910283707631321, "learning_rate": 5.190592051905921e-07, "loss": 0.6343, "step": 31051 }, { "epoch": 0.9066012671162885, "grad_norm": 0.6975856783252704, "learning_rate": 5.1889699918897e-07, "loss": 0.6215, "step": 31052 }, { "epoch": 0.9066304633441359, "grad_norm": 0.7526526312841583, "learning_rate": 5.18734793187348e-07, "loss": 0.6907, "step": 31053 }, { "epoch": 0.9066596595719832, "grad_norm": 0.6691272215302425, "learning_rate": 5.185725871857259e-07, "loss": 0.5557, "step": 31054 }, { "epoch": 0.9066888557998306, "grad_norm": 0.8053962188361958, "learning_rate": 5.184103811841038e-07, "loss": 0.7118, "step": 31055 }, { "epoch": 0.9067180520276781, "grad_norm": 0.7281082583721681, "learning_rate": 5.182481751824818e-07, "loss": 0.6517, "step": 31056 }, { "epoch": 0.9067472482555254, "grad_norm": 0.8254750227533236, "learning_rate": 5.180859691808597e-07, "loss": 0.7374, "step": 31057 }, { "epoch": 0.9067764444833728, "grad_norm": 0.7026938847069923, "learning_rate": 5.179237631792376e-07, "loss": 0.6356, "step": 31058 }, { "epoch": 0.9068056407112202, "grad_norm": 0.6658429504466948, "learning_rate": 5.177615571776156e-07, "loss": 0.5884, "step": 31059 }, { "epoch": 0.9068348369390675, "grad_norm": 0.7114199968387555, "learning_rate": 5.175993511759935e-07, "loss": 0.6044, "step": 31060 }, { "epoch": 0.9068640331669149, "grad_norm": 0.7897932621933713, "learning_rate": 5.174371451743714e-07, "loss": 0.5782, "step": 31061 }, { "epoch": 0.9068932293947622, "grad_norm": 0.7357612517738634, "learning_rate": 5.172749391727495e-07, "loss": 0.6127, "step": 31062 }, { "epoch": 0.9069224256226096, "grad_norm": 0.7414295932584289, "learning_rate": 5.171127331711274e-07, "loss": 0.6576, "step": 31063 }, { "epoch": 0.906951621850457, "grad_norm": 0.7168083542180931, "learning_rate": 5.169505271695053e-07, "loss": 0.6396, "step": 31064 }, { "epoch": 0.9069808180783043, "grad_norm": 0.7082581018653715, "learning_rate": 5.167883211678833e-07, "loss": 0.6375, "step": 31065 }, { "epoch": 0.9070100143061517, "grad_norm": 0.7487448159049025, "learning_rate": 5.166261151662612e-07, "loss": 0.6775, "step": 31066 }, { "epoch": 0.907039210533999, "grad_norm": 0.751600318865524, "learning_rate": 5.164639091646392e-07, "loss": 0.7174, "step": 31067 }, { "epoch": 0.9070684067618464, "grad_norm": 0.7134751917944181, "learning_rate": 5.163017031630171e-07, "loss": 0.6309, "step": 31068 }, { "epoch": 0.9070976029896938, "grad_norm": 0.7804168483292073, "learning_rate": 5.16139497161395e-07, "loss": 0.7724, "step": 31069 }, { "epoch": 0.9071267992175411, "grad_norm": 0.7454534185677797, "learning_rate": 5.15977291159773e-07, "loss": 0.6518, "step": 31070 }, { "epoch": 0.9071559954453885, "grad_norm": 0.6858130973581877, "learning_rate": 5.158150851581509e-07, "loss": 0.5969, "step": 31071 }, { "epoch": 0.9071851916732359, "grad_norm": 0.669565247165246, "learning_rate": 5.156528791565289e-07, "loss": 0.5242, "step": 31072 }, { "epoch": 0.9072143879010832, "grad_norm": 0.7169063998455634, "learning_rate": 5.154906731549068e-07, "loss": 0.5981, "step": 31073 }, { "epoch": 0.9072435841289306, "grad_norm": 0.7530603625606085, "learning_rate": 5.153284671532848e-07, "loss": 0.6646, "step": 31074 }, { "epoch": 0.9072727803567779, "grad_norm": 0.6552589269947726, "learning_rate": 5.151662611516627e-07, "loss": 0.5593, "step": 31075 }, { "epoch": 0.9073019765846253, "grad_norm": 0.6936196975307637, "learning_rate": 5.150040551500406e-07, "loss": 0.6162, "step": 31076 }, { "epoch": 0.9073311728124727, "grad_norm": 0.7244437039288513, "learning_rate": 5.148418491484185e-07, "loss": 0.5853, "step": 31077 }, { "epoch": 0.90736036904032, "grad_norm": 0.7280785544177685, "learning_rate": 5.146796431467965e-07, "loss": 0.623, "step": 31078 }, { "epoch": 0.9073895652681674, "grad_norm": 0.6650919757912179, "learning_rate": 5.145174371451744e-07, "loss": 0.548, "step": 31079 }, { "epoch": 0.9074187614960147, "grad_norm": 0.6599867836677406, "learning_rate": 5.143552311435523e-07, "loss": 0.5479, "step": 31080 }, { "epoch": 0.9074479577238621, "grad_norm": 0.7044007085036174, "learning_rate": 5.141930251419303e-07, "loss": 0.581, "step": 31081 }, { "epoch": 0.9074771539517095, "grad_norm": 0.7033376366046504, "learning_rate": 5.140308191403082e-07, "loss": 0.628, "step": 31082 }, { "epoch": 0.9075063501795568, "grad_norm": 0.7093921733430124, "learning_rate": 5.138686131386861e-07, "loss": 0.6087, "step": 31083 }, { "epoch": 0.9075355464074042, "grad_norm": 0.6940430092883545, "learning_rate": 5.137064071370641e-07, "loss": 0.6372, "step": 31084 }, { "epoch": 0.9075647426352516, "grad_norm": 0.7428222776416615, "learning_rate": 5.13544201135442e-07, "loss": 0.6436, "step": 31085 }, { "epoch": 0.9075939388630989, "grad_norm": 0.7274991868962438, "learning_rate": 5.1338199513382e-07, "loss": 0.6728, "step": 31086 }, { "epoch": 0.9076231350909463, "grad_norm": 0.8037523884295277, "learning_rate": 5.132197891321979e-07, "loss": 0.7548, "step": 31087 }, { "epoch": 0.9076523313187936, "grad_norm": 0.6371519845375229, "learning_rate": 5.130575831305758e-07, "loss": 0.4617, "step": 31088 }, { "epoch": 0.907681527546641, "grad_norm": 0.7860371046836936, "learning_rate": 5.128953771289538e-07, "loss": 0.6323, "step": 31089 }, { "epoch": 0.9077107237744884, "grad_norm": 0.7048599980284825, "learning_rate": 5.127331711273317e-07, "loss": 0.6496, "step": 31090 }, { "epoch": 0.9077399200023357, "grad_norm": 0.7460749710668994, "learning_rate": 5.125709651257097e-07, "loss": 0.6069, "step": 31091 }, { "epoch": 0.9077691162301831, "grad_norm": 0.6800326903202042, "learning_rate": 5.124087591240876e-07, "loss": 0.5445, "step": 31092 }, { "epoch": 0.9077983124580304, "grad_norm": 0.6947592485443281, "learning_rate": 5.122465531224657e-07, "loss": 0.6214, "step": 31093 }, { "epoch": 0.9078275086858778, "grad_norm": 0.7398446260292119, "learning_rate": 5.120843471208436e-07, "loss": 0.5793, "step": 31094 }, { "epoch": 0.9078567049137252, "grad_norm": 0.7164447072642209, "learning_rate": 5.119221411192215e-07, "loss": 0.6897, "step": 31095 }, { "epoch": 0.9078859011415725, "grad_norm": 0.6729090655759631, "learning_rate": 5.117599351175994e-07, "loss": 0.5439, "step": 31096 }, { "epoch": 0.9079150973694199, "grad_norm": 0.6811557872211365, "learning_rate": 5.115977291159774e-07, "loss": 0.5368, "step": 31097 }, { "epoch": 0.9079442935972672, "grad_norm": 0.7556406379755758, "learning_rate": 5.114355231143553e-07, "loss": 0.5848, "step": 31098 }, { "epoch": 0.9079734898251146, "grad_norm": 0.7259502324877557, "learning_rate": 5.112733171127332e-07, "loss": 0.6193, "step": 31099 }, { "epoch": 0.908002686052962, "grad_norm": 0.6867484442236887, "learning_rate": 5.111111111111112e-07, "loss": 0.6251, "step": 31100 }, { "epoch": 0.9080318822808093, "grad_norm": 0.7734131141209051, "learning_rate": 5.109489051094891e-07, "loss": 0.7017, "step": 31101 }, { "epoch": 0.9080610785086567, "grad_norm": 0.7664705328960055, "learning_rate": 5.10786699107867e-07, "loss": 0.6889, "step": 31102 }, { "epoch": 0.908090274736504, "grad_norm": 0.7399951099288661, "learning_rate": 5.10624493106245e-07, "loss": 0.6852, "step": 31103 }, { "epoch": 0.9081194709643514, "grad_norm": 0.7293082645486455, "learning_rate": 5.104622871046229e-07, "loss": 0.6773, "step": 31104 }, { "epoch": 0.9081486671921988, "grad_norm": 0.7559097871524596, "learning_rate": 5.103000811030009e-07, "loss": 0.6359, "step": 31105 }, { "epoch": 0.9081778634200461, "grad_norm": 0.701702644792603, "learning_rate": 5.101378751013788e-07, "loss": 0.6111, "step": 31106 }, { "epoch": 0.9082070596478935, "grad_norm": 0.7313662576471828, "learning_rate": 5.099756690997568e-07, "loss": 0.6245, "step": 31107 }, { "epoch": 0.9082362558757409, "grad_norm": 0.7105132049959164, "learning_rate": 5.098134630981347e-07, "loss": 0.6545, "step": 31108 }, { "epoch": 0.9082654521035882, "grad_norm": 0.7539954112237022, "learning_rate": 5.096512570965126e-07, "loss": 0.6839, "step": 31109 }, { "epoch": 0.9082946483314356, "grad_norm": 0.7295201041667961, "learning_rate": 5.094890510948906e-07, "loss": 0.6458, "step": 31110 }, { "epoch": 0.908323844559283, "grad_norm": 0.7054726079376276, "learning_rate": 5.093268450932685e-07, "loss": 0.6103, "step": 31111 }, { "epoch": 0.9083530407871303, "grad_norm": 0.6703438828041616, "learning_rate": 5.091646390916464e-07, "loss": 0.5778, "step": 31112 }, { "epoch": 0.9083822370149777, "grad_norm": 0.765700929892397, "learning_rate": 5.090024330900244e-07, "loss": 0.6289, "step": 31113 }, { "epoch": 0.908411433242825, "grad_norm": 0.7780097078004106, "learning_rate": 5.088402270884023e-07, "loss": 0.764, "step": 31114 }, { "epoch": 0.9084406294706724, "grad_norm": 0.7416221875534196, "learning_rate": 5.086780210867802e-07, "loss": 0.6778, "step": 31115 }, { "epoch": 0.9084698256985198, "grad_norm": 0.6999613526986658, "learning_rate": 5.085158150851582e-07, "loss": 0.6394, "step": 31116 }, { "epoch": 0.9084990219263671, "grad_norm": 0.7405198586261574, "learning_rate": 5.083536090835361e-07, "loss": 0.6421, "step": 31117 }, { "epoch": 0.9085282181542145, "grad_norm": 0.717427677813454, "learning_rate": 5.08191403081914e-07, "loss": 0.6339, "step": 31118 }, { "epoch": 0.9085574143820618, "grad_norm": 0.6755001310374033, "learning_rate": 5.08029197080292e-07, "loss": 0.5626, "step": 31119 }, { "epoch": 0.9085866106099092, "grad_norm": 0.7256870769849457, "learning_rate": 5.078669910786699e-07, "loss": 0.6329, "step": 31120 }, { "epoch": 0.9086158068377566, "grad_norm": 0.6483359704242556, "learning_rate": 5.077047850770478e-07, "loss": 0.5341, "step": 31121 }, { "epoch": 0.9086450030656039, "grad_norm": 0.7419098375353336, "learning_rate": 5.075425790754258e-07, "loss": 0.5985, "step": 31122 }, { "epoch": 0.9086741992934513, "grad_norm": 0.6522129723948041, "learning_rate": 5.073803730738037e-07, "loss": 0.555, "step": 31123 }, { "epoch": 0.9087033955212986, "grad_norm": 0.7335731970362537, "learning_rate": 5.072181670721817e-07, "loss": 0.6268, "step": 31124 }, { "epoch": 0.908732591749146, "grad_norm": 0.735094408353074, "learning_rate": 5.070559610705596e-07, "loss": 0.632, "step": 31125 }, { "epoch": 0.9087617879769934, "grad_norm": 0.6839521203544221, "learning_rate": 5.068937550689377e-07, "loss": 0.6225, "step": 31126 }, { "epoch": 0.9087909842048407, "grad_norm": 0.7510815997095371, "learning_rate": 5.067315490673156e-07, "loss": 0.6395, "step": 31127 }, { "epoch": 0.9088201804326881, "grad_norm": 0.7583657678284316, "learning_rate": 5.065693430656935e-07, "loss": 0.6724, "step": 31128 }, { "epoch": 0.9088493766605354, "grad_norm": 0.6631887232187356, "learning_rate": 5.064071370640715e-07, "loss": 0.5461, "step": 31129 }, { "epoch": 0.9088785728883828, "grad_norm": 0.7161490234422344, "learning_rate": 5.062449310624494e-07, "loss": 0.6145, "step": 31130 }, { "epoch": 0.9089077691162302, "grad_norm": 0.7365178708189627, "learning_rate": 5.060827250608273e-07, "loss": 0.675, "step": 31131 }, { "epoch": 0.9089369653440775, "grad_norm": 0.6824280689352077, "learning_rate": 5.059205190592053e-07, "loss": 0.5847, "step": 31132 }, { "epoch": 0.9089661615719249, "grad_norm": 0.6846317586323426, "learning_rate": 5.057583130575832e-07, "loss": 0.5786, "step": 31133 }, { "epoch": 0.9089953577997723, "grad_norm": 0.6958490757301028, "learning_rate": 5.055961070559611e-07, "loss": 0.5964, "step": 31134 }, { "epoch": 0.9090245540276196, "grad_norm": 0.7563481246176325, "learning_rate": 5.05433901054339e-07, "loss": 0.5951, "step": 31135 }, { "epoch": 0.909053750255467, "grad_norm": 0.7037882276071739, "learning_rate": 5.05271695052717e-07, "loss": 0.6217, "step": 31136 }, { "epoch": 0.9090829464833143, "grad_norm": 0.7714297100686311, "learning_rate": 5.051094890510949e-07, "loss": 0.7108, "step": 31137 }, { "epoch": 0.9091121427111617, "grad_norm": 0.6956474280042032, "learning_rate": 5.049472830494729e-07, "loss": 0.6192, "step": 31138 }, { "epoch": 0.9091413389390091, "grad_norm": 0.7501992461499873, "learning_rate": 5.047850770478508e-07, "loss": 0.7394, "step": 31139 }, { "epoch": 0.9091705351668564, "grad_norm": 0.6898320348393906, "learning_rate": 5.046228710462288e-07, "loss": 0.5834, "step": 31140 }, { "epoch": 0.9091997313947038, "grad_norm": 0.723312411108213, "learning_rate": 5.044606650446067e-07, "loss": 0.6888, "step": 31141 }, { "epoch": 0.9092289276225511, "grad_norm": 0.7379540664700185, "learning_rate": 5.042984590429846e-07, "loss": 0.6948, "step": 31142 }, { "epoch": 0.9092581238503985, "grad_norm": 0.7479378911433013, "learning_rate": 5.041362530413626e-07, "loss": 0.6649, "step": 31143 }, { "epoch": 0.9092873200782459, "grad_norm": 0.7482391145319517, "learning_rate": 5.039740470397405e-07, "loss": 0.651, "step": 31144 }, { "epoch": 0.9093165163060932, "grad_norm": 0.6812286100705282, "learning_rate": 5.038118410381185e-07, "loss": 0.5758, "step": 31145 }, { "epoch": 0.9093457125339406, "grad_norm": 0.7132327020555669, "learning_rate": 5.036496350364964e-07, "loss": 0.6106, "step": 31146 }, { "epoch": 0.909374908761788, "grad_norm": 0.6899395865573729, "learning_rate": 5.034874290348743e-07, "loss": 0.5637, "step": 31147 }, { "epoch": 0.9094041049896353, "grad_norm": 0.7239052274855476, "learning_rate": 5.033252230332523e-07, "loss": 0.6421, "step": 31148 }, { "epoch": 0.9094333012174827, "grad_norm": 0.7507147564529054, "learning_rate": 5.031630170316302e-07, "loss": 0.6852, "step": 31149 }, { "epoch": 0.90946249744533, "grad_norm": 0.7220877583093394, "learning_rate": 5.030008110300081e-07, "loss": 0.6287, "step": 31150 }, { "epoch": 0.9094916936731774, "grad_norm": 0.6669132773293404, "learning_rate": 5.028386050283861e-07, "loss": 0.5126, "step": 31151 }, { "epoch": 0.9095208899010248, "grad_norm": 0.7538308397855455, "learning_rate": 5.02676399026764e-07, "loss": 0.6991, "step": 31152 }, { "epoch": 0.9095500861288721, "grad_norm": 0.7373512831089888, "learning_rate": 5.025141930251419e-07, "loss": 0.6857, "step": 31153 }, { "epoch": 0.9095792823567195, "grad_norm": 0.6786077747041978, "learning_rate": 5.023519870235198e-07, "loss": 0.5556, "step": 31154 }, { "epoch": 0.9096084785845668, "grad_norm": 0.686058959253367, "learning_rate": 5.021897810218978e-07, "loss": 0.5981, "step": 31155 }, { "epoch": 0.9096376748124142, "grad_norm": 0.6863453286868623, "learning_rate": 5.020275750202757e-07, "loss": 0.5994, "step": 31156 }, { "epoch": 0.9096668710402616, "grad_norm": 0.8021082662059618, "learning_rate": 5.018653690186537e-07, "loss": 0.7181, "step": 31157 }, { "epoch": 0.9096960672681089, "grad_norm": 0.7210675483871961, "learning_rate": 5.017031630170316e-07, "loss": 0.6711, "step": 31158 }, { "epoch": 0.9097252634959563, "grad_norm": 0.7288714101437478, "learning_rate": 5.015409570154097e-07, "loss": 0.6385, "step": 31159 }, { "epoch": 0.9097544597238036, "grad_norm": 0.7579390256477989, "learning_rate": 5.013787510137876e-07, "loss": 0.6552, "step": 31160 }, { "epoch": 0.909783655951651, "grad_norm": 0.6588057549068577, "learning_rate": 5.012165450121655e-07, "loss": 0.5722, "step": 31161 }, { "epoch": 0.9098128521794984, "grad_norm": 0.7572326676870588, "learning_rate": 5.010543390105435e-07, "loss": 0.7265, "step": 31162 }, { "epoch": 0.9098420484073457, "grad_norm": 0.8819220478330723, "learning_rate": 5.008921330089214e-07, "loss": 0.8064, "step": 31163 }, { "epoch": 0.9098712446351931, "grad_norm": 0.6784110653184663, "learning_rate": 5.007299270072994e-07, "loss": 0.5712, "step": 31164 }, { "epoch": 0.9099004408630404, "grad_norm": 0.6673179660249835, "learning_rate": 5.005677210056773e-07, "loss": 0.5539, "step": 31165 }, { "epoch": 0.9099296370908878, "grad_norm": 0.7099116603409398, "learning_rate": 5.004055150040552e-07, "loss": 0.6025, "step": 31166 }, { "epoch": 0.9099588333187352, "grad_norm": 0.7638763324654991, "learning_rate": 5.002433090024332e-07, "loss": 0.6732, "step": 31167 }, { "epoch": 0.9099880295465825, "grad_norm": 0.7522478066154239, "learning_rate": 5.000811030008111e-07, "loss": 0.6843, "step": 31168 }, { "epoch": 0.9100172257744299, "grad_norm": 0.74350527445159, "learning_rate": 4.99918896999189e-07, "loss": 0.6254, "step": 31169 }, { "epoch": 0.9100464220022773, "grad_norm": 0.7219084654816587, "learning_rate": 4.997566909975669e-07, "loss": 0.669, "step": 31170 }, { "epoch": 0.9100756182301246, "grad_norm": 0.6713131416144474, "learning_rate": 4.995944849959449e-07, "loss": 0.5699, "step": 31171 }, { "epoch": 0.910104814457972, "grad_norm": 0.8221543666286257, "learning_rate": 4.994322789943228e-07, "loss": 0.6861, "step": 31172 }, { "epoch": 0.9101340106858193, "grad_norm": 0.740117562526895, "learning_rate": 4.992700729927008e-07, "loss": 0.6248, "step": 31173 }, { "epoch": 0.9101632069136667, "grad_norm": 0.7179407844156375, "learning_rate": 4.991078669910787e-07, "loss": 0.6483, "step": 31174 }, { "epoch": 0.9101924031415141, "grad_norm": 0.8007909688416618, "learning_rate": 4.989456609894566e-07, "loss": 0.674, "step": 31175 }, { "epoch": 0.9102215993693615, "grad_norm": 0.7166783558400146, "learning_rate": 4.987834549878346e-07, "loss": 0.6491, "step": 31176 }, { "epoch": 0.9102507955972089, "grad_norm": 0.7289926241938894, "learning_rate": 4.986212489862125e-07, "loss": 0.655, "step": 31177 }, { "epoch": 0.9102799918250563, "grad_norm": 0.736630922058097, "learning_rate": 4.984590429845905e-07, "loss": 0.7043, "step": 31178 }, { "epoch": 0.9103091880529036, "grad_norm": 0.8353451886774234, "learning_rate": 4.982968369829684e-07, "loss": 0.6479, "step": 31179 }, { "epoch": 0.910338384280751, "grad_norm": 0.7107993952297901, "learning_rate": 4.981346309813463e-07, "loss": 0.6238, "step": 31180 }, { "epoch": 0.9103675805085983, "grad_norm": 0.713822491257093, "learning_rate": 4.979724249797243e-07, "loss": 0.6356, "step": 31181 }, { "epoch": 0.9103967767364457, "grad_norm": 0.6884909953701621, "learning_rate": 4.978102189781022e-07, "loss": 0.5534, "step": 31182 }, { "epoch": 0.9104259729642931, "grad_norm": 0.7519693988551637, "learning_rate": 4.976480129764802e-07, "loss": 0.6053, "step": 31183 }, { "epoch": 0.9104551691921404, "grad_norm": 0.7102537476248939, "learning_rate": 4.974858069748581e-07, "loss": 0.5912, "step": 31184 }, { "epoch": 0.9104843654199878, "grad_norm": 0.710346662957248, "learning_rate": 4.97323600973236e-07, "loss": 0.6415, "step": 31185 }, { "epoch": 0.9105135616478351, "grad_norm": 0.8059086731082324, "learning_rate": 4.97161394971614e-07, "loss": 0.6778, "step": 31186 }, { "epoch": 0.9105427578756825, "grad_norm": 0.7218630277350961, "learning_rate": 4.969991889699919e-07, "loss": 0.6305, "step": 31187 }, { "epoch": 0.9105719541035299, "grad_norm": 0.7101971239822973, "learning_rate": 4.968369829683698e-07, "loss": 0.6402, "step": 31188 }, { "epoch": 0.9106011503313772, "grad_norm": 0.7301347570421204, "learning_rate": 4.966747769667477e-07, "loss": 0.6939, "step": 31189 }, { "epoch": 0.9106303465592246, "grad_norm": 0.708485243772906, "learning_rate": 4.965125709651257e-07, "loss": 0.6135, "step": 31190 }, { "epoch": 0.910659542787072, "grad_norm": 0.6593051842244899, "learning_rate": 4.963503649635036e-07, "loss": 0.5125, "step": 31191 }, { "epoch": 0.9106887390149193, "grad_norm": 0.7813911053320927, "learning_rate": 4.961881589618817e-07, "loss": 0.7074, "step": 31192 }, { "epoch": 0.9107179352427667, "grad_norm": 0.7250991055169853, "learning_rate": 4.960259529602596e-07, "loss": 0.6604, "step": 31193 }, { "epoch": 0.910747131470614, "grad_norm": 0.7093965238069572, "learning_rate": 4.958637469586375e-07, "loss": 0.6247, "step": 31194 }, { "epoch": 0.9107763276984614, "grad_norm": 0.6870027032847564, "learning_rate": 4.957015409570155e-07, "loss": 0.6109, "step": 31195 }, { "epoch": 0.9108055239263088, "grad_norm": 0.7898647112747259, "learning_rate": 4.955393349553934e-07, "loss": 0.7435, "step": 31196 }, { "epoch": 0.9108347201541561, "grad_norm": 0.7205566938829032, "learning_rate": 4.953771289537714e-07, "loss": 0.6149, "step": 31197 }, { "epoch": 0.9108639163820035, "grad_norm": 0.8364403064033601, "learning_rate": 4.952149229521493e-07, "loss": 0.634, "step": 31198 }, { "epoch": 0.9108931126098508, "grad_norm": 0.6971905540029407, "learning_rate": 4.950527169505273e-07, "loss": 0.6338, "step": 31199 }, { "epoch": 0.9109223088376982, "grad_norm": 0.7304070590420582, "learning_rate": 4.948905109489052e-07, "loss": 0.6145, "step": 31200 }, { "epoch": 0.9109515050655456, "grad_norm": 0.7398630485553823, "learning_rate": 4.947283049472831e-07, "loss": 0.6684, "step": 31201 }, { "epoch": 0.9109807012933929, "grad_norm": 0.7814901674479542, "learning_rate": 4.945660989456611e-07, "loss": 0.701, "step": 31202 }, { "epoch": 0.9110098975212403, "grad_norm": 0.7994268772981671, "learning_rate": 4.94403892944039e-07, "loss": 0.7277, "step": 31203 }, { "epoch": 0.9110390937490876, "grad_norm": 0.714448155005463, "learning_rate": 4.942416869424169e-07, "loss": 0.6718, "step": 31204 }, { "epoch": 0.911068289976935, "grad_norm": 0.7314044440030312, "learning_rate": 4.940794809407949e-07, "loss": 0.6226, "step": 31205 }, { "epoch": 0.9110974862047824, "grad_norm": 0.6824811199142817, "learning_rate": 4.939172749391728e-07, "loss": 0.5474, "step": 31206 }, { "epoch": 0.9111266824326297, "grad_norm": 0.6880117327567018, "learning_rate": 4.937550689375507e-07, "loss": 0.5635, "step": 31207 }, { "epoch": 0.9111558786604771, "grad_norm": 0.8084992736427411, "learning_rate": 4.935928629359286e-07, "loss": 0.7212, "step": 31208 }, { "epoch": 0.9111850748883245, "grad_norm": 0.6985362442048942, "learning_rate": 4.934306569343066e-07, "loss": 0.6057, "step": 31209 }, { "epoch": 0.9112142711161718, "grad_norm": 0.6834279954190001, "learning_rate": 4.932684509326845e-07, "loss": 0.5916, "step": 31210 }, { "epoch": 0.9112434673440192, "grad_norm": 0.7291508239732731, "learning_rate": 4.931062449310625e-07, "loss": 0.651, "step": 31211 }, { "epoch": 0.9112726635718665, "grad_norm": 0.7695148361973821, "learning_rate": 4.929440389294404e-07, "loss": 0.7012, "step": 31212 }, { "epoch": 0.9113018597997139, "grad_norm": 0.7654210392881611, "learning_rate": 4.927818329278183e-07, "loss": 0.6619, "step": 31213 }, { "epoch": 0.9113310560275613, "grad_norm": 0.773948685860646, "learning_rate": 4.926196269261963e-07, "loss": 0.7036, "step": 31214 }, { "epoch": 0.9113602522554086, "grad_norm": 0.7256819876802854, "learning_rate": 4.924574209245742e-07, "loss": 0.6172, "step": 31215 }, { "epoch": 0.911389448483256, "grad_norm": 0.7760184355223942, "learning_rate": 4.922952149229522e-07, "loss": 0.719, "step": 31216 }, { "epoch": 0.9114186447111033, "grad_norm": 0.6897603337326124, "learning_rate": 4.921330089213301e-07, "loss": 0.6018, "step": 31217 }, { "epoch": 0.9114478409389507, "grad_norm": 0.8167207323652703, "learning_rate": 4.919708029197081e-07, "loss": 0.7649, "step": 31218 }, { "epoch": 0.9114770371667981, "grad_norm": 0.6818718837040328, "learning_rate": 4.91808596918086e-07, "loss": 0.5879, "step": 31219 }, { "epoch": 0.9115062333946454, "grad_norm": 0.6951818697818227, "learning_rate": 4.916463909164639e-07, "loss": 0.5938, "step": 31220 }, { "epoch": 0.9115354296224928, "grad_norm": 0.7066321156274213, "learning_rate": 4.914841849148419e-07, "loss": 0.5834, "step": 31221 }, { "epoch": 0.9115646258503401, "grad_norm": 0.6717237214665507, "learning_rate": 4.913219789132198e-07, "loss": 0.5924, "step": 31222 }, { "epoch": 0.9115938220781875, "grad_norm": 0.7200060706043322, "learning_rate": 4.911597729115977e-07, "loss": 0.6785, "step": 31223 }, { "epoch": 0.9116230183060349, "grad_norm": 0.7007968046658274, "learning_rate": 4.909975669099756e-07, "loss": 0.647, "step": 31224 }, { "epoch": 0.9116522145338822, "grad_norm": 0.9378660579484084, "learning_rate": 4.908353609083537e-07, "loss": 0.6499, "step": 31225 }, { "epoch": 0.9116814107617296, "grad_norm": 0.7699574371131005, "learning_rate": 4.906731549067316e-07, "loss": 0.7013, "step": 31226 }, { "epoch": 0.911710606989577, "grad_norm": 0.6899353773481912, "learning_rate": 4.905109489051095e-07, "loss": 0.5646, "step": 31227 }, { "epoch": 0.9117398032174243, "grad_norm": 0.7419001431350332, "learning_rate": 4.903487429034875e-07, "loss": 0.6717, "step": 31228 }, { "epoch": 0.9117689994452717, "grad_norm": 0.6638724773538316, "learning_rate": 4.901865369018654e-07, "loss": 0.5594, "step": 31229 }, { "epoch": 0.911798195673119, "grad_norm": 0.6919167711541829, "learning_rate": 4.900243309002434e-07, "loss": 0.5316, "step": 31230 }, { "epoch": 0.9118273919009664, "grad_norm": 0.7403742401593004, "learning_rate": 4.898621248986213e-07, "loss": 0.6601, "step": 31231 }, { "epoch": 0.9118565881288138, "grad_norm": 0.6941732403652092, "learning_rate": 4.896999188969993e-07, "loss": 0.5991, "step": 31232 }, { "epoch": 0.9118857843566611, "grad_norm": 0.7201105784428646, "learning_rate": 4.895377128953772e-07, "loss": 0.5998, "step": 31233 }, { "epoch": 0.9119149805845085, "grad_norm": 0.7240130031815175, "learning_rate": 4.893755068937551e-07, "loss": 0.6397, "step": 31234 }, { "epoch": 0.9119441768123558, "grad_norm": 0.7441780172285232, "learning_rate": 4.892133008921331e-07, "loss": 0.563, "step": 31235 }, { "epoch": 0.9119733730402032, "grad_norm": 0.6833002795195787, "learning_rate": 4.89051094890511e-07, "loss": 0.5948, "step": 31236 }, { "epoch": 0.9120025692680506, "grad_norm": 0.9800627426089691, "learning_rate": 4.88888888888889e-07, "loss": 0.7091, "step": 31237 }, { "epoch": 0.9120317654958979, "grad_norm": 0.8072922475153725, "learning_rate": 4.887266828872669e-07, "loss": 0.6732, "step": 31238 }, { "epoch": 0.9120609617237453, "grad_norm": 0.7030437703411152, "learning_rate": 4.885644768856448e-07, "loss": 0.6382, "step": 31239 }, { "epoch": 0.9120901579515927, "grad_norm": 0.8923250009865688, "learning_rate": 4.884022708840228e-07, "loss": 0.6446, "step": 31240 }, { "epoch": 0.91211935417944, "grad_norm": 0.7071792664792064, "learning_rate": 4.882400648824007e-07, "loss": 0.6615, "step": 31241 }, { "epoch": 0.9121485504072874, "grad_norm": 0.73782742778644, "learning_rate": 4.880778588807786e-07, "loss": 0.5783, "step": 31242 }, { "epoch": 0.9121777466351347, "grad_norm": 0.7837004032067665, "learning_rate": 4.879156528791565e-07, "loss": 0.7374, "step": 31243 }, { "epoch": 0.9122069428629821, "grad_norm": 0.7327912762435972, "learning_rate": 4.877534468775345e-07, "loss": 0.5548, "step": 31244 }, { "epoch": 0.9122361390908295, "grad_norm": 0.6486379391794934, "learning_rate": 4.875912408759124e-07, "loss": 0.5174, "step": 31245 }, { "epoch": 0.9122653353186768, "grad_norm": 0.6955290663503279, "learning_rate": 4.874290348742903e-07, "loss": 0.6198, "step": 31246 }, { "epoch": 0.9122945315465242, "grad_norm": 0.7284967743165995, "learning_rate": 4.872668288726683e-07, "loss": 0.6517, "step": 31247 }, { "epoch": 0.9123237277743715, "grad_norm": 0.6939417005643083, "learning_rate": 4.871046228710462e-07, "loss": 0.6067, "step": 31248 }, { "epoch": 0.9123529240022189, "grad_norm": 0.7353453566728935, "learning_rate": 4.869424168694242e-07, "loss": 0.6508, "step": 31249 }, { "epoch": 0.9123821202300663, "grad_norm": 0.7342707294066912, "learning_rate": 4.867802108678021e-07, "loss": 0.6241, "step": 31250 }, { "epoch": 0.9124113164579136, "grad_norm": 0.7915874355913948, "learning_rate": 4.866180048661801e-07, "loss": 0.6847, "step": 31251 }, { "epoch": 0.912440512685761, "grad_norm": 0.7185821311592824, "learning_rate": 4.86455798864558e-07, "loss": 0.6168, "step": 31252 }, { "epoch": 0.9124697089136083, "grad_norm": 0.7674879337362234, "learning_rate": 4.862935928629359e-07, "loss": 0.6862, "step": 31253 }, { "epoch": 0.9124989051414557, "grad_norm": 0.7347531649979495, "learning_rate": 4.86131386861314e-07, "loss": 0.6653, "step": 31254 }, { "epoch": 0.9125281013693031, "grad_norm": 0.7136805919589957, "learning_rate": 4.859691808596918e-07, "loss": 0.6062, "step": 31255 }, { "epoch": 0.9125572975971504, "grad_norm": 0.7502388691988688, "learning_rate": 4.858069748580699e-07, "loss": 0.6952, "step": 31256 }, { "epoch": 0.9125864938249978, "grad_norm": 0.72314260479556, "learning_rate": 4.856447688564478e-07, "loss": 0.6507, "step": 31257 }, { "epoch": 0.9126156900528452, "grad_norm": 0.7562742273054732, "learning_rate": 4.854825628548257e-07, "loss": 0.6236, "step": 31258 }, { "epoch": 0.9126448862806925, "grad_norm": 0.7424933922306091, "learning_rate": 4.853203568532037e-07, "loss": 0.644, "step": 31259 }, { "epoch": 0.9126740825085399, "grad_norm": 0.6435059676838849, "learning_rate": 4.851581508515816e-07, "loss": 0.5113, "step": 31260 }, { "epoch": 0.9127032787363872, "grad_norm": 0.6981222249800901, "learning_rate": 4.849959448499595e-07, "loss": 0.6083, "step": 31261 }, { "epoch": 0.9127324749642346, "grad_norm": 0.7422521424078937, "learning_rate": 4.848337388483374e-07, "loss": 0.6827, "step": 31262 }, { "epoch": 0.912761671192082, "grad_norm": 0.7428383307041782, "learning_rate": 4.846715328467154e-07, "loss": 0.7548, "step": 31263 }, { "epoch": 0.9127908674199293, "grad_norm": 0.72734501036103, "learning_rate": 4.845093268450933e-07, "loss": 0.6337, "step": 31264 }, { "epoch": 0.9128200636477767, "grad_norm": 0.7689048840653415, "learning_rate": 4.843471208434712e-07, "loss": 0.6932, "step": 31265 }, { "epoch": 0.912849259875624, "grad_norm": 0.7017817876200229, "learning_rate": 4.841849148418492e-07, "loss": 0.6483, "step": 31266 }, { "epoch": 0.9128784561034714, "grad_norm": 0.7122459079836639, "learning_rate": 4.840227088402271e-07, "loss": 0.6143, "step": 31267 }, { "epoch": 0.9129076523313188, "grad_norm": 0.7585236714960785, "learning_rate": 4.838605028386051e-07, "loss": 0.652, "step": 31268 }, { "epoch": 0.9129368485591661, "grad_norm": 0.7430895720239267, "learning_rate": 4.83698296836983e-07, "loss": 0.6525, "step": 31269 }, { "epoch": 0.9129660447870135, "grad_norm": 0.6640257654932681, "learning_rate": 4.83536090835361e-07, "loss": 0.5283, "step": 31270 }, { "epoch": 0.9129952410148608, "grad_norm": 0.6782974818005585, "learning_rate": 4.833738848337389e-07, "loss": 0.5472, "step": 31271 }, { "epoch": 0.9130244372427082, "grad_norm": 0.6850747891708749, "learning_rate": 4.832116788321168e-07, "loss": 0.6172, "step": 31272 }, { "epoch": 0.9130536334705556, "grad_norm": 0.7449989832907815, "learning_rate": 4.830494728304948e-07, "loss": 0.6663, "step": 31273 }, { "epoch": 0.9130828296984029, "grad_norm": 0.6601742349806258, "learning_rate": 4.828872668288727e-07, "loss": 0.5488, "step": 31274 }, { "epoch": 0.9131120259262503, "grad_norm": 0.6925671464666603, "learning_rate": 4.827250608272507e-07, "loss": 0.6119, "step": 31275 }, { "epoch": 0.9131412221540977, "grad_norm": 0.6994891049661934, "learning_rate": 4.825628548256286e-07, "loss": 0.6223, "step": 31276 }, { "epoch": 0.913170418381945, "grad_norm": 0.6971672000452861, "learning_rate": 4.824006488240065e-07, "loss": 0.5852, "step": 31277 }, { "epoch": 0.9131996146097924, "grad_norm": 0.7292668109776992, "learning_rate": 4.822384428223844e-07, "loss": 0.6424, "step": 31278 }, { "epoch": 0.9132288108376397, "grad_norm": 0.7065354616430392, "learning_rate": 4.820762368207624e-07, "loss": 0.6057, "step": 31279 }, { "epoch": 0.9132580070654871, "grad_norm": 0.7265380428379826, "learning_rate": 4.819140308191403e-07, "loss": 0.642, "step": 31280 }, { "epoch": 0.9132872032933345, "grad_norm": 0.6577882836966845, "learning_rate": 4.817518248175182e-07, "loss": 0.5075, "step": 31281 }, { "epoch": 0.9133163995211818, "grad_norm": 0.7255518717035292, "learning_rate": 4.815896188158962e-07, "loss": 0.6436, "step": 31282 }, { "epoch": 0.9133455957490292, "grad_norm": 0.7002444863759355, "learning_rate": 4.814274128142741e-07, "loss": 0.6209, "step": 31283 }, { "epoch": 0.9133747919768765, "grad_norm": 0.7163595643720249, "learning_rate": 4.812652068126521e-07, "loss": 0.603, "step": 31284 }, { "epoch": 0.9134039882047239, "grad_norm": 0.6851031276281816, "learning_rate": 4.8110300081103e-07, "loss": 0.5737, "step": 31285 }, { "epoch": 0.9134331844325713, "grad_norm": 0.6992459124682183, "learning_rate": 4.809407948094079e-07, "loss": 0.6216, "step": 31286 }, { "epoch": 0.9134623806604186, "grad_norm": 0.6462492179738725, "learning_rate": 4.80778588807786e-07, "loss": 0.5471, "step": 31287 }, { "epoch": 0.913491576888266, "grad_norm": 0.7456690928831784, "learning_rate": 4.806163828061638e-07, "loss": 0.6776, "step": 31288 }, { "epoch": 0.9135207731161133, "grad_norm": 0.6897143962237097, "learning_rate": 4.804541768045419e-07, "loss": 0.5825, "step": 31289 }, { "epoch": 0.9135499693439607, "grad_norm": 0.6972064825134877, "learning_rate": 4.802919708029198e-07, "loss": 0.6027, "step": 31290 }, { "epoch": 0.9135791655718081, "grad_norm": 0.7624090002552228, "learning_rate": 4.801297648012978e-07, "loss": 0.6729, "step": 31291 }, { "epoch": 0.9136083617996554, "grad_norm": 0.7808178275096825, "learning_rate": 4.799675587996757e-07, "loss": 0.675, "step": 31292 }, { "epoch": 0.9136375580275028, "grad_norm": 0.6864093981886067, "learning_rate": 4.798053527980536e-07, "loss": 0.5887, "step": 31293 }, { "epoch": 0.9136667542553502, "grad_norm": 0.7148298078680356, "learning_rate": 4.796431467964316e-07, "loss": 0.601, "step": 31294 }, { "epoch": 0.9136959504831975, "grad_norm": 0.6699292377915276, "learning_rate": 4.794809407948095e-07, "loss": 0.5348, "step": 31295 }, { "epoch": 0.9137251467110449, "grad_norm": 0.7158057358548098, "learning_rate": 4.793187347931874e-07, "loss": 0.5702, "step": 31296 }, { "epoch": 0.9137543429388923, "grad_norm": 0.7323666502801333, "learning_rate": 4.791565287915653e-07, "loss": 0.6533, "step": 31297 }, { "epoch": 0.9137835391667397, "grad_norm": 0.7120754620401712, "learning_rate": 4.789943227899433e-07, "loss": 0.6677, "step": 31298 }, { "epoch": 0.9138127353945871, "grad_norm": 0.7785600103237601, "learning_rate": 4.788321167883212e-07, "loss": 0.7229, "step": 31299 }, { "epoch": 0.9138419316224344, "grad_norm": 0.7465715151932073, "learning_rate": 4.786699107866991e-07, "loss": 0.6376, "step": 31300 }, { "epoch": 0.9138711278502818, "grad_norm": 0.6760661835905419, "learning_rate": 4.785077047850771e-07, "loss": 0.5552, "step": 31301 }, { "epoch": 0.9139003240781292, "grad_norm": 0.7645139373494576, "learning_rate": 4.78345498783455e-07, "loss": 0.7462, "step": 31302 }, { "epoch": 0.9139295203059765, "grad_norm": 0.7483905131718687, "learning_rate": 4.78183292781833e-07, "loss": 0.6598, "step": 31303 }, { "epoch": 0.9139587165338239, "grad_norm": 0.7034867765648283, "learning_rate": 4.780210867802109e-07, "loss": 0.5886, "step": 31304 }, { "epoch": 0.9139879127616712, "grad_norm": 0.7527189102569767, "learning_rate": 4.778588807785888e-07, "loss": 0.6479, "step": 31305 }, { "epoch": 0.9140171089895186, "grad_norm": 0.6853372535706911, "learning_rate": 4.776966747769668e-07, "loss": 0.5838, "step": 31306 }, { "epoch": 0.914046305217366, "grad_norm": 0.769850552778461, "learning_rate": 4.775344687753447e-07, "loss": 0.6888, "step": 31307 }, { "epoch": 0.9140755014452133, "grad_norm": 0.6886358131729116, "learning_rate": 4.773722627737227e-07, "loss": 0.5945, "step": 31308 }, { "epoch": 0.9141046976730607, "grad_norm": 0.823998523649613, "learning_rate": 4.772100567721006e-07, "loss": 0.6781, "step": 31309 }, { "epoch": 0.914133893900908, "grad_norm": 0.7336023169176452, "learning_rate": 4.770478507704786e-07, "loss": 0.6361, "step": 31310 }, { "epoch": 0.9141630901287554, "grad_norm": 0.7127320895900718, "learning_rate": 4.768856447688565e-07, "loss": 0.6645, "step": 31311 }, { "epoch": 0.9141922863566028, "grad_norm": 0.6979009869970272, "learning_rate": 4.767234387672344e-07, "loss": 0.6461, "step": 31312 }, { "epoch": 0.9142214825844501, "grad_norm": 0.7186335971894325, "learning_rate": 4.765612327656124e-07, "loss": 0.6085, "step": 31313 }, { "epoch": 0.9142506788122975, "grad_norm": 0.7426373854084146, "learning_rate": 4.763990267639903e-07, "loss": 0.7031, "step": 31314 }, { "epoch": 0.9142798750401449, "grad_norm": 0.7571034597434773, "learning_rate": 4.762368207623683e-07, "loss": 0.6514, "step": 31315 }, { "epoch": 0.9143090712679922, "grad_norm": 0.7524862948153874, "learning_rate": 4.760746147607462e-07, "loss": 0.6782, "step": 31316 }, { "epoch": 0.9143382674958396, "grad_norm": 0.7011278108664103, "learning_rate": 4.7591240875912414e-07, "loss": 0.6149, "step": 31317 }, { "epoch": 0.9143674637236869, "grad_norm": 0.669468325261799, "learning_rate": 4.757502027575021e-07, "loss": 0.5345, "step": 31318 }, { "epoch": 0.9143966599515343, "grad_norm": 0.7187458242229533, "learning_rate": 4.7558799675588e-07, "loss": 0.6404, "step": 31319 }, { "epoch": 0.9144258561793817, "grad_norm": 0.6923364868258682, "learning_rate": 4.7542579075425795e-07, "loss": 0.6225, "step": 31320 }, { "epoch": 0.914455052407229, "grad_norm": 0.7106508209828023, "learning_rate": 4.7526358475263585e-07, "loss": 0.6563, "step": 31321 }, { "epoch": 0.9144842486350764, "grad_norm": 0.7214126834542898, "learning_rate": 4.7510137875101386e-07, "loss": 0.5784, "step": 31322 }, { "epoch": 0.9145134448629237, "grad_norm": 0.7419604936224119, "learning_rate": 4.7493917274939176e-07, "loss": 0.6344, "step": 31323 }, { "epoch": 0.9145426410907711, "grad_norm": 0.7027826226098632, "learning_rate": 4.7477696674776976e-07, "loss": 0.6507, "step": 31324 }, { "epoch": 0.9145718373186185, "grad_norm": 0.7163804939898953, "learning_rate": 4.7461476074614767e-07, "loss": 0.5938, "step": 31325 }, { "epoch": 0.9146010335464658, "grad_norm": 0.7193721802314548, "learning_rate": 4.7445255474452557e-07, "loss": 0.6162, "step": 31326 }, { "epoch": 0.9146302297743132, "grad_norm": 0.7227868587208472, "learning_rate": 4.742903487429035e-07, "loss": 0.6617, "step": 31327 }, { "epoch": 0.9146594260021605, "grad_norm": 0.6691033536943535, "learning_rate": 4.741281427412815e-07, "loss": 0.523, "step": 31328 }, { "epoch": 0.9146886222300079, "grad_norm": 0.7527855700332601, "learning_rate": 4.7396593673965943e-07, "loss": 0.644, "step": 31329 }, { "epoch": 0.9147178184578553, "grad_norm": 0.7682142282320188, "learning_rate": 4.7380373073803733e-07, "loss": 0.6619, "step": 31330 }, { "epoch": 0.9147470146857026, "grad_norm": 0.6745797522502055, "learning_rate": 4.7364152473641523e-07, "loss": 0.5698, "step": 31331 }, { "epoch": 0.91477621091355, "grad_norm": 0.7430135066786344, "learning_rate": 4.7347931873479324e-07, "loss": 0.6766, "step": 31332 }, { "epoch": 0.9148054071413974, "grad_norm": 0.7210895007889501, "learning_rate": 4.7331711273317114e-07, "loss": 0.6228, "step": 31333 }, { "epoch": 0.9148346033692447, "grad_norm": 0.679490348967883, "learning_rate": 4.7315490673154914e-07, "loss": 0.5565, "step": 31334 }, { "epoch": 0.9148637995970921, "grad_norm": 0.8066339163356434, "learning_rate": 4.7299270072992705e-07, "loss": 0.7186, "step": 31335 }, { "epoch": 0.9148929958249394, "grad_norm": 0.6727304496988201, "learning_rate": 4.72830494728305e-07, "loss": 0.6021, "step": 31336 }, { "epoch": 0.9149221920527868, "grad_norm": 0.6907807333455139, "learning_rate": 4.726682887266829e-07, "loss": 0.5965, "step": 31337 }, { "epoch": 0.9149513882806342, "grad_norm": 0.708800272498116, "learning_rate": 4.7250608272506085e-07, "loss": 0.5779, "step": 31338 }, { "epoch": 0.9149805845084815, "grad_norm": 0.7270946336086035, "learning_rate": 4.723438767234388e-07, "loss": 0.651, "step": 31339 }, { "epoch": 0.9150097807363289, "grad_norm": 0.7762264801073353, "learning_rate": 4.721816707218167e-07, "loss": 0.7416, "step": 31340 }, { "epoch": 0.9150389769641762, "grad_norm": 0.714250644133111, "learning_rate": 4.720194647201947e-07, "loss": 0.6141, "step": 31341 }, { "epoch": 0.9150681731920236, "grad_norm": 0.7455684012167768, "learning_rate": 4.718572587185726e-07, "loss": 0.6818, "step": 31342 }, { "epoch": 0.915097369419871, "grad_norm": 0.7781744555888399, "learning_rate": 4.716950527169506e-07, "loss": 0.7084, "step": 31343 }, { "epoch": 0.9151265656477183, "grad_norm": 0.7545599043012791, "learning_rate": 4.715328467153285e-07, "loss": 0.6821, "step": 31344 }, { "epoch": 0.9151557618755657, "grad_norm": 0.7097426923461243, "learning_rate": 4.713706407137064e-07, "loss": 0.6132, "step": 31345 }, { "epoch": 0.915184958103413, "grad_norm": 0.7088417336788359, "learning_rate": 4.712084347120844e-07, "loss": 0.6232, "step": 31346 }, { "epoch": 0.9152141543312604, "grad_norm": 0.6974873517489465, "learning_rate": 4.7104622871046233e-07, "loss": 0.6199, "step": 31347 }, { "epoch": 0.9152433505591078, "grad_norm": 0.7220439370929993, "learning_rate": 4.708840227088403e-07, "loss": 0.6157, "step": 31348 }, { "epoch": 0.9152725467869551, "grad_norm": 0.8008486960020895, "learning_rate": 4.707218167072182e-07, "loss": 0.6775, "step": 31349 }, { "epoch": 0.9153017430148025, "grad_norm": 0.6667005707869589, "learning_rate": 4.705596107055962e-07, "loss": 0.519, "step": 31350 }, { "epoch": 0.9153309392426499, "grad_norm": 0.7575391757540905, "learning_rate": 4.703974047039741e-07, "loss": 0.6058, "step": 31351 }, { "epoch": 0.9153601354704972, "grad_norm": 0.7051844139506932, "learning_rate": 4.70235198702352e-07, "loss": 0.5637, "step": 31352 }, { "epoch": 0.9153893316983446, "grad_norm": 0.6930406974086663, "learning_rate": 4.7007299270073e-07, "loss": 0.5499, "step": 31353 }, { "epoch": 0.9154185279261919, "grad_norm": 0.7173250680344484, "learning_rate": 4.699107866991079e-07, "loss": 0.5997, "step": 31354 }, { "epoch": 0.9154477241540393, "grad_norm": 0.6854818321003894, "learning_rate": 4.6974858069748586e-07, "loss": 0.5507, "step": 31355 }, { "epoch": 0.9154769203818867, "grad_norm": 0.665875126871567, "learning_rate": 4.6958637469586376e-07, "loss": 0.5215, "step": 31356 }, { "epoch": 0.915506116609734, "grad_norm": 0.7731927112073657, "learning_rate": 4.694241686942417e-07, "loss": 0.7597, "step": 31357 }, { "epoch": 0.9155353128375814, "grad_norm": 0.7328610099971017, "learning_rate": 4.6926196269261967e-07, "loss": 0.6716, "step": 31358 }, { "epoch": 0.9155645090654287, "grad_norm": 0.7235823394068513, "learning_rate": 4.6909975669099757e-07, "loss": 0.6534, "step": 31359 }, { "epoch": 0.9155937052932761, "grad_norm": 1.0959937540711955, "learning_rate": 4.689375506893756e-07, "loss": 0.6244, "step": 31360 }, { "epoch": 0.9156229015211235, "grad_norm": 0.7080490069078063, "learning_rate": 4.687753446877535e-07, "loss": 0.5864, "step": 31361 }, { "epoch": 0.9156520977489708, "grad_norm": 0.7826113278063244, "learning_rate": 4.6861313868613143e-07, "loss": 0.6795, "step": 31362 }, { "epoch": 0.9156812939768182, "grad_norm": 0.795117115863185, "learning_rate": 4.684509326845094e-07, "loss": 0.755, "step": 31363 }, { "epoch": 0.9157104902046656, "grad_norm": 0.7271358251879753, "learning_rate": 4.682887266828873e-07, "loss": 0.624, "step": 31364 }, { "epoch": 0.9157396864325129, "grad_norm": 0.789738465666606, "learning_rate": 4.6812652068126524e-07, "loss": 0.7716, "step": 31365 }, { "epoch": 0.9157688826603603, "grad_norm": 0.6381901249675169, "learning_rate": 4.6796431467964314e-07, "loss": 0.4983, "step": 31366 }, { "epoch": 0.9157980788882076, "grad_norm": 0.7454474943311091, "learning_rate": 4.6780210867802114e-07, "loss": 0.686, "step": 31367 }, { "epoch": 0.915827275116055, "grad_norm": 0.7288061157582537, "learning_rate": 4.6763990267639905e-07, "loss": 0.7025, "step": 31368 }, { "epoch": 0.9158564713439024, "grad_norm": 0.7345676471643987, "learning_rate": 4.6747769667477705e-07, "loss": 0.5961, "step": 31369 }, { "epoch": 0.9158856675717497, "grad_norm": 0.6468843051634023, "learning_rate": 4.6731549067315495e-07, "loss": 0.5342, "step": 31370 }, { "epoch": 0.9159148637995971, "grad_norm": 0.7219812161968208, "learning_rate": 4.6715328467153285e-07, "loss": 0.6753, "step": 31371 }, { "epoch": 0.9159440600274444, "grad_norm": 0.7522196201051058, "learning_rate": 4.6699107866991086e-07, "loss": 0.6232, "step": 31372 }, { "epoch": 0.9159732562552918, "grad_norm": 0.7312439844487162, "learning_rate": 4.6682887266828876e-07, "loss": 0.6629, "step": 31373 }, { "epoch": 0.9160024524831392, "grad_norm": 0.6649088634927678, "learning_rate": 4.666666666666667e-07, "loss": 0.5596, "step": 31374 }, { "epoch": 0.9160316487109865, "grad_norm": 0.6761320804157185, "learning_rate": 4.665044606650446e-07, "loss": 0.5889, "step": 31375 }, { "epoch": 0.9160608449388339, "grad_norm": 0.7738648597424227, "learning_rate": 4.663422546634226e-07, "loss": 0.6939, "step": 31376 }, { "epoch": 0.9160900411666812, "grad_norm": 0.6817535232724113, "learning_rate": 4.661800486618005e-07, "loss": 0.5824, "step": 31377 }, { "epoch": 0.9161192373945286, "grad_norm": 0.747950582365063, "learning_rate": 4.660178426601784e-07, "loss": 0.6998, "step": 31378 }, { "epoch": 0.916148433622376, "grad_norm": 0.7003633039634907, "learning_rate": 4.6585563665855643e-07, "loss": 0.632, "step": 31379 }, { "epoch": 0.9161776298502233, "grad_norm": 0.7153560971208346, "learning_rate": 4.6569343065693433e-07, "loss": 0.6458, "step": 31380 }, { "epoch": 0.9162068260780707, "grad_norm": 0.7201880896387692, "learning_rate": 4.655312246553123e-07, "loss": 0.6322, "step": 31381 }, { "epoch": 0.916236022305918, "grad_norm": 0.7640710513170003, "learning_rate": 4.6536901865369024e-07, "loss": 0.6395, "step": 31382 }, { "epoch": 0.9162652185337654, "grad_norm": 0.6792220364796977, "learning_rate": 4.652068126520682e-07, "loss": 0.6118, "step": 31383 }, { "epoch": 0.9162944147616128, "grad_norm": 0.7128803269252123, "learning_rate": 4.650446066504461e-07, "loss": 0.6386, "step": 31384 }, { "epoch": 0.9163236109894601, "grad_norm": 0.7210448749286442, "learning_rate": 4.64882400648824e-07, "loss": 0.6224, "step": 31385 }, { "epoch": 0.9163528072173075, "grad_norm": 0.7459634904838979, "learning_rate": 4.64720194647202e-07, "loss": 0.6945, "step": 31386 }, { "epoch": 0.9163820034451549, "grad_norm": 0.7143607271840721, "learning_rate": 4.645579886455799e-07, "loss": 0.5965, "step": 31387 }, { "epoch": 0.9164111996730022, "grad_norm": 0.7130035140391943, "learning_rate": 4.643957826439579e-07, "loss": 0.5797, "step": 31388 }, { "epoch": 0.9164403959008496, "grad_norm": 0.7431366841476398, "learning_rate": 4.642335766423358e-07, "loss": 0.6874, "step": 31389 }, { "epoch": 0.9164695921286969, "grad_norm": 0.6938282332581405, "learning_rate": 4.640713706407137e-07, "loss": 0.6078, "step": 31390 }, { "epoch": 0.9164987883565443, "grad_norm": 0.7104752570548707, "learning_rate": 4.6390916463909167e-07, "loss": 0.5814, "step": 31391 }, { "epoch": 0.9165279845843917, "grad_norm": 0.7538635852076396, "learning_rate": 4.637469586374696e-07, "loss": 0.6644, "step": 31392 }, { "epoch": 0.916557180812239, "grad_norm": 0.6945156618103286, "learning_rate": 4.635847526358476e-07, "loss": 0.5611, "step": 31393 }, { "epoch": 0.9165863770400864, "grad_norm": 0.7426809803758582, "learning_rate": 4.634225466342255e-07, "loss": 0.6486, "step": 31394 }, { "epoch": 0.9166155732679337, "grad_norm": 0.7311184589788344, "learning_rate": 4.632603406326035e-07, "loss": 0.6377, "step": 31395 }, { "epoch": 0.9166447694957811, "grad_norm": 0.7231779483301516, "learning_rate": 4.630981346309814e-07, "loss": 0.6419, "step": 31396 }, { "epoch": 0.9166739657236285, "grad_norm": 0.7283155241864651, "learning_rate": 4.629359286293593e-07, "loss": 0.671, "step": 31397 }, { "epoch": 0.9167031619514758, "grad_norm": 0.7108619802901454, "learning_rate": 4.627737226277373e-07, "loss": 0.6141, "step": 31398 }, { "epoch": 0.9167323581793232, "grad_norm": 1.0204673526050718, "learning_rate": 4.626115166261152e-07, "loss": 0.664, "step": 31399 }, { "epoch": 0.9167615544071706, "grad_norm": 0.6595452715729845, "learning_rate": 4.6244931062449315e-07, "loss": 0.5476, "step": 31400 }, { "epoch": 0.9167907506350179, "grad_norm": 0.770717257891344, "learning_rate": 4.622871046228711e-07, "loss": 0.6711, "step": 31401 }, { "epoch": 0.9168199468628653, "grad_norm": 0.6932490370026201, "learning_rate": 4.6212489862124905e-07, "loss": 0.637, "step": 31402 }, { "epoch": 0.9168491430907126, "grad_norm": 0.6839896034312385, "learning_rate": 4.6196269261962695e-07, "loss": 0.6137, "step": 31403 }, { "epoch": 0.91687833931856, "grad_norm": 0.7542340864474085, "learning_rate": 4.6180048661800486e-07, "loss": 0.7286, "step": 31404 }, { "epoch": 0.9169075355464074, "grad_norm": 0.7392247811690196, "learning_rate": 4.6163828061638286e-07, "loss": 0.65, "step": 31405 }, { "epoch": 0.9169367317742547, "grad_norm": 0.7901870752240107, "learning_rate": 4.6147607461476076e-07, "loss": 0.7544, "step": 31406 }, { "epoch": 0.9169659280021021, "grad_norm": 0.6940635085138376, "learning_rate": 4.6131386861313877e-07, "loss": 0.6008, "step": 31407 }, { "epoch": 0.9169951242299494, "grad_norm": 0.7419205068912764, "learning_rate": 4.6115166261151667e-07, "loss": 0.6752, "step": 31408 }, { "epoch": 0.9170243204577968, "grad_norm": 0.6542130705736564, "learning_rate": 4.609894566098946e-07, "loss": 0.5126, "step": 31409 }, { "epoch": 0.9170535166856442, "grad_norm": 0.6835749854524896, "learning_rate": 4.608272506082725e-07, "loss": 0.5554, "step": 31410 }, { "epoch": 0.9170827129134915, "grad_norm": 0.6899931113988326, "learning_rate": 4.606650446066505e-07, "loss": 0.5746, "step": 31411 }, { "epoch": 0.9171119091413389, "grad_norm": 0.7273387421431635, "learning_rate": 4.6050283860502843e-07, "loss": 0.6465, "step": 31412 }, { "epoch": 0.9171411053691862, "grad_norm": 0.7509662925084211, "learning_rate": 4.6034063260340633e-07, "loss": 0.6366, "step": 31413 }, { "epoch": 0.9171703015970336, "grad_norm": 0.7401574446206729, "learning_rate": 4.6017842660178434e-07, "loss": 0.639, "step": 31414 }, { "epoch": 0.917199497824881, "grad_norm": 0.7891732812335767, "learning_rate": 4.6001622060016224e-07, "loss": 0.6341, "step": 31415 }, { "epoch": 0.9172286940527283, "grad_norm": 0.7041359781513105, "learning_rate": 4.5985401459854014e-07, "loss": 0.6024, "step": 31416 }, { "epoch": 0.9172578902805758, "grad_norm": 0.7796553767740668, "learning_rate": 4.5969180859691815e-07, "loss": 0.6683, "step": 31417 }, { "epoch": 0.9172870865084232, "grad_norm": 0.6977105617427, "learning_rate": 4.5952960259529605e-07, "loss": 0.5795, "step": 31418 }, { "epoch": 0.9173162827362705, "grad_norm": 0.7831634125745863, "learning_rate": 4.59367396593674e-07, "loss": 0.747, "step": 31419 }, { "epoch": 0.9173454789641179, "grad_norm": 0.6539170842633255, "learning_rate": 4.592051905920519e-07, "loss": 0.4853, "step": 31420 }, { "epoch": 0.9173746751919652, "grad_norm": 0.6986042881731216, "learning_rate": 4.590429845904299e-07, "loss": 0.6247, "step": 31421 }, { "epoch": 0.9174038714198126, "grad_norm": 0.6830651064248053, "learning_rate": 4.588807785888078e-07, "loss": 0.5993, "step": 31422 }, { "epoch": 0.91743306764766, "grad_norm": 0.7634591996413304, "learning_rate": 4.587185725871857e-07, "loss": 0.7094, "step": 31423 }, { "epoch": 0.9174622638755073, "grad_norm": 0.7536173245549974, "learning_rate": 4.585563665855637e-07, "loss": 0.6812, "step": 31424 }, { "epoch": 0.9174914601033547, "grad_norm": 0.7342238269843395, "learning_rate": 4.583941605839416e-07, "loss": 0.6264, "step": 31425 }, { "epoch": 0.9175206563312021, "grad_norm": 1.2061159317955292, "learning_rate": 4.582319545823196e-07, "loss": 0.7208, "step": 31426 }, { "epoch": 0.9175498525590494, "grad_norm": 0.7717769673627928, "learning_rate": 4.5806974858069753e-07, "loss": 0.6609, "step": 31427 }, { "epoch": 0.9175790487868968, "grad_norm": 0.7226836995831644, "learning_rate": 4.579075425790755e-07, "loss": 0.6556, "step": 31428 }, { "epoch": 0.9176082450147441, "grad_norm": 0.6703669737597038, "learning_rate": 4.577453365774534e-07, "loss": 0.5779, "step": 31429 }, { "epoch": 0.9176374412425915, "grad_norm": 0.8521662853809477, "learning_rate": 4.5758313057583134e-07, "loss": 0.6874, "step": 31430 }, { "epoch": 0.9176666374704389, "grad_norm": 0.6603780615531568, "learning_rate": 4.574209245742093e-07, "loss": 0.5787, "step": 31431 }, { "epoch": 0.9176958336982862, "grad_norm": 0.651838632143585, "learning_rate": 4.572587185725872e-07, "loss": 0.5549, "step": 31432 }, { "epoch": 0.9177250299261336, "grad_norm": 0.744228469792055, "learning_rate": 4.570965125709652e-07, "loss": 0.6758, "step": 31433 }, { "epoch": 0.917754226153981, "grad_norm": 0.7128620168912144, "learning_rate": 4.569343065693431e-07, "loss": 0.5929, "step": 31434 }, { "epoch": 0.9177834223818283, "grad_norm": 0.679209229972882, "learning_rate": 4.5677210056772105e-07, "loss": 0.5513, "step": 31435 }, { "epoch": 0.9178126186096757, "grad_norm": 0.691777602923107, "learning_rate": 4.56609894566099e-07, "loss": 0.6308, "step": 31436 }, { "epoch": 0.917841814837523, "grad_norm": 0.6982459231508638, "learning_rate": 4.564476885644769e-07, "loss": 0.6132, "step": 31437 }, { "epoch": 0.9178710110653704, "grad_norm": 0.6744768836657857, "learning_rate": 4.5628548256285486e-07, "loss": 0.6041, "step": 31438 }, { "epoch": 0.9179002072932178, "grad_norm": 0.7106630992216486, "learning_rate": 4.5612327656123276e-07, "loss": 0.6228, "step": 31439 }, { "epoch": 0.9179294035210651, "grad_norm": 0.6744603910366297, "learning_rate": 4.5596107055961077e-07, "loss": 0.5462, "step": 31440 }, { "epoch": 0.9179585997489125, "grad_norm": 0.7369506376864597, "learning_rate": 4.5579886455798867e-07, "loss": 0.6937, "step": 31441 }, { "epoch": 0.9179877959767598, "grad_norm": 0.7384996627124035, "learning_rate": 4.556366585563667e-07, "loss": 0.6706, "step": 31442 }, { "epoch": 0.9180169922046072, "grad_norm": 0.7690902241459605, "learning_rate": 4.554744525547446e-07, "loss": 0.681, "step": 31443 }, { "epoch": 0.9180461884324546, "grad_norm": 0.7737435627984282, "learning_rate": 4.553122465531225e-07, "loss": 0.64, "step": 31444 }, { "epoch": 0.9180753846603019, "grad_norm": 0.8332176371899996, "learning_rate": 4.5515004055150043e-07, "loss": 0.6585, "step": 31445 }, { "epoch": 0.9181045808881493, "grad_norm": 0.6788803318877658, "learning_rate": 4.549878345498784e-07, "loss": 0.5856, "step": 31446 }, { "epoch": 0.9181337771159966, "grad_norm": 0.7474902955875101, "learning_rate": 4.5482562854825634e-07, "loss": 0.643, "step": 31447 }, { "epoch": 0.918162973343844, "grad_norm": 0.729409081537663, "learning_rate": 4.5466342254663424e-07, "loss": 0.6656, "step": 31448 }, { "epoch": 0.9181921695716914, "grad_norm": 0.6684774672294101, "learning_rate": 4.5450121654501214e-07, "loss": 0.5576, "step": 31449 }, { "epoch": 0.9182213657995387, "grad_norm": 0.7399798098953839, "learning_rate": 4.5433901054339015e-07, "loss": 0.6282, "step": 31450 }, { "epoch": 0.9182505620273861, "grad_norm": 0.6567047270912233, "learning_rate": 4.5417680454176805e-07, "loss": 0.5291, "step": 31451 }, { "epoch": 0.9182797582552334, "grad_norm": 0.7026964871233712, "learning_rate": 4.5401459854014606e-07, "loss": 0.6163, "step": 31452 }, { "epoch": 0.9183089544830808, "grad_norm": 0.790858914190662, "learning_rate": 4.5385239253852396e-07, "loss": 0.7511, "step": 31453 }, { "epoch": 0.9183381507109282, "grad_norm": 0.7367290752313481, "learning_rate": 4.536901865369019e-07, "loss": 0.6845, "step": 31454 }, { "epoch": 0.9183673469387755, "grad_norm": 0.7148425426291215, "learning_rate": 4.5352798053527987e-07, "loss": 0.6127, "step": 31455 }, { "epoch": 0.9183965431666229, "grad_norm": 0.6802622888554305, "learning_rate": 4.5336577453365777e-07, "loss": 0.4905, "step": 31456 }, { "epoch": 0.9184257393944703, "grad_norm": 0.7775739345893231, "learning_rate": 4.532035685320357e-07, "loss": 0.7481, "step": 31457 }, { "epoch": 0.9184549356223176, "grad_norm": 0.8024709192450419, "learning_rate": 4.530413625304136e-07, "loss": 0.7616, "step": 31458 }, { "epoch": 0.918484131850165, "grad_norm": 0.7055232823076887, "learning_rate": 4.5287915652879163e-07, "loss": 0.6068, "step": 31459 }, { "epoch": 0.9185133280780123, "grad_norm": 0.7880696561707762, "learning_rate": 4.5271695052716953e-07, "loss": 0.645, "step": 31460 }, { "epoch": 0.9185425243058597, "grad_norm": 0.7339674203370466, "learning_rate": 4.5255474452554754e-07, "loss": 0.6594, "step": 31461 }, { "epoch": 0.9185717205337071, "grad_norm": 0.7278213419689883, "learning_rate": 4.5239253852392544e-07, "loss": 0.6191, "step": 31462 }, { "epoch": 0.9186009167615544, "grad_norm": 0.7680781778240868, "learning_rate": 4.5223033252230334e-07, "loss": 0.7064, "step": 31463 }, { "epoch": 0.9186301129894018, "grad_norm": 0.6811325778233167, "learning_rate": 4.520681265206813e-07, "loss": 0.5594, "step": 31464 }, { "epoch": 0.9186593092172491, "grad_norm": 0.6923447693193279, "learning_rate": 4.5190592051905925e-07, "loss": 0.6089, "step": 31465 }, { "epoch": 0.9186885054450965, "grad_norm": 0.7761118786859985, "learning_rate": 4.517437145174372e-07, "loss": 0.6564, "step": 31466 }, { "epoch": 0.9187177016729439, "grad_norm": 0.711051326995564, "learning_rate": 4.515815085158151e-07, "loss": 0.6264, "step": 31467 }, { "epoch": 0.9187468979007912, "grad_norm": 0.6840835969654728, "learning_rate": 4.514193025141931e-07, "loss": 0.556, "step": 31468 }, { "epoch": 0.9187760941286386, "grad_norm": 0.7429561582582249, "learning_rate": 4.51257096512571e-07, "loss": 0.6581, "step": 31469 }, { "epoch": 0.918805290356486, "grad_norm": 0.7018854711602763, "learning_rate": 4.510948905109489e-07, "loss": 0.6381, "step": 31470 }, { "epoch": 0.9188344865843333, "grad_norm": 0.6882140728202307, "learning_rate": 4.509326845093269e-07, "loss": 0.5767, "step": 31471 }, { "epoch": 0.9188636828121807, "grad_norm": 0.6818600437642547, "learning_rate": 4.507704785077048e-07, "loss": 0.6278, "step": 31472 }, { "epoch": 0.918892879040028, "grad_norm": 0.7985369793265573, "learning_rate": 4.5060827250608277e-07, "loss": 0.711, "step": 31473 }, { "epoch": 0.9189220752678754, "grad_norm": 0.6945860693589062, "learning_rate": 4.5044606650446067e-07, "loss": 0.6132, "step": 31474 }, { "epoch": 0.9189512714957228, "grad_norm": 0.7107768213599104, "learning_rate": 4.5028386050283863e-07, "loss": 0.6294, "step": 31475 }, { "epoch": 0.9189804677235701, "grad_norm": 0.714793981311102, "learning_rate": 4.501216545012166e-07, "loss": 0.6638, "step": 31476 }, { "epoch": 0.9190096639514175, "grad_norm": 0.903169058344867, "learning_rate": 4.499594484995945e-07, "loss": 0.6604, "step": 31477 }, { "epoch": 0.9190388601792648, "grad_norm": 0.6718316020522723, "learning_rate": 4.497972424979725e-07, "loss": 0.5754, "step": 31478 }, { "epoch": 0.9190680564071122, "grad_norm": 0.683426401663349, "learning_rate": 4.496350364963504e-07, "loss": 0.5732, "step": 31479 }, { "epoch": 0.9190972526349596, "grad_norm": 0.7030778152173831, "learning_rate": 4.4947283049472834e-07, "loss": 0.6, "step": 31480 }, { "epoch": 0.9191264488628069, "grad_norm": 0.7149101135355557, "learning_rate": 4.493106244931063e-07, "loss": 0.627, "step": 31481 }, { "epoch": 0.9191556450906543, "grad_norm": 0.6717901614477988, "learning_rate": 4.491484184914842e-07, "loss": 0.5454, "step": 31482 }, { "epoch": 0.9191848413185016, "grad_norm": 0.8489074923133508, "learning_rate": 4.4898621248986215e-07, "loss": 0.729, "step": 31483 }, { "epoch": 0.919214037546349, "grad_norm": 0.7189385665172215, "learning_rate": 4.488240064882401e-07, "loss": 0.6244, "step": 31484 }, { "epoch": 0.9192432337741964, "grad_norm": 0.7453845481643272, "learning_rate": 4.4866180048661806e-07, "loss": 0.6561, "step": 31485 }, { "epoch": 0.9192724300020437, "grad_norm": 0.7479617852892366, "learning_rate": 4.4849959448499596e-07, "loss": 0.6968, "step": 31486 }, { "epoch": 0.9193016262298911, "grad_norm": 0.6635881395799607, "learning_rate": 4.4833738848337397e-07, "loss": 0.5468, "step": 31487 }, { "epoch": 0.9193308224577385, "grad_norm": 0.7081824229859602, "learning_rate": 4.4817518248175187e-07, "loss": 0.6273, "step": 31488 }, { "epoch": 0.9193600186855858, "grad_norm": 0.7584839901623728, "learning_rate": 4.4801297648012977e-07, "loss": 0.6888, "step": 31489 }, { "epoch": 0.9193892149134332, "grad_norm": 0.7690886209227057, "learning_rate": 4.478507704785078e-07, "loss": 0.6894, "step": 31490 }, { "epoch": 0.9194184111412805, "grad_norm": 0.7219753356441553, "learning_rate": 4.476885644768857e-07, "loss": 0.6376, "step": 31491 }, { "epoch": 0.9194476073691279, "grad_norm": 0.6604036728633744, "learning_rate": 4.4752635847526363e-07, "loss": 0.549, "step": 31492 }, { "epoch": 0.9194768035969753, "grad_norm": 0.7285059166806244, "learning_rate": 4.4736415247364153e-07, "loss": 0.5798, "step": 31493 }, { "epoch": 0.9195059998248226, "grad_norm": 0.6750659887133724, "learning_rate": 4.4720194647201954e-07, "loss": 0.5789, "step": 31494 }, { "epoch": 0.91953519605267, "grad_norm": 0.7311003953912308, "learning_rate": 4.4703974047039744e-07, "loss": 0.6093, "step": 31495 }, { "epoch": 0.9195643922805173, "grad_norm": 0.7263282196766381, "learning_rate": 4.4687753446877534e-07, "loss": 0.7022, "step": 31496 }, { "epoch": 0.9195935885083647, "grad_norm": 0.7824725873007908, "learning_rate": 4.4671532846715335e-07, "loss": 0.7359, "step": 31497 }, { "epoch": 0.9196227847362121, "grad_norm": 0.7384522805054153, "learning_rate": 4.4655312246553125e-07, "loss": 0.6339, "step": 31498 }, { "epoch": 0.9196519809640594, "grad_norm": 0.6924896355790309, "learning_rate": 4.463909164639092e-07, "loss": 0.6155, "step": 31499 }, { "epoch": 0.9196811771919068, "grad_norm": 0.7671370228580038, "learning_rate": 4.4622871046228716e-07, "loss": 0.667, "step": 31500 }, { "epoch": 0.9197103734197541, "grad_norm": 0.7099286166050577, "learning_rate": 4.460665044606651e-07, "loss": 0.5994, "step": 31501 }, { "epoch": 0.9197395696476015, "grad_norm": 0.760095311698923, "learning_rate": 4.45904298459043e-07, "loss": 0.7417, "step": 31502 }, { "epoch": 0.9197687658754489, "grad_norm": 0.772571020018178, "learning_rate": 4.457420924574209e-07, "loss": 0.6482, "step": 31503 }, { "epoch": 0.9197979621032962, "grad_norm": 0.7829793419046371, "learning_rate": 4.455798864557989e-07, "loss": 0.7179, "step": 31504 }, { "epoch": 0.9198271583311436, "grad_norm": 0.7218005883794759, "learning_rate": 4.454176804541768e-07, "loss": 0.5883, "step": 31505 }, { "epoch": 0.919856354558991, "grad_norm": 0.7356183371115316, "learning_rate": 4.452554744525548e-07, "loss": 0.683, "step": 31506 }, { "epoch": 0.9198855507868383, "grad_norm": 0.7179388359054512, "learning_rate": 4.4509326845093273e-07, "loss": 0.6375, "step": 31507 }, { "epoch": 0.9199147470146857, "grad_norm": 0.7203325528878005, "learning_rate": 4.4493106244931063e-07, "loss": 0.5965, "step": 31508 }, { "epoch": 0.919943943242533, "grad_norm": 0.773368195404892, "learning_rate": 4.447688564476886e-07, "loss": 0.7157, "step": 31509 }, { "epoch": 0.9199731394703804, "grad_norm": 0.8537586728104544, "learning_rate": 4.4460665044606654e-07, "loss": 0.5881, "step": 31510 }, { "epoch": 0.9200023356982278, "grad_norm": 0.7366088436254885, "learning_rate": 4.444444444444445e-07, "loss": 0.6059, "step": 31511 }, { "epoch": 0.9200315319260751, "grad_norm": 0.6989712700273606, "learning_rate": 4.442822384428224e-07, "loss": 0.5998, "step": 31512 }, { "epoch": 0.9200607281539225, "grad_norm": 0.7093204929744389, "learning_rate": 4.441200324412004e-07, "loss": 0.5656, "step": 31513 }, { "epoch": 0.9200899243817698, "grad_norm": 0.7605451242782402, "learning_rate": 4.439578264395783e-07, "loss": 0.6756, "step": 31514 }, { "epoch": 0.9201191206096172, "grad_norm": 0.7186010249308027, "learning_rate": 4.437956204379562e-07, "loss": 0.6341, "step": 31515 }, { "epoch": 0.9201483168374646, "grad_norm": 0.6971366700260465, "learning_rate": 4.436334144363342e-07, "loss": 0.6303, "step": 31516 }, { "epoch": 0.9201775130653119, "grad_norm": 0.7100456476577072, "learning_rate": 4.434712084347121e-07, "loss": 0.6079, "step": 31517 }, { "epoch": 0.9202067092931593, "grad_norm": 0.7153340490418347, "learning_rate": 4.4330900243309006e-07, "loss": 0.6411, "step": 31518 }, { "epoch": 0.9202359055210066, "grad_norm": 0.6910731339539977, "learning_rate": 4.43146796431468e-07, "loss": 0.5298, "step": 31519 }, { "epoch": 0.920265101748854, "grad_norm": 0.730592028698732, "learning_rate": 4.4298459042984597e-07, "loss": 0.625, "step": 31520 }, { "epoch": 0.9202942979767014, "grad_norm": 0.7560852092642109, "learning_rate": 4.4282238442822387e-07, "loss": 0.6717, "step": 31521 }, { "epoch": 0.9203234942045487, "grad_norm": 0.7542781636744536, "learning_rate": 4.4266017842660177e-07, "loss": 0.6977, "step": 31522 }, { "epoch": 0.9203526904323961, "grad_norm": 0.6964337426756866, "learning_rate": 4.424979724249798e-07, "loss": 0.5713, "step": 31523 }, { "epoch": 0.9203818866602435, "grad_norm": 0.6442661087403391, "learning_rate": 4.423357664233577e-07, "loss": 0.557, "step": 31524 }, { "epoch": 0.9204110828880908, "grad_norm": 0.6758501725701621, "learning_rate": 4.421735604217357e-07, "loss": 0.5264, "step": 31525 }, { "epoch": 0.9204402791159382, "grad_norm": 0.7432028703830013, "learning_rate": 4.420113544201136e-07, "loss": 0.6255, "step": 31526 }, { "epoch": 0.9204694753437855, "grad_norm": 0.7012428978067278, "learning_rate": 4.4184914841849154e-07, "loss": 0.6193, "step": 31527 }, { "epoch": 0.9204986715716329, "grad_norm": 0.7148185940189332, "learning_rate": 4.4168694241686944e-07, "loss": 0.6419, "step": 31528 }, { "epoch": 0.9205278677994803, "grad_norm": 0.6908112459059408, "learning_rate": 4.415247364152474e-07, "loss": 0.5564, "step": 31529 }, { "epoch": 0.9205570640273276, "grad_norm": 0.7456339168375803, "learning_rate": 4.4136253041362535e-07, "loss": 0.6578, "step": 31530 }, { "epoch": 0.920586260255175, "grad_norm": 0.7605414086133717, "learning_rate": 4.4120032441200325e-07, "loss": 0.6523, "step": 31531 }, { "epoch": 0.9206154564830223, "grad_norm": 0.7162888911586284, "learning_rate": 4.4103811841038126e-07, "loss": 0.6195, "step": 31532 }, { "epoch": 0.9206446527108697, "grad_norm": 0.7642644307425422, "learning_rate": 4.4087591240875916e-07, "loss": 0.6514, "step": 31533 }, { "epoch": 0.9206738489387171, "grad_norm": 0.7535655206864568, "learning_rate": 4.407137064071371e-07, "loss": 0.7145, "step": 31534 }, { "epoch": 0.9207030451665644, "grad_norm": 0.7479681130528133, "learning_rate": 4.4055150040551506e-07, "loss": 0.6356, "step": 31535 }, { "epoch": 0.9207322413944118, "grad_norm": 0.7506962162856333, "learning_rate": 4.4038929440389296e-07, "loss": 0.6661, "step": 31536 }, { "epoch": 0.9207614376222591, "grad_norm": 0.6659243918677726, "learning_rate": 4.402270884022709e-07, "loss": 0.5689, "step": 31537 }, { "epoch": 0.9207906338501066, "grad_norm": 0.6762382669782089, "learning_rate": 4.4006488240064887e-07, "loss": 0.5824, "step": 31538 }, { "epoch": 0.920819830077954, "grad_norm": 0.7168261417842751, "learning_rate": 4.3990267639902683e-07, "loss": 0.6148, "step": 31539 }, { "epoch": 0.9208490263058013, "grad_norm": 0.6916480238627734, "learning_rate": 4.3974047039740473e-07, "loss": 0.5994, "step": 31540 }, { "epoch": 0.9208782225336487, "grad_norm": 0.8162255586736463, "learning_rate": 4.3957826439578263e-07, "loss": 0.6377, "step": 31541 }, { "epoch": 0.9209074187614961, "grad_norm": 0.7190022729733461, "learning_rate": 4.3941605839416064e-07, "loss": 0.659, "step": 31542 }, { "epoch": 0.9209366149893434, "grad_norm": 0.7999488417832248, "learning_rate": 4.3925385239253854e-07, "loss": 0.6394, "step": 31543 }, { "epoch": 0.9209658112171908, "grad_norm": 0.6994302406914733, "learning_rate": 4.3909164639091654e-07, "loss": 0.5947, "step": 31544 }, { "epoch": 0.9209950074450381, "grad_norm": 0.7407630653101528, "learning_rate": 4.3892944038929444e-07, "loss": 0.6578, "step": 31545 }, { "epoch": 0.9210242036728855, "grad_norm": 0.7614060630547522, "learning_rate": 4.387672343876724e-07, "loss": 0.6975, "step": 31546 }, { "epoch": 0.9210533999007329, "grad_norm": 0.7882618539863337, "learning_rate": 4.386050283860503e-07, "loss": 0.6796, "step": 31547 }, { "epoch": 0.9210825961285802, "grad_norm": 0.8012768950958135, "learning_rate": 4.3844282238442825e-07, "loss": 0.7577, "step": 31548 }, { "epoch": 0.9211117923564276, "grad_norm": 0.7717429034503918, "learning_rate": 4.382806163828062e-07, "loss": 0.7343, "step": 31549 }, { "epoch": 0.921140988584275, "grad_norm": 0.7412890621467827, "learning_rate": 4.381184103811841e-07, "loss": 0.6819, "step": 31550 }, { "epoch": 0.9211701848121223, "grad_norm": 0.7563203292586, "learning_rate": 4.379562043795621e-07, "loss": 0.6292, "step": 31551 }, { "epoch": 0.9211993810399697, "grad_norm": 0.6851303028013938, "learning_rate": 4.3779399837794e-07, "loss": 0.6032, "step": 31552 }, { "epoch": 0.921228577267817, "grad_norm": 0.7091019303712769, "learning_rate": 4.3763179237631797e-07, "loss": 0.6485, "step": 31553 }, { "epoch": 0.9212577734956644, "grad_norm": 0.7599877778884351, "learning_rate": 4.374695863746959e-07, "loss": 0.5904, "step": 31554 }, { "epoch": 0.9212869697235118, "grad_norm": 0.7248096763669076, "learning_rate": 4.373073803730738e-07, "loss": 0.6042, "step": 31555 }, { "epoch": 0.9213161659513591, "grad_norm": 0.778424512504793, "learning_rate": 4.371451743714518e-07, "loss": 0.6842, "step": 31556 }, { "epoch": 0.9213453621792065, "grad_norm": 0.7489442470321533, "learning_rate": 4.369829683698297e-07, "loss": 0.6127, "step": 31557 }, { "epoch": 0.9213745584070538, "grad_norm": 0.7607643878946947, "learning_rate": 4.368207623682077e-07, "loss": 0.7394, "step": 31558 }, { "epoch": 0.9214037546349012, "grad_norm": 0.7535568371409682, "learning_rate": 4.366585563665856e-07, "loss": 0.6837, "step": 31559 }, { "epoch": 0.9214329508627486, "grad_norm": 0.7148969214960421, "learning_rate": 4.364963503649636e-07, "loss": 0.6298, "step": 31560 }, { "epoch": 0.9214621470905959, "grad_norm": 0.7391782639658562, "learning_rate": 4.363341443633415e-07, "loss": 0.6661, "step": 31561 }, { "epoch": 0.9214913433184433, "grad_norm": 0.7342841805570608, "learning_rate": 4.361719383617194e-07, "loss": 0.698, "step": 31562 }, { "epoch": 0.9215205395462907, "grad_norm": 0.6926193297968243, "learning_rate": 4.3600973236009735e-07, "loss": 0.5986, "step": 31563 }, { "epoch": 0.921549735774138, "grad_norm": 0.6690021788330388, "learning_rate": 4.358475263584753e-07, "loss": 0.5389, "step": 31564 }, { "epoch": 0.9215789320019854, "grad_norm": 0.6694489874976024, "learning_rate": 4.3568532035685326e-07, "loss": 0.57, "step": 31565 }, { "epoch": 0.9216081282298327, "grad_norm": 0.7965642309388592, "learning_rate": 4.3552311435523116e-07, "loss": 0.6325, "step": 31566 }, { "epoch": 0.9216373244576801, "grad_norm": 0.7087524964066104, "learning_rate": 4.353609083536091e-07, "loss": 0.6213, "step": 31567 }, { "epoch": 0.9216665206855275, "grad_norm": 0.7324708893135093, "learning_rate": 4.3519870235198706e-07, "loss": 0.6734, "step": 31568 }, { "epoch": 0.9216957169133748, "grad_norm": 0.7307805606055178, "learning_rate": 4.3503649635036497e-07, "loss": 0.6387, "step": 31569 }, { "epoch": 0.9217249131412222, "grad_norm": 0.7424036173471984, "learning_rate": 4.3487429034874297e-07, "loss": 0.659, "step": 31570 }, { "epoch": 0.9217541093690695, "grad_norm": 0.7687209807662491, "learning_rate": 4.347120843471209e-07, "loss": 0.6698, "step": 31571 }, { "epoch": 0.9217833055969169, "grad_norm": 0.6662087729686785, "learning_rate": 4.3454987834549883e-07, "loss": 0.5402, "step": 31572 }, { "epoch": 0.9218125018247643, "grad_norm": 0.7237280601687247, "learning_rate": 4.343876723438768e-07, "loss": 0.6349, "step": 31573 }, { "epoch": 0.9218416980526116, "grad_norm": 0.6868899855560994, "learning_rate": 4.342254663422547e-07, "loss": 0.5519, "step": 31574 }, { "epoch": 0.921870894280459, "grad_norm": 0.7267338490558013, "learning_rate": 4.3406326034063264e-07, "loss": 0.5847, "step": 31575 }, { "epoch": 0.9219000905083063, "grad_norm": 0.7023167446535875, "learning_rate": 4.3390105433901054e-07, "loss": 0.5837, "step": 31576 }, { "epoch": 0.9219292867361537, "grad_norm": 0.8136717686858499, "learning_rate": 4.3373884833738854e-07, "loss": 0.6932, "step": 31577 }, { "epoch": 0.9219584829640011, "grad_norm": 0.68092219377658, "learning_rate": 4.3357664233576644e-07, "loss": 0.5567, "step": 31578 }, { "epoch": 0.9219876791918484, "grad_norm": 0.7478525561265034, "learning_rate": 4.3341443633414445e-07, "loss": 0.6749, "step": 31579 }, { "epoch": 0.9220168754196958, "grad_norm": 0.7874745848400977, "learning_rate": 4.3325223033252235e-07, "loss": 0.7117, "step": 31580 }, { "epoch": 0.9220460716475432, "grad_norm": 0.7376357288161638, "learning_rate": 4.3309002433090025e-07, "loss": 0.6544, "step": 31581 }, { "epoch": 0.9220752678753905, "grad_norm": 0.7079267880287989, "learning_rate": 4.329278183292782e-07, "loss": 0.6621, "step": 31582 }, { "epoch": 0.9221044641032379, "grad_norm": 0.7347321840088105, "learning_rate": 4.3276561232765616e-07, "loss": 0.6046, "step": 31583 }, { "epoch": 0.9221336603310852, "grad_norm": 0.9864628833050909, "learning_rate": 4.326034063260341e-07, "loss": 0.6568, "step": 31584 }, { "epoch": 0.9221628565589326, "grad_norm": 0.7044102768269322, "learning_rate": 4.32441200324412e-07, "loss": 0.6332, "step": 31585 }, { "epoch": 0.92219205278678, "grad_norm": 0.7312775282672319, "learning_rate": 4.3227899432279e-07, "loss": 0.6046, "step": 31586 }, { "epoch": 0.9222212490146273, "grad_norm": 0.710086926957955, "learning_rate": 4.321167883211679e-07, "loss": 0.6134, "step": 31587 }, { "epoch": 0.9222504452424747, "grad_norm": 0.7561818447012129, "learning_rate": 4.319545823195458e-07, "loss": 0.6782, "step": 31588 }, { "epoch": 0.922279641470322, "grad_norm": 0.6912796254078051, "learning_rate": 4.3179237631792383e-07, "loss": 0.5799, "step": 31589 }, { "epoch": 0.9223088376981694, "grad_norm": 0.6805870673609651, "learning_rate": 4.3163017031630173e-07, "loss": 0.6014, "step": 31590 }, { "epoch": 0.9223380339260168, "grad_norm": 0.7388729066675859, "learning_rate": 4.314679643146797e-07, "loss": 0.6274, "step": 31591 }, { "epoch": 0.9223672301538641, "grad_norm": 0.7272549388432015, "learning_rate": 4.313057583130576e-07, "loss": 0.6367, "step": 31592 }, { "epoch": 0.9223964263817115, "grad_norm": 0.7580987335790728, "learning_rate": 4.311435523114356e-07, "loss": 0.5651, "step": 31593 }, { "epoch": 0.9224256226095588, "grad_norm": 0.671538395999353, "learning_rate": 4.309813463098135e-07, "loss": 0.577, "step": 31594 }, { "epoch": 0.9224548188374062, "grad_norm": 0.7337585109683997, "learning_rate": 4.308191403081914e-07, "loss": 0.6643, "step": 31595 }, { "epoch": 0.9224840150652536, "grad_norm": 0.7285374998177406, "learning_rate": 4.306569343065694e-07, "loss": 0.5929, "step": 31596 }, { "epoch": 0.9225132112931009, "grad_norm": 0.70270055674436, "learning_rate": 4.304947283049473e-07, "loss": 0.5729, "step": 31597 }, { "epoch": 0.9225424075209483, "grad_norm": 0.7479136905042586, "learning_rate": 4.303325223033253e-07, "loss": 0.6909, "step": 31598 }, { "epoch": 0.9225716037487957, "grad_norm": 0.7138312107582968, "learning_rate": 4.301703163017032e-07, "loss": 0.5914, "step": 31599 }, { "epoch": 0.922600799976643, "grad_norm": 0.7142787449081809, "learning_rate": 4.300081103000811e-07, "loss": 0.6421, "step": 31600 }, { "epoch": 0.9226299962044904, "grad_norm": 0.691747254222208, "learning_rate": 4.2984590429845907e-07, "loss": 0.6002, "step": 31601 }, { "epoch": 0.9226591924323377, "grad_norm": 0.7336444497360549, "learning_rate": 4.29683698296837e-07, "loss": 0.6573, "step": 31602 }, { "epoch": 0.9226883886601851, "grad_norm": 0.7201971504205159, "learning_rate": 4.29521492295215e-07, "loss": 0.6261, "step": 31603 }, { "epoch": 0.9227175848880325, "grad_norm": 0.7261200534550011, "learning_rate": 4.293592862935929e-07, "loss": 0.6674, "step": 31604 }, { "epoch": 0.9227467811158798, "grad_norm": 0.7390919475173054, "learning_rate": 4.291970802919709e-07, "loss": 0.6039, "step": 31605 }, { "epoch": 0.9227759773437272, "grad_norm": 0.7006120545365859, "learning_rate": 4.290348742903488e-07, "loss": 0.5703, "step": 31606 }, { "epoch": 0.9228051735715745, "grad_norm": 0.7892755656813754, "learning_rate": 4.288726682887267e-07, "loss": 0.7321, "step": 31607 }, { "epoch": 0.9228343697994219, "grad_norm": 0.7323055780131029, "learning_rate": 4.287104622871047e-07, "loss": 0.5368, "step": 31608 }, { "epoch": 0.9228635660272693, "grad_norm": 0.7042094583441055, "learning_rate": 4.285482562854826e-07, "loss": 0.5891, "step": 31609 }, { "epoch": 0.9228927622551166, "grad_norm": 0.6730091474821785, "learning_rate": 4.2838605028386054e-07, "loss": 0.5706, "step": 31610 }, { "epoch": 0.922921958482964, "grad_norm": 0.7503582885195968, "learning_rate": 4.2822384428223845e-07, "loss": 0.634, "step": 31611 }, { "epoch": 0.9229511547108114, "grad_norm": 0.7202702157275919, "learning_rate": 4.2806163828061645e-07, "loss": 0.6638, "step": 31612 }, { "epoch": 0.9229803509386587, "grad_norm": 0.7131719340854705, "learning_rate": 4.2789943227899435e-07, "loss": 0.6339, "step": 31613 }, { "epoch": 0.9230095471665061, "grad_norm": 0.793372200601716, "learning_rate": 4.2773722627737225e-07, "loss": 0.5852, "step": 31614 }, { "epoch": 0.9230387433943534, "grad_norm": 0.6887718225121464, "learning_rate": 4.2757502027575026e-07, "loss": 0.6011, "step": 31615 }, { "epoch": 0.9230679396222008, "grad_norm": 0.787504897103965, "learning_rate": 4.2741281427412816e-07, "loss": 0.6326, "step": 31616 }, { "epoch": 0.9230971358500482, "grad_norm": 0.8097490247191634, "learning_rate": 4.272506082725061e-07, "loss": 0.5814, "step": 31617 }, { "epoch": 0.9231263320778955, "grad_norm": 0.7885491996462438, "learning_rate": 4.2708840227088407e-07, "loss": 0.6934, "step": 31618 }, { "epoch": 0.9231555283057429, "grad_norm": 0.7694599384324163, "learning_rate": 4.26926196269262e-07, "loss": 0.7368, "step": 31619 }, { "epoch": 0.9231847245335902, "grad_norm": 0.7240686546849633, "learning_rate": 4.267639902676399e-07, "loss": 0.6406, "step": 31620 }, { "epoch": 0.9232139207614376, "grad_norm": 0.7086913048143897, "learning_rate": 4.266017842660178e-07, "loss": 0.6126, "step": 31621 }, { "epoch": 0.923243116989285, "grad_norm": 0.7559784540067491, "learning_rate": 4.2643957826439583e-07, "loss": 0.6829, "step": 31622 }, { "epoch": 0.9232723132171323, "grad_norm": 0.9035747036047601, "learning_rate": 4.2627737226277373e-07, "loss": 0.6915, "step": 31623 }, { "epoch": 0.9233015094449797, "grad_norm": 0.6742778625760735, "learning_rate": 4.2611516626115174e-07, "loss": 0.5614, "step": 31624 }, { "epoch": 0.923330705672827, "grad_norm": 0.7085203076806009, "learning_rate": 4.2595296025952964e-07, "loss": 0.6162, "step": 31625 }, { "epoch": 0.9233599019006744, "grad_norm": 0.7422585901349458, "learning_rate": 4.2579075425790754e-07, "loss": 0.6844, "step": 31626 }, { "epoch": 0.9233890981285218, "grad_norm": 0.6988726227776176, "learning_rate": 4.2562854825628555e-07, "loss": 0.6267, "step": 31627 }, { "epoch": 0.9234182943563691, "grad_norm": 0.703164084513998, "learning_rate": 4.2546634225466345e-07, "loss": 0.6319, "step": 31628 }, { "epoch": 0.9234474905842165, "grad_norm": 0.7094355062996363, "learning_rate": 4.253041362530414e-07, "loss": 0.6164, "step": 31629 }, { "epoch": 0.9234766868120639, "grad_norm": 0.7623400206561342, "learning_rate": 4.251419302514193e-07, "loss": 0.6882, "step": 31630 }, { "epoch": 0.9235058830399112, "grad_norm": 0.7327952430340864, "learning_rate": 4.249797242497973e-07, "loss": 0.6728, "step": 31631 }, { "epoch": 0.9235350792677586, "grad_norm": 0.7085038056749466, "learning_rate": 4.248175182481752e-07, "loss": 0.6169, "step": 31632 }, { "epoch": 0.9235642754956059, "grad_norm": 0.7225416336947632, "learning_rate": 4.246553122465531e-07, "loss": 0.6505, "step": 31633 }, { "epoch": 0.9235934717234533, "grad_norm": 0.7131767620093308, "learning_rate": 4.244931062449311e-07, "loss": 0.6114, "step": 31634 }, { "epoch": 0.9236226679513007, "grad_norm": 0.717033055693043, "learning_rate": 4.24330900243309e-07, "loss": 0.6696, "step": 31635 }, { "epoch": 0.923651864179148, "grad_norm": 0.7032330650116458, "learning_rate": 4.24168694241687e-07, "loss": 0.6051, "step": 31636 }, { "epoch": 0.9236810604069954, "grad_norm": 0.794167491590487, "learning_rate": 4.2400648824006493e-07, "loss": 0.7412, "step": 31637 }, { "epoch": 0.9237102566348427, "grad_norm": 0.7156700753268851, "learning_rate": 4.238442822384429e-07, "loss": 0.6456, "step": 31638 }, { "epoch": 0.9237394528626901, "grad_norm": 0.7357236824246856, "learning_rate": 4.236820762368208e-07, "loss": 0.6216, "step": 31639 }, { "epoch": 0.9237686490905375, "grad_norm": 0.7814862088411516, "learning_rate": 4.235198702351987e-07, "loss": 0.6485, "step": 31640 }, { "epoch": 0.9237978453183848, "grad_norm": 0.7063849929533491, "learning_rate": 4.233576642335767e-07, "loss": 0.5722, "step": 31641 }, { "epoch": 0.9238270415462322, "grad_norm": 0.708030024347322, "learning_rate": 4.231954582319546e-07, "loss": 0.611, "step": 31642 }, { "epoch": 0.9238562377740795, "grad_norm": 0.728576260436214, "learning_rate": 4.230332522303326e-07, "loss": 0.6538, "step": 31643 }, { "epoch": 0.9238854340019269, "grad_norm": 0.7047261072570052, "learning_rate": 4.228710462287105e-07, "loss": 0.614, "step": 31644 }, { "epoch": 0.9239146302297743, "grad_norm": 0.762209444407579, "learning_rate": 4.2270884022708845e-07, "loss": 0.6609, "step": 31645 }, { "epoch": 0.9239438264576216, "grad_norm": 0.681955375057833, "learning_rate": 4.2254663422546635e-07, "loss": 0.5514, "step": 31646 }, { "epoch": 0.923973022685469, "grad_norm": 0.7483393249043175, "learning_rate": 4.223844282238443e-07, "loss": 0.6856, "step": 31647 }, { "epoch": 0.9240022189133164, "grad_norm": 0.6997331151204752, "learning_rate": 4.2222222222222226e-07, "loss": 0.6102, "step": 31648 }, { "epoch": 0.9240314151411637, "grad_norm": 0.6646503044592184, "learning_rate": 4.2206001622060016e-07, "loss": 0.5489, "step": 31649 }, { "epoch": 0.9240606113690111, "grad_norm": 0.7183726942402114, "learning_rate": 4.2189781021897817e-07, "loss": 0.589, "step": 31650 }, { "epoch": 0.9240898075968584, "grad_norm": 0.8090497023429176, "learning_rate": 4.2173560421735607e-07, "loss": 0.7097, "step": 31651 }, { "epoch": 0.9241190038247058, "grad_norm": 0.7388197511101016, "learning_rate": 4.21573398215734e-07, "loss": 0.5952, "step": 31652 }, { "epoch": 0.9241482000525532, "grad_norm": 0.7142974307060014, "learning_rate": 4.21411192214112e-07, "loss": 0.6184, "step": 31653 }, { "epoch": 0.9241773962804005, "grad_norm": 0.7404010410762729, "learning_rate": 4.212489862124899e-07, "loss": 0.5799, "step": 31654 }, { "epoch": 0.9242065925082479, "grad_norm": 0.7185257519107954, "learning_rate": 4.2108678021086783e-07, "loss": 0.6894, "step": 31655 }, { "epoch": 0.9242357887360952, "grad_norm": 0.6925338345431707, "learning_rate": 4.209245742092458e-07, "loss": 0.5717, "step": 31656 }, { "epoch": 0.9242649849639426, "grad_norm": 0.6839635517508271, "learning_rate": 4.2076236820762374e-07, "loss": 0.5935, "step": 31657 }, { "epoch": 0.92429418119179, "grad_norm": 0.7987088099262092, "learning_rate": 4.2060016220600164e-07, "loss": 0.6961, "step": 31658 }, { "epoch": 0.9243233774196374, "grad_norm": 0.6965585982172858, "learning_rate": 4.2043795620437954e-07, "loss": 0.6442, "step": 31659 }, { "epoch": 0.9243525736474848, "grad_norm": 0.7604409890655038, "learning_rate": 4.2027575020275755e-07, "loss": 0.6585, "step": 31660 }, { "epoch": 0.9243817698753322, "grad_norm": 0.653990028680453, "learning_rate": 4.2011354420113545e-07, "loss": 0.5721, "step": 31661 }, { "epoch": 0.9244109661031795, "grad_norm": 0.7898441773812531, "learning_rate": 4.1995133819951346e-07, "loss": 0.7706, "step": 31662 }, { "epoch": 0.9244401623310269, "grad_norm": 0.7371022925779854, "learning_rate": 4.1978913219789136e-07, "loss": 0.6359, "step": 31663 }, { "epoch": 0.9244693585588742, "grad_norm": 0.7195713490283054, "learning_rate": 4.196269261962693e-07, "loss": 0.6455, "step": 31664 }, { "epoch": 0.9244985547867216, "grad_norm": 0.718020203332815, "learning_rate": 4.194647201946472e-07, "loss": 0.5981, "step": 31665 }, { "epoch": 0.924527751014569, "grad_norm": 0.7455259295485376, "learning_rate": 4.1930251419302517e-07, "loss": 0.6148, "step": 31666 }, { "epoch": 0.9245569472424163, "grad_norm": 0.6615232419251394, "learning_rate": 4.191403081914031e-07, "loss": 0.5551, "step": 31667 }, { "epoch": 0.9245861434702637, "grad_norm": 0.7526946470678382, "learning_rate": 4.18978102189781e-07, "loss": 0.634, "step": 31668 }, { "epoch": 0.924615339698111, "grad_norm": 0.8392602919424668, "learning_rate": 4.1881589618815903e-07, "loss": 0.603, "step": 31669 }, { "epoch": 0.9246445359259584, "grad_norm": 0.7523484349140538, "learning_rate": 4.1865369018653693e-07, "loss": 0.7023, "step": 31670 }, { "epoch": 0.9246737321538058, "grad_norm": 0.7226845003867098, "learning_rate": 4.184914841849149e-07, "loss": 0.672, "step": 31671 }, { "epoch": 0.9247029283816531, "grad_norm": 0.7163034399310209, "learning_rate": 4.1832927818329284e-07, "loss": 0.6318, "step": 31672 }, { "epoch": 0.9247321246095005, "grad_norm": 0.7239395621346383, "learning_rate": 4.1816707218167074e-07, "loss": 0.5981, "step": 31673 }, { "epoch": 0.9247613208373479, "grad_norm": 0.7198452175216025, "learning_rate": 4.180048661800487e-07, "loss": 0.6252, "step": 31674 }, { "epoch": 0.9247905170651952, "grad_norm": 0.6434058390181938, "learning_rate": 4.178426601784266e-07, "loss": 0.5378, "step": 31675 }, { "epoch": 0.9248197132930426, "grad_norm": 0.7350029112776082, "learning_rate": 4.176804541768046e-07, "loss": 0.6418, "step": 31676 }, { "epoch": 0.9248489095208899, "grad_norm": 0.7323963900009405, "learning_rate": 4.175182481751825e-07, "loss": 0.7003, "step": 31677 }, { "epoch": 0.9248781057487373, "grad_norm": 0.7149234044080794, "learning_rate": 4.173560421735605e-07, "loss": 0.6318, "step": 31678 }, { "epoch": 0.9249073019765847, "grad_norm": 0.7751647676300861, "learning_rate": 4.171938361719384e-07, "loss": 0.6747, "step": 31679 }, { "epoch": 0.924936498204432, "grad_norm": 0.7481895042057717, "learning_rate": 4.170316301703163e-07, "loss": 0.6549, "step": 31680 }, { "epoch": 0.9249656944322794, "grad_norm": 0.7379782277597307, "learning_rate": 4.168694241686943e-07, "loss": 0.6602, "step": 31681 }, { "epoch": 0.9249948906601267, "grad_norm": 0.755936997435903, "learning_rate": 4.167072181670722e-07, "loss": 0.6488, "step": 31682 }, { "epoch": 0.9250240868879741, "grad_norm": 0.7064889461406342, "learning_rate": 4.1654501216545017e-07, "loss": 0.6166, "step": 31683 }, { "epoch": 0.9250532831158215, "grad_norm": 0.6931987791136359, "learning_rate": 4.1638280616382807e-07, "loss": 0.5674, "step": 31684 }, { "epoch": 0.9250824793436688, "grad_norm": 0.6893412665878694, "learning_rate": 4.16220600162206e-07, "loss": 0.5763, "step": 31685 }, { "epoch": 0.9251116755715162, "grad_norm": 0.7474247732265441, "learning_rate": 4.16058394160584e-07, "loss": 0.6732, "step": 31686 }, { "epoch": 0.9251408717993636, "grad_norm": 0.7826340992306493, "learning_rate": 4.158961881589619e-07, "loss": 0.6792, "step": 31687 }, { "epoch": 0.9251700680272109, "grad_norm": 0.7030669053557832, "learning_rate": 4.157339821573399e-07, "loss": 0.5949, "step": 31688 }, { "epoch": 0.9251992642550583, "grad_norm": 0.6858734557254497, "learning_rate": 4.155717761557178e-07, "loss": 0.6158, "step": 31689 }, { "epoch": 0.9252284604829056, "grad_norm": 0.7100049865855379, "learning_rate": 4.1540957015409574e-07, "loss": 0.641, "step": 31690 }, { "epoch": 0.925257656710753, "grad_norm": 0.7645300572070427, "learning_rate": 4.152473641524737e-07, "loss": 0.6338, "step": 31691 }, { "epoch": 0.9252868529386004, "grad_norm": 0.7049447754016694, "learning_rate": 4.150851581508516e-07, "loss": 0.6437, "step": 31692 }, { "epoch": 0.9253160491664477, "grad_norm": 0.7059589388873968, "learning_rate": 4.1492295214922955e-07, "loss": 0.605, "step": 31693 }, { "epoch": 0.9253452453942951, "grad_norm": 0.6815717881666049, "learning_rate": 4.1476074614760745e-07, "loss": 0.5603, "step": 31694 }, { "epoch": 0.9253744416221424, "grad_norm": 0.6787194218102958, "learning_rate": 4.1459854014598546e-07, "loss": 0.5702, "step": 31695 }, { "epoch": 0.9254036378499898, "grad_norm": 0.7141505389669566, "learning_rate": 4.1443633414436336e-07, "loss": 0.6233, "step": 31696 }, { "epoch": 0.9254328340778372, "grad_norm": 0.7531973029189362, "learning_rate": 4.1427412814274137e-07, "loss": 0.6965, "step": 31697 }, { "epoch": 0.9254620303056845, "grad_norm": 0.6936645552579814, "learning_rate": 4.1411192214111927e-07, "loss": 0.5701, "step": 31698 }, { "epoch": 0.9254912265335319, "grad_norm": 0.7261965616428325, "learning_rate": 4.1394971613949717e-07, "loss": 0.6723, "step": 31699 }, { "epoch": 0.9255204227613792, "grad_norm": 0.7774313806846427, "learning_rate": 4.137875101378751e-07, "loss": 0.6875, "step": 31700 }, { "epoch": 0.9255496189892266, "grad_norm": 0.7265153078466539, "learning_rate": 4.136253041362531e-07, "loss": 0.6384, "step": 31701 }, { "epoch": 0.925578815217074, "grad_norm": 0.7103829860175562, "learning_rate": 4.1346309813463103e-07, "loss": 0.603, "step": 31702 }, { "epoch": 0.9256080114449213, "grad_norm": 0.7220338526699228, "learning_rate": 4.1330089213300893e-07, "loss": 0.6347, "step": 31703 }, { "epoch": 0.9256372076727687, "grad_norm": 0.7187920556618042, "learning_rate": 4.1313868613138694e-07, "loss": 0.6348, "step": 31704 }, { "epoch": 0.925666403900616, "grad_norm": 0.7486156547162207, "learning_rate": 4.1297648012976484e-07, "loss": 0.6421, "step": 31705 }, { "epoch": 0.9256956001284634, "grad_norm": 0.7233552724736758, "learning_rate": 4.1281427412814274e-07, "loss": 0.6962, "step": 31706 }, { "epoch": 0.9257247963563108, "grad_norm": 0.7389633546918801, "learning_rate": 4.1265206812652075e-07, "loss": 0.6607, "step": 31707 }, { "epoch": 0.9257539925841581, "grad_norm": 0.7995149480565067, "learning_rate": 4.1248986212489865e-07, "loss": 0.7951, "step": 31708 }, { "epoch": 0.9257831888120055, "grad_norm": 0.7602037464142904, "learning_rate": 4.123276561232766e-07, "loss": 0.6412, "step": 31709 }, { "epoch": 0.9258123850398529, "grad_norm": 0.7420447836098502, "learning_rate": 4.1216545012165455e-07, "loss": 0.6782, "step": 31710 }, { "epoch": 0.9258415812677002, "grad_norm": 0.7798496809925017, "learning_rate": 4.120032441200325e-07, "loss": 0.668, "step": 31711 }, { "epoch": 0.9258707774955476, "grad_norm": 0.7337027292113586, "learning_rate": 4.118410381184104e-07, "loss": 0.6124, "step": 31712 }, { "epoch": 0.9258999737233949, "grad_norm": 0.6900661205291909, "learning_rate": 4.116788321167883e-07, "loss": 0.5994, "step": 31713 }, { "epoch": 0.9259291699512423, "grad_norm": 0.7315245583306693, "learning_rate": 4.115166261151663e-07, "loss": 0.6452, "step": 31714 }, { "epoch": 0.9259583661790897, "grad_norm": 0.7230019598941044, "learning_rate": 4.113544201135442e-07, "loss": 0.628, "step": 31715 }, { "epoch": 0.925987562406937, "grad_norm": 0.6974834388189665, "learning_rate": 4.111922141119222e-07, "loss": 0.6035, "step": 31716 }, { "epoch": 0.9260167586347844, "grad_norm": 0.6758901333861691, "learning_rate": 4.110300081103001e-07, "loss": 0.5767, "step": 31717 }, { "epoch": 0.9260459548626317, "grad_norm": 0.769653985316757, "learning_rate": 4.10867802108678e-07, "loss": 0.7251, "step": 31718 }, { "epoch": 0.9260751510904791, "grad_norm": 0.7276907120821423, "learning_rate": 4.10705596107056e-07, "loss": 0.6491, "step": 31719 }, { "epoch": 0.9261043473183265, "grad_norm": 0.7121336901738915, "learning_rate": 4.1054339010543393e-07, "loss": 0.5885, "step": 31720 }, { "epoch": 0.9261335435461738, "grad_norm": 0.7245706830188702, "learning_rate": 4.103811841038119e-07, "loss": 0.6532, "step": 31721 }, { "epoch": 0.9261627397740212, "grad_norm": 0.6858933932520253, "learning_rate": 4.102189781021898e-07, "loss": 0.5978, "step": 31722 }, { "epoch": 0.9261919360018686, "grad_norm": 0.7214453675757013, "learning_rate": 4.100567721005678e-07, "loss": 0.69, "step": 31723 }, { "epoch": 0.9262211322297159, "grad_norm": 0.7344775942267854, "learning_rate": 4.098945660989457e-07, "loss": 0.6709, "step": 31724 }, { "epoch": 0.9262503284575633, "grad_norm": 0.7638099596589365, "learning_rate": 4.097323600973236e-07, "loss": 0.7005, "step": 31725 }, { "epoch": 0.9262795246854106, "grad_norm": 0.7153369561261191, "learning_rate": 4.095701540957016e-07, "loss": 0.6181, "step": 31726 }, { "epoch": 0.926308720913258, "grad_norm": 0.6826519800872355, "learning_rate": 4.094079480940795e-07, "loss": 0.578, "step": 31727 }, { "epoch": 0.9263379171411054, "grad_norm": 0.7468051746092271, "learning_rate": 4.0924574209245746e-07, "loss": 0.6735, "step": 31728 }, { "epoch": 0.9263671133689527, "grad_norm": 0.7524933857132999, "learning_rate": 4.0908353609083536e-07, "loss": 0.6581, "step": 31729 }, { "epoch": 0.9263963095968001, "grad_norm": 0.7709580225130637, "learning_rate": 4.0892133008921337e-07, "loss": 0.7358, "step": 31730 }, { "epoch": 0.9264255058246474, "grad_norm": 0.680595461692217, "learning_rate": 4.0875912408759127e-07, "loss": 0.5981, "step": 31731 }, { "epoch": 0.9264547020524948, "grad_norm": 0.7493299335394835, "learning_rate": 4.0859691808596917e-07, "loss": 0.642, "step": 31732 }, { "epoch": 0.9264838982803422, "grad_norm": 0.7289121033022312, "learning_rate": 4.084347120843472e-07, "loss": 0.622, "step": 31733 }, { "epoch": 0.9265130945081895, "grad_norm": 0.8232784580111738, "learning_rate": 4.082725060827251e-07, "loss": 0.7435, "step": 31734 }, { "epoch": 0.9265422907360369, "grad_norm": 0.7473498264532178, "learning_rate": 4.0811030008110303e-07, "loss": 0.5752, "step": 31735 }, { "epoch": 0.9265714869638842, "grad_norm": 0.7102758221961594, "learning_rate": 4.07948094079481e-07, "loss": 0.5748, "step": 31736 }, { "epoch": 0.9266006831917316, "grad_norm": 0.6760956166491433, "learning_rate": 4.0778588807785894e-07, "loss": 0.5828, "step": 31737 }, { "epoch": 0.926629879419579, "grad_norm": 0.6925791515802543, "learning_rate": 4.0762368207623684e-07, "loss": 0.6193, "step": 31738 }, { "epoch": 0.9266590756474263, "grad_norm": 0.7743535917295338, "learning_rate": 4.074614760746148e-07, "loss": 0.6791, "step": 31739 }, { "epoch": 0.9266882718752737, "grad_norm": 0.7856914092528545, "learning_rate": 4.0729927007299275e-07, "loss": 0.7255, "step": 31740 }, { "epoch": 0.9267174681031211, "grad_norm": 0.7321889621740507, "learning_rate": 4.0713706407137065e-07, "loss": 0.7068, "step": 31741 }, { "epoch": 0.9267466643309684, "grad_norm": 0.6889138573027774, "learning_rate": 4.0697485806974865e-07, "loss": 0.559, "step": 31742 }, { "epoch": 0.9267758605588158, "grad_norm": 0.6924729138392276, "learning_rate": 4.0681265206812656e-07, "loss": 0.5897, "step": 31743 }, { "epoch": 0.9268050567866631, "grad_norm": 0.6890975475246033, "learning_rate": 4.066504460665045e-07, "loss": 0.5989, "step": 31744 }, { "epoch": 0.9268342530145105, "grad_norm": 0.7561603398813324, "learning_rate": 4.0648824006488246e-07, "loss": 0.6778, "step": 31745 }, { "epoch": 0.9268634492423579, "grad_norm": 0.7527083040196066, "learning_rate": 4.0632603406326036e-07, "loss": 0.6332, "step": 31746 }, { "epoch": 0.9268926454702052, "grad_norm": 0.7315961488205589, "learning_rate": 4.061638280616383e-07, "loss": 0.6029, "step": 31747 }, { "epoch": 0.9269218416980526, "grad_norm": 0.7181268399378287, "learning_rate": 4.060016220600162e-07, "loss": 0.6376, "step": 31748 }, { "epoch": 0.9269510379259, "grad_norm": 0.7892259667382067, "learning_rate": 4.058394160583942e-07, "loss": 0.7745, "step": 31749 }, { "epoch": 0.9269802341537473, "grad_norm": 0.6910063527464856, "learning_rate": 4.056772100567721e-07, "loss": 0.5927, "step": 31750 }, { "epoch": 0.9270094303815947, "grad_norm": 0.7476819718302662, "learning_rate": 4.0551500405515003e-07, "loss": 0.6623, "step": 31751 }, { "epoch": 0.927038626609442, "grad_norm": 0.7640130038969956, "learning_rate": 4.0535279805352803e-07, "loss": 0.6108, "step": 31752 }, { "epoch": 0.9270678228372894, "grad_norm": 0.7097249489061055, "learning_rate": 4.0519059205190593e-07, "loss": 0.6607, "step": 31753 }, { "epoch": 0.9270970190651368, "grad_norm": 0.746560693148076, "learning_rate": 4.050283860502839e-07, "loss": 0.695, "step": 31754 }, { "epoch": 0.9271262152929841, "grad_norm": 0.7212164200095957, "learning_rate": 4.0486618004866184e-07, "loss": 0.6595, "step": 31755 }, { "epoch": 0.9271554115208315, "grad_norm": 0.6854458639860312, "learning_rate": 4.047039740470398e-07, "loss": 0.6017, "step": 31756 }, { "epoch": 0.9271846077486788, "grad_norm": 0.7772144013433616, "learning_rate": 4.045417680454177e-07, "loss": 0.7348, "step": 31757 }, { "epoch": 0.9272138039765262, "grad_norm": 0.7031948307407072, "learning_rate": 4.043795620437956e-07, "loss": 0.6361, "step": 31758 }, { "epoch": 0.9272430002043736, "grad_norm": 0.7470565337806591, "learning_rate": 4.042173560421736e-07, "loss": 0.6675, "step": 31759 }, { "epoch": 0.9272721964322209, "grad_norm": 0.7635886712295848, "learning_rate": 4.040551500405515e-07, "loss": 0.6535, "step": 31760 }, { "epoch": 0.9273013926600683, "grad_norm": 0.7126344137988503, "learning_rate": 4.038929440389295e-07, "loss": 0.6028, "step": 31761 }, { "epoch": 0.9273305888879156, "grad_norm": 0.758128702216839, "learning_rate": 4.037307380373074e-07, "loss": 0.6886, "step": 31762 }, { "epoch": 0.927359785115763, "grad_norm": 0.7378063143790031, "learning_rate": 4.0356853203568537e-07, "loss": 0.6839, "step": 31763 }, { "epoch": 0.9273889813436104, "grad_norm": 0.6934774095505108, "learning_rate": 4.0340632603406327e-07, "loss": 0.5859, "step": 31764 }, { "epoch": 0.9274181775714577, "grad_norm": 0.7913017531771901, "learning_rate": 4.032441200324412e-07, "loss": 0.7572, "step": 31765 }, { "epoch": 0.9274473737993051, "grad_norm": 0.6966469657379734, "learning_rate": 4.030819140308192e-07, "loss": 0.5886, "step": 31766 }, { "epoch": 0.9274765700271524, "grad_norm": 0.64387792435727, "learning_rate": 4.029197080291971e-07, "loss": 0.4877, "step": 31767 }, { "epoch": 0.9275057662549998, "grad_norm": 0.7246733616202817, "learning_rate": 4.027575020275751e-07, "loss": 0.6466, "step": 31768 }, { "epoch": 0.9275349624828472, "grad_norm": 0.6870148060878285, "learning_rate": 4.02595296025953e-07, "loss": 0.5993, "step": 31769 }, { "epoch": 0.9275641587106945, "grad_norm": 0.7713843921989639, "learning_rate": 4.02433090024331e-07, "loss": 0.7211, "step": 31770 }, { "epoch": 0.9275933549385419, "grad_norm": 0.7335729808669088, "learning_rate": 4.022708840227089e-07, "loss": 0.6644, "step": 31771 }, { "epoch": 0.9276225511663893, "grad_norm": 0.7047550453807025, "learning_rate": 4.021086780210868e-07, "loss": 0.5929, "step": 31772 }, { "epoch": 0.9276517473942366, "grad_norm": 0.7117463226188825, "learning_rate": 4.0194647201946475e-07, "loss": 0.5868, "step": 31773 }, { "epoch": 0.927680943622084, "grad_norm": 0.7767217113857171, "learning_rate": 4.017842660178427e-07, "loss": 0.6872, "step": 31774 }, { "epoch": 0.9277101398499313, "grad_norm": 0.6894193523821952, "learning_rate": 4.0162206001622066e-07, "loss": 0.534, "step": 31775 }, { "epoch": 0.9277393360777787, "grad_norm": 0.7029809653536536, "learning_rate": 4.0145985401459856e-07, "loss": 0.5811, "step": 31776 }, { "epoch": 0.9277685323056261, "grad_norm": 0.6738192186266806, "learning_rate": 4.0129764801297646e-07, "loss": 0.5938, "step": 31777 }, { "epoch": 0.9277977285334734, "grad_norm": 0.6999478767230929, "learning_rate": 4.0113544201135446e-07, "loss": 0.6483, "step": 31778 }, { "epoch": 0.9278269247613209, "grad_norm": 0.7149655818801728, "learning_rate": 4.0097323600973236e-07, "loss": 0.6424, "step": 31779 }, { "epoch": 0.9278561209891683, "grad_norm": 0.6893247330067638, "learning_rate": 4.0081103000811037e-07, "loss": 0.585, "step": 31780 }, { "epoch": 0.9278853172170156, "grad_norm": 0.7065459264439524, "learning_rate": 4.0064882400648827e-07, "loss": 0.6081, "step": 31781 }, { "epoch": 0.927914513444863, "grad_norm": 0.6859726819589774, "learning_rate": 4.004866180048662e-07, "loss": 0.5184, "step": 31782 }, { "epoch": 0.9279437096727103, "grad_norm": 0.7663306398395666, "learning_rate": 4.0032441200324413e-07, "loss": 0.6618, "step": 31783 }, { "epoch": 0.9279729059005577, "grad_norm": 0.7606947615914692, "learning_rate": 4.001622060016221e-07, "loss": 0.7299, "step": 31784 }, { "epoch": 0.9280021021284051, "grad_norm": 0.7064771103447615, "learning_rate": 4.0000000000000003e-07, "loss": 0.608, "step": 31785 }, { "epoch": 0.9280312983562524, "grad_norm": 0.7625215970004872, "learning_rate": 3.9983779399837794e-07, "loss": 0.6563, "step": 31786 }, { "epoch": 0.9280604945840998, "grad_norm": 0.7833772782851112, "learning_rate": 3.9967558799675594e-07, "loss": 0.6658, "step": 31787 }, { "epoch": 0.9280896908119471, "grad_norm": 0.7514423139261697, "learning_rate": 3.9951338199513384e-07, "loss": 0.7245, "step": 31788 }, { "epoch": 0.9281188870397945, "grad_norm": 0.780428448633526, "learning_rate": 3.993511759935118e-07, "loss": 0.6861, "step": 31789 }, { "epoch": 0.9281480832676419, "grad_norm": 0.7090038922509231, "learning_rate": 3.9918896999188975e-07, "loss": 0.6404, "step": 31790 }, { "epoch": 0.9281772794954892, "grad_norm": 0.6680184301232192, "learning_rate": 3.9902676399026765e-07, "loss": 0.5374, "step": 31791 }, { "epoch": 0.9282064757233366, "grad_norm": 0.7019576602079539, "learning_rate": 3.988645579886456e-07, "loss": 0.6181, "step": 31792 }, { "epoch": 0.928235671951184, "grad_norm": 0.7222962096005444, "learning_rate": 3.9870235198702356e-07, "loss": 0.5988, "step": 31793 }, { "epoch": 0.9282648681790313, "grad_norm": 0.672665243105314, "learning_rate": 3.985401459854015e-07, "loss": 0.5454, "step": 31794 }, { "epoch": 0.9282940644068787, "grad_norm": 0.8030917778363663, "learning_rate": 3.983779399837794e-07, "loss": 0.6193, "step": 31795 }, { "epoch": 0.928323260634726, "grad_norm": 0.7048742219590185, "learning_rate": 3.982157339821574e-07, "loss": 0.6231, "step": 31796 }, { "epoch": 0.9283524568625734, "grad_norm": 0.7367315057382026, "learning_rate": 3.980535279805353e-07, "loss": 0.663, "step": 31797 }, { "epoch": 0.9283816530904208, "grad_norm": 0.7508096194761017, "learning_rate": 3.978913219789132e-07, "loss": 0.5798, "step": 31798 }, { "epoch": 0.9284108493182681, "grad_norm": 0.7464922294603668, "learning_rate": 3.9772911597729123e-07, "loss": 0.6346, "step": 31799 }, { "epoch": 0.9284400455461155, "grad_norm": 0.8835054954931852, "learning_rate": 3.9756690997566913e-07, "loss": 0.6381, "step": 31800 }, { "epoch": 0.9284692417739628, "grad_norm": 0.7187624335712086, "learning_rate": 3.974047039740471e-07, "loss": 0.6777, "step": 31801 }, { "epoch": 0.9284984380018102, "grad_norm": 0.6922118215853612, "learning_rate": 3.97242497972425e-07, "loss": 0.5423, "step": 31802 }, { "epoch": 0.9285276342296576, "grad_norm": 0.7057130933148639, "learning_rate": 3.97080291970803e-07, "loss": 0.5961, "step": 31803 }, { "epoch": 0.9285568304575049, "grad_norm": 0.686743657748167, "learning_rate": 3.969180859691809e-07, "loss": 0.6142, "step": 31804 }, { "epoch": 0.9285860266853523, "grad_norm": 0.7041259680267543, "learning_rate": 3.967558799675588e-07, "loss": 0.6267, "step": 31805 }, { "epoch": 0.9286152229131996, "grad_norm": 0.7378683440277035, "learning_rate": 3.965936739659368e-07, "loss": 0.6681, "step": 31806 }, { "epoch": 0.928644419141047, "grad_norm": 0.6951490251958893, "learning_rate": 3.964314679643147e-07, "loss": 0.6039, "step": 31807 }, { "epoch": 0.9286736153688944, "grad_norm": 0.7075882288196792, "learning_rate": 3.9626926196269266e-07, "loss": 0.6143, "step": 31808 }, { "epoch": 0.9287028115967417, "grad_norm": 0.8111730157093997, "learning_rate": 3.961070559610706e-07, "loss": 0.6629, "step": 31809 }, { "epoch": 0.9287320078245891, "grad_norm": 0.738132943145426, "learning_rate": 3.959448499594485e-07, "loss": 0.5932, "step": 31810 }, { "epoch": 0.9287612040524365, "grad_norm": 0.7618547182535673, "learning_rate": 3.9578264395782646e-07, "loss": 0.7197, "step": 31811 }, { "epoch": 0.9287904002802838, "grad_norm": 0.7116611776025782, "learning_rate": 3.9562043795620437e-07, "loss": 0.6287, "step": 31812 }, { "epoch": 0.9288195965081312, "grad_norm": 0.7168171635220825, "learning_rate": 3.9545823195458237e-07, "loss": 0.6117, "step": 31813 }, { "epoch": 0.9288487927359785, "grad_norm": 0.7423241416285413, "learning_rate": 3.9529602595296027e-07, "loss": 0.6367, "step": 31814 }, { "epoch": 0.9288779889638259, "grad_norm": 0.7312278838496538, "learning_rate": 3.951338199513383e-07, "loss": 0.6514, "step": 31815 }, { "epoch": 0.9289071851916733, "grad_norm": 0.694343656972748, "learning_rate": 3.949716139497162e-07, "loss": 0.5744, "step": 31816 }, { "epoch": 0.9289363814195206, "grad_norm": 0.7326671685069869, "learning_rate": 3.948094079480941e-07, "loss": 0.6416, "step": 31817 }, { "epoch": 0.928965577647368, "grad_norm": 0.7065604713801859, "learning_rate": 3.9464720194647204e-07, "loss": 0.6125, "step": 31818 }, { "epoch": 0.9289947738752153, "grad_norm": 0.7343338805325894, "learning_rate": 3.9448499594485e-07, "loss": 0.6495, "step": 31819 }, { "epoch": 0.9290239701030627, "grad_norm": 0.6804276824934943, "learning_rate": 3.9432278994322794e-07, "loss": 0.5996, "step": 31820 }, { "epoch": 0.9290531663309101, "grad_norm": 0.7444790376760124, "learning_rate": 3.9416058394160584e-07, "loss": 0.6652, "step": 31821 }, { "epoch": 0.9290823625587574, "grad_norm": 0.6760589942026266, "learning_rate": 3.9399837793998385e-07, "loss": 0.5827, "step": 31822 }, { "epoch": 0.9291115587866048, "grad_norm": 0.7327330291059563, "learning_rate": 3.9383617193836175e-07, "loss": 0.6183, "step": 31823 }, { "epoch": 0.9291407550144521, "grad_norm": 0.7135168335632606, "learning_rate": 3.9367396593673965e-07, "loss": 0.6508, "step": 31824 }, { "epoch": 0.9291699512422995, "grad_norm": 0.7191028002650526, "learning_rate": 3.9351175993511766e-07, "loss": 0.6155, "step": 31825 }, { "epoch": 0.9291991474701469, "grad_norm": 0.8001446821303139, "learning_rate": 3.9334955393349556e-07, "loss": 0.6995, "step": 31826 }, { "epoch": 0.9292283436979942, "grad_norm": 0.7737592499191466, "learning_rate": 3.931873479318735e-07, "loss": 0.6835, "step": 31827 }, { "epoch": 0.9292575399258416, "grad_norm": 0.7308535218281742, "learning_rate": 3.9302514193025147e-07, "loss": 0.6377, "step": 31828 }, { "epoch": 0.929286736153689, "grad_norm": 1.013529359372214, "learning_rate": 3.928629359286294e-07, "loss": 0.712, "step": 31829 }, { "epoch": 0.9293159323815363, "grad_norm": 0.7631805399896114, "learning_rate": 3.927007299270073e-07, "loss": 0.6846, "step": 31830 }, { "epoch": 0.9293451286093837, "grad_norm": 0.7188449378934552, "learning_rate": 3.925385239253852e-07, "loss": 0.6572, "step": 31831 }, { "epoch": 0.929374324837231, "grad_norm": 0.7464998286754789, "learning_rate": 3.9237631792376323e-07, "loss": 0.6554, "step": 31832 }, { "epoch": 0.9294035210650784, "grad_norm": 0.6754180341371483, "learning_rate": 3.9221411192214113e-07, "loss": 0.5747, "step": 31833 }, { "epoch": 0.9294327172929258, "grad_norm": 0.7745784616383978, "learning_rate": 3.9205190592051914e-07, "loss": 0.6784, "step": 31834 }, { "epoch": 0.9294619135207731, "grad_norm": 0.7337006620194501, "learning_rate": 3.9188969991889704e-07, "loss": 0.643, "step": 31835 }, { "epoch": 0.9294911097486205, "grad_norm": 0.7066811150002847, "learning_rate": 3.9172749391727494e-07, "loss": 0.5805, "step": 31836 }, { "epoch": 0.9295203059764678, "grad_norm": 0.7216755850808695, "learning_rate": 3.915652879156529e-07, "loss": 0.633, "step": 31837 }, { "epoch": 0.9295495022043152, "grad_norm": 0.7214173060395341, "learning_rate": 3.9140308191403085e-07, "loss": 0.578, "step": 31838 }, { "epoch": 0.9295786984321626, "grad_norm": 0.7668184838638091, "learning_rate": 3.912408759124088e-07, "loss": 0.6919, "step": 31839 }, { "epoch": 0.9296078946600099, "grad_norm": 0.7308045166472714, "learning_rate": 3.910786699107867e-07, "loss": 0.659, "step": 31840 }, { "epoch": 0.9296370908878573, "grad_norm": 0.6873802089224419, "learning_rate": 3.909164639091647e-07, "loss": 0.653, "step": 31841 }, { "epoch": 0.9296662871157046, "grad_norm": 0.7631635214032388, "learning_rate": 3.907542579075426e-07, "loss": 0.6832, "step": 31842 }, { "epoch": 0.929695483343552, "grad_norm": 0.706230310023073, "learning_rate": 3.905920519059205e-07, "loss": 0.6287, "step": 31843 }, { "epoch": 0.9297246795713994, "grad_norm": 0.696221446656279, "learning_rate": 3.904298459042985e-07, "loss": 0.6236, "step": 31844 }, { "epoch": 0.9297538757992467, "grad_norm": 0.745782424333266, "learning_rate": 3.902676399026764e-07, "loss": 0.6831, "step": 31845 }, { "epoch": 0.9297830720270941, "grad_norm": 0.7558813701436778, "learning_rate": 3.9010543390105437e-07, "loss": 0.6297, "step": 31846 }, { "epoch": 0.9298122682549415, "grad_norm": 0.7051848401039533, "learning_rate": 3.899432278994323e-07, "loss": 0.5423, "step": 31847 }, { "epoch": 0.9298414644827888, "grad_norm": 0.725291292962067, "learning_rate": 3.897810218978103e-07, "loss": 0.5996, "step": 31848 }, { "epoch": 0.9298706607106362, "grad_norm": 0.6916646937086335, "learning_rate": 3.896188158961882e-07, "loss": 0.6317, "step": 31849 }, { "epoch": 0.9298998569384835, "grad_norm": 0.722487641008946, "learning_rate": 3.894566098945661e-07, "loss": 0.6242, "step": 31850 }, { "epoch": 0.9299290531663309, "grad_norm": 0.711368343628542, "learning_rate": 3.892944038929441e-07, "loss": 0.599, "step": 31851 }, { "epoch": 0.9299582493941783, "grad_norm": 0.6614809494762216, "learning_rate": 3.89132197891322e-07, "loss": 0.5274, "step": 31852 }, { "epoch": 0.9299874456220256, "grad_norm": 0.6695013946157722, "learning_rate": 3.889699918897e-07, "loss": 0.5853, "step": 31853 }, { "epoch": 0.930016641849873, "grad_norm": 0.7526883868153063, "learning_rate": 3.888077858880779e-07, "loss": 0.694, "step": 31854 }, { "epoch": 0.9300458380777203, "grad_norm": 0.7354340218742206, "learning_rate": 3.8864557988645585e-07, "loss": 0.7117, "step": 31855 }, { "epoch": 0.9300750343055677, "grad_norm": 0.7279682598042584, "learning_rate": 3.8848337388483375e-07, "loss": 0.6502, "step": 31856 }, { "epoch": 0.9301042305334151, "grad_norm": 0.67155919215775, "learning_rate": 3.883211678832117e-07, "loss": 0.5684, "step": 31857 }, { "epoch": 0.9301334267612624, "grad_norm": 0.7400460728912652, "learning_rate": 3.8815896188158966e-07, "loss": 0.6488, "step": 31858 }, { "epoch": 0.9301626229891098, "grad_norm": 0.6922415036342937, "learning_rate": 3.8799675587996756e-07, "loss": 0.619, "step": 31859 }, { "epoch": 0.9301918192169571, "grad_norm": 0.7182898176812995, "learning_rate": 3.8783454987834557e-07, "loss": 0.6696, "step": 31860 }, { "epoch": 0.9302210154448045, "grad_norm": 0.6932828581899995, "learning_rate": 3.8767234387672347e-07, "loss": 0.5485, "step": 31861 }, { "epoch": 0.9302502116726519, "grad_norm": 0.7339182998257433, "learning_rate": 3.875101378751014e-07, "loss": 0.6246, "step": 31862 }, { "epoch": 0.9302794079004992, "grad_norm": 0.6885559223270591, "learning_rate": 3.873479318734794e-07, "loss": 0.575, "step": 31863 }, { "epoch": 0.9303086041283466, "grad_norm": 0.6999125007323981, "learning_rate": 3.871857258718573e-07, "loss": 0.6196, "step": 31864 }, { "epoch": 0.930337800356194, "grad_norm": 0.765991893766579, "learning_rate": 3.8702351987023523e-07, "loss": 0.7399, "step": 31865 }, { "epoch": 0.9303669965840413, "grad_norm": 0.7404134156037125, "learning_rate": 3.8686131386861313e-07, "loss": 0.6051, "step": 31866 }, { "epoch": 0.9303961928118887, "grad_norm": 0.709180174272603, "learning_rate": 3.8669910786699114e-07, "loss": 0.6449, "step": 31867 }, { "epoch": 0.930425389039736, "grad_norm": 0.7022269412169405, "learning_rate": 3.8653690186536904e-07, "loss": 0.5905, "step": 31868 }, { "epoch": 0.9304545852675834, "grad_norm": 0.7430369398426365, "learning_rate": 3.8637469586374694e-07, "loss": 0.6488, "step": 31869 }, { "epoch": 0.9304837814954308, "grad_norm": 0.7239520365143118, "learning_rate": 3.8621248986212495e-07, "loss": 0.627, "step": 31870 }, { "epoch": 0.9305129777232781, "grad_norm": 0.7155484658411021, "learning_rate": 3.8605028386050285e-07, "loss": 0.6238, "step": 31871 }, { "epoch": 0.9305421739511255, "grad_norm": 0.7277951244344664, "learning_rate": 3.858880778588808e-07, "loss": 0.6374, "step": 31872 }, { "epoch": 0.9305713701789728, "grad_norm": 0.7021361399544704, "learning_rate": 3.8572587185725876e-07, "loss": 0.62, "step": 31873 }, { "epoch": 0.9306005664068202, "grad_norm": 0.7644623980656716, "learning_rate": 3.855636658556367e-07, "loss": 0.6796, "step": 31874 }, { "epoch": 0.9306297626346676, "grad_norm": 0.6857827579590321, "learning_rate": 3.854014598540146e-07, "loss": 0.6048, "step": 31875 }, { "epoch": 0.9306589588625149, "grad_norm": 0.7741284490899508, "learning_rate": 3.8523925385239257e-07, "loss": 0.7059, "step": 31876 }, { "epoch": 0.9306881550903623, "grad_norm": 0.7329382418391205, "learning_rate": 3.850770478507705e-07, "loss": 0.599, "step": 31877 }, { "epoch": 0.9307173513182097, "grad_norm": 0.7200790210569129, "learning_rate": 3.849148418491484e-07, "loss": 0.646, "step": 31878 }, { "epoch": 0.930746547546057, "grad_norm": 0.737985283512705, "learning_rate": 3.8475263584752643e-07, "loss": 0.6549, "step": 31879 }, { "epoch": 0.9307757437739044, "grad_norm": 0.7182361714488134, "learning_rate": 3.8459042984590433e-07, "loss": 0.623, "step": 31880 }, { "epoch": 0.9308049400017517, "grad_norm": 0.7223251023471992, "learning_rate": 3.844282238442823e-07, "loss": 0.6473, "step": 31881 }, { "epoch": 0.9308341362295991, "grad_norm": 0.8534521514317457, "learning_rate": 3.8426601784266024e-07, "loss": 0.7293, "step": 31882 }, { "epoch": 0.9308633324574465, "grad_norm": 0.6874390249944352, "learning_rate": 3.8410381184103814e-07, "loss": 0.5742, "step": 31883 }, { "epoch": 0.9308925286852938, "grad_norm": 0.7461903378330742, "learning_rate": 3.839416058394161e-07, "loss": 0.6244, "step": 31884 }, { "epoch": 0.9309217249131412, "grad_norm": 0.7842371171357603, "learning_rate": 3.83779399837794e-07, "loss": 0.6987, "step": 31885 }, { "epoch": 0.9309509211409885, "grad_norm": 0.7308989971586888, "learning_rate": 3.83617193836172e-07, "loss": 0.61, "step": 31886 }, { "epoch": 0.9309801173688359, "grad_norm": 0.746303887793995, "learning_rate": 3.834549878345499e-07, "loss": 0.6341, "step": 31887 }, { "epoch": 0.9310093135966833, "grad_norm": 0.6932892378489955, "learning_rate": 3.832927818329279e-07, "loss": 0.586, "step": 31888 }, { "epoch": 0.9310385098245306, "grad_norm": 0.7180052138795613, "learning_rate": 3.831305758313058e-07, "loss": 0.6046, "step": 31889 }, { "epoch": 0.931067706052378, "grad_norm": 0.710088746443769, "learning_rate": 3.829683698296837e-07, "loss": 0.5629, "step": 31890 }, { "epoch": 0.9310969022802253, "grad_norm": 0.7671961902575826, "learning_rate": 3.8280616382806166e-07, "loss": 0.6807, "step": 31891 }, { "epoch": 0.9311260985080727, "grad_norm": 0.696914263364801, "learning_rate": 3.826439578264396e-07, "loss": 0.6208, "step": 31892 }, { "epoch": 0.9311552947359201, "grad_norm": 0.6767453080221242, "learning_rate": 3.8248175182481757e-07, "loss": 0.547, "step": 31893 }, { "epoch": 0.9311844909637674, "grad_norm": 0.7234052782483551, "learning_rate": 3.8231954582319547e-07, "loss": 0.6052, "step": 31894 }, { "epoch": 0.9312136871916148, "grad_norm": 0.7286408705798085, "learning_rate": 3.8215733982157337e-07, "loss": 0.6855, "step": 31895 }, { "epoch": 0.9312428834194622, "grad_norm": 0.6809443824445354, "learning_rate": 3.819951338199514e-07, "loss": 0.5976, "step": 31896 }, { "epoch": 0.9312720796473095, "grad_norm": 0.6968103414882562, "learning_rate": 3.818329278183293e-07, "loss": 0.6169, "step": 31897 }, { "epoch": 0.9313012758751569, "grad_norm": 0.7796091395086682, "learning_rate": 3.816707218167073e-07, "loss": 0.6886, "step": 31898 }, { "epoch": 0.9313304721030042, "grad_norm": 0.7077471749441859, "learning_rate": 3.815085158150852e-07, "loss": 0.6299, "step": 31899 }, { "epoch": 0.9313596683308517, "grad_norm": 0.7353863033738931, "learning_rate": 3.8134630981346314e-07, "loss": 0.6092, "step": 31900 }, { "epoch": 0.9313888645586991, "grad_norm": 0.7486899207257177, "learning_rate": 3.8118410381184104e-07, "loss": 0.668, "step": 31901 }, { "epoch": 0.9314180607865464, "grad_norm": 0.7193929617101257, "learning_rate": 3.81021897810219e-07, "loss": 0.6028, "step": 31902 }, { "epoch": 0.9314472570143938, "grad_norm": 0.6642121271865383, "learning_rate": 3.8085969180859695e-07, "loss": 0.5543, "step": 31903 }, { "epoch": 0.9314764532422412, "grad_norm": 0.7151407098957857, "learning_rate": 3.8069748580697485e-07, "loss": 0.6599, "step": 31904 }, { "epoch": 0.9315056494700885, "grad_norm": 0.6755022595403061, "learning_rate": 3.8053527980535286e-07, "loss": 0.5916, "step": 31905 }, { "epoch": 0.9315348456979359, "grad_norm": 0.7322660272825705, "learning_rate": 3.8037307380373076e-07, "loss": 0.6497, "step": 31906 }, { "epoch": 0.9315640419257832, "grad_norm": 0.7550550387199891, "learning_rate": 3.802108678021087e-07, "loss": 0.6615, "step": 31907 }, { "epoch": 0.9315932381536306, "grad_norm": 0.7071825064316221, "learning_rate": 3.8004866180048667e-07, "loss": 0.6199, "step": 31908 }, { "epoch": 0.931622434381478, "grad_norm": 0.7104724307873781, "learning_rate": 3.7988645579886457e-07, "loss": 0.6287, "step": 31909 }, { "epoch": 0.9316516306093253, "grad_norm": 0.7276092221721638, "learning_rate": 3.797242497972425e-07, "loss": 0.6511, "step": 31910 }, { "epoch": 0.9316808268371727, "grad_norm": 0.6691410050553273, "learning_rate": 3.795620437956205e-07, "loss": 0.5668, "step": 31911 }, { "epoch": 0.93171002306502, "grad_norm": 0.6844832690328597, "learning_rate": 3.7939983779399843e-07, "loss": 0.6018, "step": 31912 }, { "epoch": 0.9317392192928674, "grad_norm": 0.7418739167145535, "learning_rate": 3.7923763179237633e-07, "loss": 0.6522, "step": 31913 }, { "epoch": 0.9317684155207148, "grad_norm": 0.7410320023701075, "learning_rate": 3.7907542579075434e-07, "loss": 0.6772, "step": 31914 }, { "epoch": 0.9317976117485621, "grad_norm": 0.7065642264168426, "learning_rate": 3.7891321978913224e-07, "loss": 0.6103, "step": 31915 }, { "epoch": 0.9318268079764095, "grad_norm": 0.7042130226978056, "learning_rate": 3.7875101378751014e-07, "loss": 0.63, "step": 31916 }, { "epoch": 0.9318560042042568, "grad_norm": 0.7538181234386032, "learning_rate": 3.7858880778588814e-07, "loss": 0.7218, "step": 31917 }, { "epoch": 0.9318852004321042, "grad_norm": 0.6875756978577973, "learning_rate": 3.7842660178426605e-07, "loss": 0.5689, "step": 31918 }, { "epoch": 0.9319143966599516, "grad_norm": 0.7169078203962432, "learning_rate": 3.78264395782644e-07, "loss": 0.6356, "step": 31919 }, { "epoch": 0.9319435928877989, "grad_norm": 0.7635833959850901, "learning_rate": 3.781021897810219e-07, "loss": 0.7161, "step": 31920 }, { "epoch": 0.9319727891156463, "grad_norm": 0.9967381787821586, "learning_rate": 3.779399837793999e-07, "loss": 0.633, "step": 31921 }, { "epoch": 0.9320019853434937, "grad_norm": 0.7204685678433991, "learning_rate": 3.777777777777778e-07, "loss": 0.6416, "step": 31922 }, { "epoch": 0.932031181571341, "grad_norm": 0.7382445111713305, "learning_rate": 3.776155717761557e-07, "loss": 0.6453, "step": 31923 }, { "epoch": 0.9320603777991884, "grad_norm": 0.7112285659475439, "learning_rate": 3.774533657745337e-07, "loss": 0.6157, "step": 31924 }, { "epoch": 0.9320895740270357, "grad_norm": 0.776158116111757, "learning_rate": 3.772911597729116e-07, "loss": 0.6486, "step": 31925 }, { "epoch": 0.9321187702548831, "grad_norm": 1.292949753173693, "learning_rate": 3.7712895377128957e-07, "loss": 0.7381, "step": 31926 }, { "epoch": 0.9321479664827305, "grad_norm": 0.7019527433154357, "learning_rate": 3.769667477696675e-07, "loss": 0.6104, "step": 31927 }, { "epoch": 0.9321771627105778, "grad_norm": 0.660631055684704, "learning_rate": 3.768045417680454e-07, "loss": 0.5388, "step": 31928 }, { "epoch": 0.9322063589384252, "grad_norm": 0.7292788876477468, "learning_rate": 3.766423357664234e-07, "loss": 0.6871, "step": 31929 }, { "epoch": 0.9322355551662725, "grad_norm": 0.6998957824356246, "learning_rate": 3.764801297648013e-07, "loss": 0.6094, "step": 31930 }, { "epoch": 0.9322647513941199, "grad_norm": 0.9185867679040238, "learning_rate": 3.763179237631793e-07, "loss": 0.5453, "step": 31931 }, { "epoch": 0.9322939476219673, "grad_norm": 0.6865434424533325, "learning_rate": 3.761557177615572e-07, "loss": 0.5897, "step": 31932 }, { "epoch": 0.9323231438498146, "grad_norm": 0.751078724563556, "learning_rate": 3.759935117599352e-07, "loss": 0.6894, "step": 31933 }, { "epoch": 0.932352340077662, "grad_norm": 0.6979432697891758, "learning_rate": 3.758313057583131e-07, "loss": 0.6087, "step": 31934 }, { "epoch": 0.9323815363055094, "grad_norm": 0.7099247171029659, "learning_rate": 3.75669099756691e-07, "loss": 0.6275, "step": 31935 }, { "epoch": 0.9324107325333567, "grad_norm": 0.7722039320543695, "learning_rate": 3.75506893755069e-07, "loss": 0.6797, "step": 31936 }, { "epoch": 0.9324399287612041, "grad_norm": 0.7077183842620777, "learning_rate": 3.753446877534469e-07, "loss": 0.6025, "step": 31937 }, { "epoch": 0.9324691249890514, "grad_norm": 0.6599442272771866, "learning_rate": 3.7518248175182486e-07, "loss": 0.5349, "step": 31938 }, { "epoch": 0.9324983212168988, "grad_norm": 0.7444189171236295, "learning_rate": 3.7502027575020276e-07, "loss": 0.6766, "step": 31939 }, { "epoch": 0.9325275174447462, "grad_norm": 0.7150316307486188, "learning_rate": 3.7485806974858077e-07, "loss": 0.6469, "step": 31940 }, { "epoch": 0.9325567136725935, "grad_norm": 0.7146850487497948, "learning_rate": 3.7469586374695867e-07, "loss": 0.6749, "step": 31941 }, { "epoch": 0.9325859099004409, "grad_norm": 0.6986797594322167, "learning_rate": 3.7453365774533657e-07, "loss": 0.5862, "step": 31942 }, { "epoch": 0.9326151061282882, "grad_norm": 0.7207732580419287, "learning_rate": 3.743714517437146e-07, "loss": 0.6419, "step": 31943 }, { "epoch": 0.9326443023561356, "grad_norm": 0.6754419792089458, "learning_rate": 3.742092457420925e-07, "loss": 0.5904, "step": 31944 }, { "epoch": 0.932673498583983, "grad_norm": 0.7194114969687939, "learning_rate": 3.7404703974047043e-07, "loss": 0.6282, "step": 31945 }, { "epoch": 0.9327026948118303, "grad_norm": 0.6956732556765542, "learning_rate": 3.738848337388484e-07, "loss": 0.6127, "step": 31946 }, { "epoch": 0.9327318910396777, "grad_norm": 0.7133578990748713, "learning_rate": 3.7372262773722634e-07, "loss": 0.5664, "step": 31947 }, { "epoch": 0.932761087267525, "grad_norm": 0.7115270032445663, "learning_rate": 3.7356042173560424e-07, "loss": 0.61, "step": 31948 }, { "epoch": 0.9327902834953724, "grad_norm": 0.7231291950039375, "learning_rate": 3.7339821573398214e-07, "loss": 0.5808, "step": 31949 }, { "epoch": 0.9328194797232198, "grad_norm": 0.7026095064585669, "learning_rate": 3.7323600973236015e-07, "loss": 0.5992, "step": 31950 }, { "epoch": 0.9328486759510671, "grad_norm": 0.7664858122317839, "learning_rate": 3.7307380373073805e-07, "loss": 0.6952, "step": 31951 }, { "epoch": 0.9328778721789145, "grad_norm": 0.7292485499978653, "learning_rate": 3.7291159772911605e-07, "loss": 0.6516, "step": 31952 }, { "epoch": 0.9329070684067619, "grad_norm": 0.8730688622603552, "learning_rate": 3.7274939172749395e-07, "loss": 0.7463, "step": 31953 }, { "epoch": 0.9329362646346092, "grad_norm": 0.7511734272766017, "learning_rate": 3.725871857258719e-07, "loss": 0.7127, "step": 31954 }, { "epoch": 0.9329654608624566, "grad_norm": 0.7381870951490107, "learning_rate": 3.724249797242498e-07, "loss": 0.6563, "step": 31955 }, { "epoch": 0.9329946570903039, "grad_norm": 0.697811258648426, "learning_rate": 3.7226277372262776e-07, "loss": 0.5783, "step": 31956 }, { "epoch": 0.9330238533181513, "grad_norm": 0.6335266548686956, "learning_rate": 3.721005677210057e-07, "loss": 0.5234, "step": 31957 }, { "epoch": 0.9330530495459987, "grad_norm": 0.7130260799617654, "learning_rate": 3.719383617193836e-07, "loss": 0.6501, "step": 31958 }, { "epoch": 0.933082245773846, "grad_norm": 0.7462645531718674, "learning_rate": 3.717761557177616e-07, "loss": 0.6884, "step": 31959 }, { "epoch": 0.9331114420016934, "grad_norm": 0.7272100406640433, "learning_rate": 3.716139497161395e-07, "loss": 0.6129, "step": 31960 }, { "epoch": 0.9331406382295407, "grad_norm": 0.7843217850219262, "learning_rate": 3.714517437145174e-07, "loss": 0.716, "step": 31961 }, { "epoch": 0.9331698344573881, "grad_norm": 0.8148309480237075, "learning_rate": 3.7128953771289543e-07, "loss": 0.6383, "step": 31962 }, { "epoch": 0.9331990306852355, "grad_norm": 0.7075922624757552, "learning_rate": 3.7112733171127333e-07, "loss": 0.6327, "step": 31963 }, { "epoch": 0.9332282269130828, "grad_norm": 0.6592632061643943, "learning_rate": 3.709651257096513e-07, "loss": 0.563, "step": 31964 }, { "epoch": 0.9332574231409302, "grad_norm": 0.7588148875691746, "learning_rate": 3.7080291970802924e-07, "loss": 0.6849, "step": 31965 }, { "epoch": 0.9332866193687775, "grad_norm": 0.7043131911300852, "learning_rate": 3.706407137064072e-07, "loss": 0.6065, "step": 31966 }, { "epoch": 0.9333158155966249, "grad_norm": 0.7585147105743021, "learning_rate": 3.704785077047851e-07, "loss": 0.717, "step": 31967 }, { "epoch": 0.9333450118244723, "grad_norm": 0.7093428287286278, "learning_rate": 3.70316301703163e-07, "loss": 0.6215, "step": 31968 }, { "epoch": 0.9333742080523196, "grad_norm": 0.7563183196732031, "learning_rate": 3.70154095701541e-07, "loss": 0.6904, "step": 31969 }, { "epoch": 0.933403404280167, "grad_norm": 0.7194580058817859, "learning_rate": 3.699918896999189e-07, "loss": 0.6503, "step": 31970 }, { "epoch": 0.9334326005080144, "grad_norm": 0.6991335123394831, "learning_rate": 3.698296836982969e-07, "loss": 0.5854, "step": 31971 }, { "epoch": 0.9334617967358617, "grad_norm": 0.7514049325157379, "learning_rate": 3.696674776966748e-07, "loss": 0.6525, "step": 31972 }, { "epoch": 0.9334909929637091, "grad_norm": 0.7350185299382695, "learning_rate": 3.6950527169505277e-07, "loss": 0.6569, "step": 31973 }, { "epoch": 0.9335201891915564, "grad_norm": 0.6501574780131432, "learning_rate": 3.6934306569343067e-07, "loss": 0.5384, "step": 31974 }, { "epoch": 0.9335493854194038, "grad_norm": 0.7501274318294244, "learning_rate": 3.691808596918086e-07, "loss": 0.6889, "step": 31975 }, { "epoch": 0.9335785816472512, "grad_norm": 0.7535377384114834, "learning_rate": 3.690186536901866e-07, "loss": 0.6704, "step": 31976 }, { "epoch": 0.9336077778750985, "grad_norm": 0.6922426434207779, "learning_rate": 3.688564476885645e-07, "loss": 0.5857, "step": 31977 }, { "epoch": 0.9336369741029459, "grad_norm": 0.728728170681617, "learning_rate": 3.686942416869425e-07, "loss": 0.6006, "step": 31978 }, { "epoch": 0.9336661703307932, "grad_norm": 0.8087890491450196, "learning_rate": 3.685320356853204e-07, "loss": 0.7059, "step": 31979 }, { "epoch": 0.9336953665586406, "grad_norm": 0.6517636511787426, "learning_rate": 3.6836982968369834e-07, "loss": 0.5306, "step": 31980 }, { "epoch": 0.933724562786488, "grad_norm": 0.7699052771892667, "learning_rate": 3.682076236820763e-07, "loss": 0.6808, "step": 31981 }, { "epoch": 0.9337537590143353, "grad_norm": 0.7411281647188789, "learning_rate": 3.680454176804542e-07, "loss": 0.6172, "step": 31982 }, { "epoch": 0.9337829552421827, "grad_norm": 0.737515974580347, "learning_rate": 3.6788321167883215e-07, "loss": 0.6324, "step": 31983 }, { "epoch": 0.93381215147003, "grad_norm": 0.7303424149621092, "learning_rate": 3.6772100567721005e-07, "loss": 0.6798, "step": 31984 }, { "epoch": 0.9338413476978774, "grad_norm": 0.7421523465335911, "learning_rate": 3.6755879967558805e-07, "loss": 0.6706, "step": 31985 }, { "epoch": 0.9338705439257248, "grad_norm": 0.7278846868888753, "learning_rate": 3.6739659367396595e-07, "loss": 0.6651, "step": 31986 }, { "epoch": 0.9338997401535721, "grad_norm": 0.6407787905565686, "learning_rate": 3.6723438767234386e-07, "loss": 0.5078, "step": 31987 }, { "epoch": 0.9339289363814195, "grad_norm": 0.6948733223336181, "learning_rate": 3.6707218167072186e-07, "loss": 0.5569, "step": 31988 }, { "epoch": 0.9339581326092669, "grad_norm": 0.7533010739527033, "learning_rate": 3.6690997566909976e-07, "loss": 0.6915, "step": 31989 }, { "epoch": 0.9339873288371142, "grad_norm": 0.7324509386211274, "learning_rate": 3.667477696674777e-07, "loss": 0.6742, "step": 31990 }, { "epoch": 0.9340165250649616, "grad_norm": 0.7839799827040452, "learning_rate": 3.6658556366585567e-07, "loss": 0.6107, "step": 31991 }, { "epoch": 0.9340457212928089, "grad_norm": 0.6707266845163902, "learning_rate": 3.664233576642336e-07, "loss": 0.5319, "step": 31992 }, { "epoch": 0.9340749175206563, "grad_norm": 0.6869325333212586, "learning_rate": 3.662611516626115e-07, "loss": 0.6115, "step": 31993 }, { "epoch": 0.9341041137485037, "grad_norm": 0.765565096351346, "learning_rate": 3.660989456609895e-07, "loss": 0.6809, "step": 31994 }, { "epoch": 0.934133309976351, "grad_norm": 0.6967555700967577, "learning_rate": 3.6593673965936743e-07, "loss": 0.6175, "step": 31995 }, { "epoch": 0.9341625062041984, "grad_norm": 0.7521508288447559, "learning_rate": 3.6577453365774533e-07, "loss": 0.697, "step": 31996 }, { "epoch": 0.9341917024320457, "grad_norm": 0.7307734468439443, "learning_rate": 3.6561232765612334e-07, "loss": 0.6718, "step": 31997 }, { "epoch": 0.9342208986598931, "grad_norm": 0.7109652755267453, "learning_rate": 3.6545012165450124e-07, "loss": 0.6449, "step": 31998 }, { "epoch": 0.9342500948877405, "grad_norm": 0.7052295983578277, "learning_rate": 3.652879156528792e-07, "loss": 0.6124, "step": 31999 }, { "epoch": 0.9342792911155878, "grad_norm": 0.7015785351529394, "learning_rate": 3.6512570965125715e-07, "loss": 0.6318, "step": 32000 }, { "epoch": 0.9343084873434352, "grad_norm": 0.6890268427520442, "learning_rate": 3.6496350364963505e-07, "loss": 0.6075, "step": 32001 }, { "epoch": 0.9343376835712826, "grad_norm": 0.6773943981943551, "learning_rate": 3.64801297648013e-07, "loss": 0.5486, "step": 32002 }, { "epoch": 0.9343668797991299, "grad_norm": 0.6536800129719288, "learning_rate": 3.646390916463909e-07, "loss": 0.552, "step": 32003 }, { "epoch": 0.9343960760269773, "grad_norm": 0.7356975039803839, "learning_rate": 3.644768856447689e-07, "loss": 0.6851, "step": 32004 }, { "epoch": 0.9344252722548246, "grad_norm": 0.7000526077505139, "learning_rate": 3.643146796431468e-07, "loss": 0.5904, "step": 32005 }, { "epoch": 0.934454468482672, "grad_norm": 0.7100658680026095, "learning_rate": 3.641524736415248e-07, "loss": 0.5855, "step": 32006 }, { "epoch": 0.9344836647105194, "grad_norm": 0.6957428596792055, "learning_rate": 3.639902676399027e-07, "loss": 0.5767, "step": 32007 }, { "epoch": 0.9345128609383667, "grad_norm": 0.7138548473107624, "learning_rate": 3.638280616382806e-07, "loss": 0.6269, "step": 32008 }, { "epoch": 0.9345420571662141, "grad_norm": 0.6998185236461446, "learning_rate": 3.636658556366586e-07, "loss": 0.6431, "step": 32009 }, { "epoch": 0.9345712533940614, "grad_norm": 0.6833750118410691, "learning_rate": 3.6350364963503653e-07, "loss": 0.5882, "step": 32010 }, { "epoch": 0.9346004496219088, "grad_norm": 0.6843901573650476, "learning_rate": 3.633414436334145e-07, "loss": 0.5582, "step": 32011 }, { "epoch": 0.9346296458497562, "grad_norm": 0.7748105187065761, "learning_rate": 3.631792376317924e-07, "loss": 0.7181, "step": 32012 }, { "epoch": 0.9346588420776035, "grad_norm": 0.6708825550232257, "learning_rate": 3.630170316301704e-07, "loss": 0.541, "step": 32013 }, { "epoch": 0.9346880383054509, "grad_norm": 0.7299120129112463, "learning_rate": 3.628548256285483e-07, "loss": 0.6695, "step": 32014 }, { "epoch": 0.9347172345332982, "grad_norm": 0.735181689196891, "learning_rate": 3.626926196269262e-07, "loss": 0.6505, "step": 32015 }, { "epoch": 0.9347464307611456, "grad_norm": 0.6900412703854156, "learning_rate": 3.625304136253042e-07, "loss": 0.5919, "step": 32016 }, { "epoch": 0.934775626988993, "grad_norm": 0.7539426998858917, "learning_rate": 3.623682076236821e-07, "loss": 0.6049, "step": 32017 }, { "epoch": 0.9348048232168403, "grad_norm": 0.6964754595583136, "learning_rate": 3.6220600162206005e-07, "loss": 0.6094, "step": 32018 }, { "epoch": 0.9348340194446877, "grad_norm": 0.7557994165837341, "learning_rate": 3.62043795620438e-07, "loss": 0.7176, "step": 32019 }, { "epoch": 0.9348632156725352, "grad_norm": 0.759709773079089, "learning_rate": 3.618815896188159e-07, "loss": 0.6819, "step": 32020 }, { "epoch": 0.9348924119003825, "grad_norm": 0.7689806515596936, "learning_rate": 3.6171938361719386e-07, "loss": 0.6712, "step": 32021 }, { "epoch": 0.9349216081282299, "grad_norm": 0.7092480416056482, "learning_rate": 3.6155717761557176e-07, "loss": 0.5655, "step": 32022 }, { "epoch": 0.9349508043560772, "grad_norm": 0.7820018095948873, "learning_rate": 3.6139497161394977e-07, "loss": 0.7, "step": 32023 }, { "epoch": 0.9349800005839246, "grad_norm": 0.7740046040755805, "learning_rate": 3.6123276561232767e-07, "loss": 0.5909, "step": 32024 }, { "epoch": 0.935009196811772, "grad_norm": 0.6711525915224215, "learning_rate": 3.610705596107057e-07, "loss": 0.621, "step": 32025 }, { "epoch": 0.9350383930396193, "grad_norm": 0.7193608884700519, "learning_rate": 3.609083536090836e-07, "loss": 0.606, "step": 32026 }, { "epoch": 0.9350675892674667, "grad_norm": 0.7161555811886642, "learning_rate": 3.607461476074615e-07, "loss": 0.603, "step": 32027 }, { "epoch": 0.935096785495314, "grad_norm": 0.776735015584234, "learning_rate": 3.6058394160583943e-07, "loss": 0.7301, "step": 32028 }, { "epoch": 0.9351259817231614, "grad_norm": 0.6547510502853919, "learning_rate": 3.604217356042174e-07, "loss": 0.5627, "step": 32029 }, { "epoch": 0.9351551779510088, "grad_norm": 0.7207787587583808, "learning_rate": 3.6025952960259534e-07, "loss": 0.613, "step": 32030 }, { "epoch": 0.9351843741788561, "grad_norm": 0.7245276805974272, "learning_rate": 3.6009732360097324e-07, "loss": 0.6359, "step": 32031 }, { "epoch": 0.9352135704067035, "grad_norm": 0.7350428489992117, "learning_rate": 3.5993511759935125e-07, "loss": 0.6686, "step": 32032 }, { "epoch": 0.9352427666345509, "grad_norm": 0.6629173807859894, "learning_rate": 3.5977291159772915e-07, "loss": 0.5686, "step": 32033 }, { "epoch": 0.9352719628623982, "grad_norm": 0.7086526821467732, "learning_rate": 3.5961070559610705e-07, "loss": 0.5925, "step": 32034 }, { "epoch": 0.9353011590902456, "grad_norm": 0.7508148808497603, "learning_rate": 3.5944849959448506e-07, "loss": 0.706, "step": 32035 }, { "epoch": 0.9353303553180929, "grad_norm": 0.6441576505635455, "learning_rate": 3.5928629359286296e-07, "loss": 0.5236, "step": 32036 }, { "epoch": 0.9353595515459403, "grad_norm": 0.768944403841671, "learning_rate": 3.591240875912409e-07, "loss": 0.7501, "step": 32037 }, { "epoch": 0.9353887477737877, "grad_norm": 0.7229392294441904, "learning_rate": 3.589618815896188e-07, "loss": 0.6334, "step": 32038 }, { "epoch": 0.935417944001635, "grad_norm": 0.6975348671266646, "learning_rate": 3.587996755879968e-07, "loss": 0.6009, "step": 32039 }, { "epoch": 0.9354471402294824, "grad_norm": 0.7195159889309309, "learning_rate": 3.586374695863747e-07, "loss": 0.6355, "step": 32040 }, { "epoch": 0.9354763364573297, "grad_norm": 0.7167934434722518, "learning_rate": 3.584752635847526e-07, "loss": 0.6597, "step": 32041 }, { "epoch": 0.9355055326851771, "grad_norm": 0.7152162574961752, "learning_rate": 3.5831305758313063e-07, "loss": 0.6252, "step": 32042 }, { "epoch": 0.9355347289130245, "grad_norm": 0.6852548455456094, "learning_rate": 3.5815085158150853e-07, "loss": 0.5577, "step": 32043 }, { "epoch": 0.9355639251408718, "grad_norm": 0.7107915384012546, "learning_rate": 3.579886455798865e-07, "loss": 0.6371, "step": 32044 }, { "epoch": 0.9355931213687192, "grad_norm": 0.7537991285658242, "learning_rate": 3.5782643957826444e-07, "loss": 0.6513, "step": 32045 }, { "epoch": 0.9356223175965666, "grad_norm": 0.8567986357059177, "learning_rate": 3.5766423357664234e-07, "loss": 0.7501, "step": 32046 }, { "epoch": 0.9356515138244139, "grad_norm": 0.7227528124488141, "learning_rate": 3.575020275750203e-07, "loss": 0.6214, "step": 32047 }, { "epoch": 0.9356807100522613, "grad_norm": 0.7523885077785148, "learning_rate": 3.5733982157339825e-07, "loss": 0.6824, "step": 32048 }, { "epoch": 0.9357099062801086, "grad_norm": 0.7340604003854694, "learning_rate": 3.571776155717762e-07, "loss": 0.6411, "step": 32049 }, { "epoch": 0.935739102507956, "grad_norm": 0.7622723247007724, "learning_rate": 3.570154095701541e-07, "loss": 0.663, "step": 32050 }, { "epoch": 0.9357682987358034, "grad_norm": 0.7398445498020346, "learning_rate": 3.568532035685321e-07, "loss": 0.6463, "step": 32051 }, { "epoch": 0.9357974949636507, "grad_norm": 0.6839599786251755, "learning_rate": 3.5669099756691e-07, "loss": 0.6022, "step": 32052 }, { "epoch": 0.9358266911914981, "grad_norm": 0.7013325615740654, "learning_rate": 3.565287915652879e-07, "loss": 0.5914, "step": 32053 }, { "epoch": 0.9358558874193454, "grad_norm": 0.7074756997623078, "learning_rate": 3.563665855636659e-07, "loss": 0.6236, "step": 32054 }, { "epoch": 0.9358850836471928, "grad_norm": 0.7224553139222211, "learning_rate": 3.562043795620438e-07, "loss": 0.6639, "step": 32055 }, { "epoch": 0.9359142798750402, "grad_norm": 0.7480466983123193, "learning_rate": 3.5604217356042177e-07, "loss": 0.5823, "step": 32056 }, { "epoch": 0.9359434761028875, "grad_norm": 0.746991522110721, "learning_rate": 3.5587996755879967e-07, "loss": 0.6635, "step": 32057 }, { "epoch": 0.9359726723307349, "grad_norm": 0.6754664053333612, "learning_rate": 3.557177615571777e-07, "loss": 0.5966, "step": 32058 }, { "epoch": 0.9360018685585823, "grad_norm": 0.7200795050293615, "learning_rate": 3.555555555555556e-07, "loss": 0.6466, "step": 32059 }, { "epoch": 0.9360310647864296, "grad_norm": 0.7425451753291249, "learning_rate": 3.553933495539335e-07, "loss": 0.6663, "step": 32060 }, { "epoch": 0.936060261014277, "grad_norm": 0.6766929824884169, "learning_rate": 3.552311435523115e-07, "loss": 0.5749, "step": 32061 }, { "epoch": 0.9360894572421243, "grad_norm": 0.7489650816538657, "learning_rate": 3.550689375506894e-07, "loss": 0.6494, "step": 32062 }, { "epoch": 0.9361186534699717, "grad_norm": 0.7604920738404549, "learning_rate": 3.5490673154906734e-07, "loss": 0.6386, "step": 32063 }, { "epoch": 0.9361478496978191, "grad_norm": 0.7287894420789165, "learning_rate": 3.547445255474453e-07, "loss": 0.6524, "step": 32064 }, { "epoch": 0.9361770459256664, "grad_norm": 0.7267150910466471, "learning_rate": 3.5458231954582325e-07, "loss": 0.6363, "step": 32065 }, { "epoch": 0.9362062421535138, "grad_norm": 0.6921490341381389, "learning_rate": 3.5442011354420115e-07, "loss": 0.6356, "step": 32066 }, { "epoch": 0.9362354383813611, "grad_norm": 0.7190189663141466, "learning_rate": 3.5425790754257905e-07, "loss": 0.6858, "step": 32067 }, { "epoch": 0.9362646346092085, "grad_norm": 0.7032975407897936, "learning_rate": 3.5409570154095706e-07, "loss": 0.6642, "step": 32068 }, { "epoch": 0.9362938308370559, "grad_norm": 0.712380297668424, "learning_rate": 3.5393349553933496e-07, "loss": 0.6223, "step": 32069 }, { "epoch": 0.9363230270649032, "grad_norm": 0.729085682740765, "learning_rate": 3.5377128953771297e-07, "loss": 0.7, "step": 32070 }, { "epoch": 0.9363522232927506, "grad_norm": 0.7147622311418932, "learning_rate": 3.5360908353609087e-07, "loss": 0.5867, "step": 32071 }, { "epoch": 0.936381419520598, "grad_norm": 0.7864407861529004, "learning_rate": 3.534468775344688e-07, "loss": 0.6139, "step": 32072 }, { "epoch": 0.9364106157484453, "grad_norm": 0.8655474373939784, "learning_rate": 3.532846715328467e-07, "loss": 0.7316, "step": 32073 }, { "epoch": 0.9364398119762927, "grad_norm": 0.7718934651659239, "learning_rate": 3.531224655312247e-07, "loss": 0.7284, "step": 32074 }, { "epoch": 0.93646900820414, "grad_norm": 0.734288543499761, "learning_rate": 3.5296025952960263e-07, "loss": 0.678, "step": 32075 }, { "epoch": 0.9364982044319874, "grad_norm": 0.7811227150543131, "learning_rate": 3.5279805352798053e-07, "loss": 0.7594, "step": 32076 }, { "epoch": 0.9365274006598348, "grad_norm": 0.7397265197862825, "learning_rate": 3.5263584752635854e-07, "loss": 0.6526, "step": 32077 }, { "epoch": 0.9365565968876821, "grad_norm": 0.7119375627383302, "learning_rate": 3.5247364152473644e-07, "loss": 0.6545, "step": 32078 }, { "epoch": 0.9365857931155295, "grad_norm": 0.7790900635349755, "learning_rate": 3.5231143552311434e-07, "loss": 0.771, "step": 32079 }, { "epoch": 0.9366149893433768, "grad_norm": 0.7037669348576976, "learning_rate": 3.5214922952149235e-07, "loss": 0.6096, "step": 32080 }, { "epoch": 0.9366441855712242, "grad_norm": 0.7333503755556329, "learning_rate": 3.5198702351987025e-07, "loss": 0.592, "step": 32081 }, { "epoch": 0.9366733817990716, "grad_norm": 0.7191572862323121, "learning_rate": 3.518248175182482e-07, "loss": 0.6378, "step": 32082 }, { "epoch": 0.9367025780269189, "grad_norm": 0.7332393028233073, "learning_rate": 3.5166261151662616e-07, "loss": 0.6648, "step": 32083 }, { "epoch": 0.9367317742547663, "grad_norm": 0.735533786672767, "learning_rate": 3.515004055150041e-07, "loss": 0.6718, "step": 32084 }, { "epoch": 0.9367609704826136, "grad_norm": 0.7467404874434059, "learning_rate": 3.51338199513382e-07, "loss": 0.6656, "step": 32085 }, { "epoch": 0.936790166710461, "grad_norm": 0.7655443389062777, "learning_rate": 3.511759935117599e-07, "loss": 0.6902, "step": 32086 }, { "epoch": 0.9368193629383084, "grad_norm": 0.7699856107739453, "learning_rate": 3.510137875101379e-07, "loss": 0.6732, "step": 32087 }, { "epoch": 0.9368485591661557, "grad_norm": 0.7143704630503646, "learning_rate": 3.508515815085158e-07, "loss": 0.6185, "step": 32088 }, { "epoch": 0.9368777553940031, "grad_norm": 0.7069807473824022, "learning_rate": 3.506893755068938e-07, "loss": 0.6173, "step": 32089 }, { "epoch": 0.9369069516218504, "grad_norm": 0.7571282993459639, "learning_rate": 3.5052716950527173e-07, "loss": 0.7067, "step": 32090 }, { "epoch": 0.9369361478496978, "grad_norm": 0.7369225206023302, "learning_rate": 3.503649635036497e-07, "loss": 0.6522, "step": 32091 }, { "epoch": 0.9369653440775452, "grad_norm": 0.7455200041950262, "learning_rate": 3.502027575020276e-07, "loss": 0.6328, "step": 32092 }, { "epoch": 0.9369945403053925, "grad_norm": 0.7629583161064415, "learning_rate": 3.5004055150040554e-07, "loss": 0.6834, "step": 32093 }, { "epoch": 0.9370237365332399, "grad_norm": 0.6973399023290882, "learning_rate": 3.498783454987835e-07, "loss": 0.6405, "step": 32094 }, { "epoch": 0.9370529327610873, "grad_norm": 0.7460208291957443, "learning_rate": 3.497161394971614e-07, "loss": 0.7402, "step": 32095 }, { "epoch": 0.9370821289889346, "grad_norm": 0.7029749728242318, "learning_rate": 3.495539334955394e-07, "loss": 0.6533, "step": 32096 }, { "epoch": 0.937111325216782, "grad_norm": 0.7579288894144982, "learning_rate": 3.493917274939173e-07, "loss": 0.6916, "step": 32097 }, { "epoch": 0.9371405214446293, "grad_norm": 0.6709816154568068, "learning_rate": 3.4922952149229525e-07, "loss": 0.5618, "step": 32098 }, { "epoch": 0.9371697176724767, "grad_norm": 0.6954148622998323, "learning_rate": 3.490673154906732e-07, "loss": 0.5744, "step": 32099 }, { "epoch": 0.9371989139003241, "grad_norm": 0.7060229639671372, "learning_rate": 3.489051094890511e-07, "loss": 0.6043, "step": 32100 }, { "epoch": 0.9372281101281714, "grad_norm": 0.7640391917556402, "learning_rate": 3.4874290348742906e-07, "loss": 0.6784, "step": 32101 }, { "epoch": 0.9372573063560188, "grad_norm": 0.7238253216007535, "learning_rate": 3.4858069748580696e-07, "loss": 0.6403, "step": 32102 }, { "epoch": 0.9372865025838661, "grad_norm": 0.7099278335888094, "learning_rate": 3.4841849148418497e-07, "loss": 0.6555, "step": 32103 }, { "epoch": 0.9373156988117135, "grad_norm": 0.726550431477844, "learning_rate": 3.4825628548256287e-07, "loss": 0.637, "step": 32104 }, { "epoch": 0.9373448950395609, "grad_norm": 0.7107672160532367, "learning_rate": 3.4809407948094077e-07, "loss": 0.6544, "step": 32105 }, { "epoch": 0.9373740912674082, "grad_norm": 0.7047555420394978, "learning_rate": 3.479318734793188e-07, "loss": 0.6406, "step": 32106 }, { "epoch": 0.9374032874952556, "grad_norm": 0.7062070787990342, "learning_rate": 3.477696674776967e-07, "loss": 0.6358, "step": 32107 }, { "epoch": 0.937432483723103, "grad_norm": 0.7849199699173101, "learning_rate": 3.476074614760747e-07, "loss": 0.6966, "step": 32108 }, { "epoch": 0.9374616799509503, "grad_norm": 0.8353975707720281, "learning_rate": 3.474452554744526e-07, "loss": 0.78, "step": 32109 }, { "epoch": 0.9374908761787977, "grad_norm": 0.7169222897477323, "learning_rate": 3.4728304947283054e-07, "loss": 0.6074, "step": 32110 }, { "epoch": 0.937520072406645, "grad_norm": 0.7197052095016284, "learning_rate": 3.4712084347120844e-07, "loss": 0.6019, "step": 32111 }, { "epoch": 0.9375492686344924, "grad_norm": 0.6776503909024815, "learning_rate": 3.469586374695864e-07, "loss": 0.5496, "step": 32112 }, { "epoch": 0.9375784648623398, "grad_norm": 0.8099799702153531, "learning_rate": 3.4679643146796435e-07, "loss": 0.6775, "step": 32113 }, { "epoch": 0.9376076610901871, "grad_norm": 0.7041075074872695, "learning_rate": 3.4663422546634225e-07, "loss": 0.5976, "step": 32114 }, { "epoch": 0.9376368573180345, "grad_norm": 0.7035194397148896, "learning_rate": 3.4647201946472026e-07, "loss": 0.5996, "step": 32115 }, { "epoch": 0.9376660535458818, "grad_norm": 0.7281426284225921, "learning_rate": 3.4630981346309816e-07, "loss": 0.6976, "step": 32116 }, { "epoch": 0.9376952497737292, "grad_norm": 0.652630951993896, "learning_rate": 3.461476074614761e-07, "loss": 0.5303, "step": 32117 }, { "epoch": 0.9377244460015766, "grad_norm": 0.7260044722187924, "learning_rate": 3.4598540145985406e-07, "loss": 0.6151, "step": 32118 }, { "epoch": 0.9377536422294239, "grad_norm": 0.7388690918262482, "learning_rate": 3.4582319545823197e-07, "loss": 0.708, "step": 32119 }, { "epoch": 0.9377828384572713, "grad_norm": 0.7842609905514002, "learning_rate": 3.456609894566099e-07, "loss": 0.7129, "step": 32120 }, { "epoch": 0.9378120346851186, "grad_norm": 0.6994314503442778, "learning_rate": 3.454987834549878e-07, "loss": 0.565, "step": 32121 }, { "epoch": 0.937841230912966, "grad_norm": 0.6937711596300588, "learning_rate": 3.4533657745336583e-07, "loss": 0.5853, "step": 32122 }, { "epoch": 0.9378704271408134, "grad_norm": 0.739120539162917, "learning_rate": 3.4517437145174373e-07, "loss": 0.6536, "step": 32123 }, { "epoch": 0.9378996233686607, "grad_norm": 0.7268259531471667, "learning_rate": 3.4501216545012173e-07, "loss": 0.6331, "step": 32124 }, { "epoch": 0.9379288195965081, "grad_norm": 0.7084625645243714, "learning_rate": 3.4484995944849964e-07, "loss": 0.6326, "step": 32125 }, { "epoch": 0.9379580158243555, "grad_norm": 0.7709945871065662, "learning_rate": 3.4468775344687754e-07, "loss": 0.6688, "step": 32126 }, { "epoch": 0.9379872120522028, "grad_norm": 0.7273450333709428, "learning_rate": 3.445255474452555e-07, "loss": 0.6178, "step": 32127 }, { "epoch": 0.9380164082800502, "grad_norm": 0.7868459988618097, "learning_rate": 3.4436334144363344e-07, "loss": 0.7652, "step": 32128 }, { "epoch": 0.9380456045078975, "grad_norm": 0.6940950068216895, "learning_rate": 3.442011354420114e-07, "loss": 0.6033, "step": 32129 }, { "epoch": 0.9380748007357449, "grad_norm": 0.7443080097787063, "learning_rate": 3.440389294403893e-07, "loss": 0.6239, "step": 32130 }, { "epoch": 0.9381039969635923, "grad_norm": 0.7658453754798592, "learning_rate": 3.438767234387673e-07, "loss": 0.6963, "step": 32131 }, { "epoch": 0.9381331931914396, "grad_norm": 0.7604110298696901, "learning_rate": 3.437145174371452e-07, "loss": 0.7057, "step": 32132 }, { "epoch": 0.938162389419287, "grad_norm": 0.6570200331882671, "learning_rate": 3.435523114355231e-07, "loss": 0.5608, "step": 32133 }, { "epoch": 0.9381915856471343, "grad_norm": 0.7333150103069946, "learning_rate": 3.433901054339011e-07, "loss": 0.731, "step": 32134 }, { "epoch": 0.9382207818749817, "grad_norm": 0.7375933714430154, "learning_rate": 3.43227899432279e-07, "loss": 0.6675, "step": 32135 }, { "epoch": 0.9382499781028291, "grad_norm": 0.6573253091448332, "learning_rate": 3.4306569343065697e-07, "loss": 0.5427, "step": 32136 }, { "epoch": 0.9382791743306764, "grad_norm": 0.7264769832048845, "learning_rate": 3.429034874290349e-07, "loss": 0.6256, "step": 32137 }, { "epoch": 0.9383083705585238, "grad_norm": 0.701213436307504, "learning_rate": 3.427412814274128e-07, "loss": 0.6154, "step": 32138 }, { "epoch": 0.9383375667863711, "grad_norm": 0.7242365609534763, "learning_rate": 3.425790754257908e-07, "loss": 0.6238, "step": 32139 }, { "epoch": 0.9383667630142185, "grad_norm": 0.7196154529378442, "learning_rate": 3.424168694241687e-07, "loss": 0.6124, "step": 32140 }, { "epoch": 0.938395959242066, "grad_norm": 0.7568438847108817, "learning_rate": 3.422546634225467e-07, "loss": 0.6973, "step": 32141 }, { "epoch": 0.9384251554699133, "grad_norm": 0.7262115171385451, "learning_rate": 3.420924574209246e-07, "loss": 0.6892, "step": 32142 }, { "epoch": 0.9384543516977607, "grad_norm": 0.7529573266380944, "learning_rate": 3.419302514193026e-07, "loss": 0.6651, "step": 32143 }, { "epoch": 0.9384835479256081, "grad_norm": 0.749505612004314, "learning_rate": 3.417680454176805e-07, "loss": 0.6425, "step": 32144 }, { "epoch": 0.9385127441534554, "grad_norm": 0.8158724081025273, "learning_rate": 3.416058394160584e-07, "loss": 0.7521, "step": 32145 }, { "epoch": 0.9385419403813028, "grad_norm": 0.729820590374517, "learning_rate": 3.4144363341443635e-07, "loss": 0.6585, "step": 32146 }, { "epoch": 0.9385711366091501, "grad_norm": 0.7007152456695891, "learning_rate": 3.412814274128143e-07, "loss": 0.5874, "step": 32147 }, { "epoch": 0.9386003328369975, "grad_norm": 0.7213851056545938, "learning_rate": 3.4111922141119226e-07, "loss": 0.6581, "step": 32148 }, { "epoch": 0.9386295290648449, "grad_norm": 0.7968277216457592, "learning_rate": 3.4095701540957016e-07, "loss": 0.66, "step": 32149 }, { "epoch": 0.9386587252926922, "grad_norm": 0.7276318943308642, "learning_rate": 3.4079480940794816e-07, "loss": 0.6299, "step": 32150 }, { "epoch": 0.9386879215205396, "grad_norm": 0.6830882258363137, "learning_rate": 3.4063260340632607e-07, "loss": 0.5938, "step": 32151 }, { "epoch": 0.938717117748387, "grad_norm": 0.721848624897217, "learning_rate": 3.4047039740470397e-07, "loss": 0.6342, "step": 32152 }, { "epoch": 0.9387463139762343, "grad_norm": 0.7204318611907474, "learning_rate": 3.4030819140308197e-07, "loss": 0.6057, "step": 32153 }, { "epoch": 0.9387755102040817, "grad_norm": 0.6514762320244365, "learning_rate": 3.401459854014599e-07, "loss": 0.5491, "step": 32154 }, { "epoch": 0.938804706431929, "grad_norm": 0.6864175507531164, "learning_rate": 3.3998377939983783e-07, "loss": 0.6115, "step": 32155 }, { "epoch": 0.9388339026597764, "grad_norm": 0.7152226875496838, "learning_rate": 3.3982157339821573e-07, "loss": 0.5684, "step": 32156 }, { "epoch": 0.9388630988876238, "grad_norm": 0.7161065570625845, "learning_rate": 3.3965936739659374e-07, "loss": 0.5949, "step": 32157 }, { "epoch": 0.9388922951154711, "grad_norm": 0.690225428085445, "learning_rate": 3.3949716139497164e-07, "loss": 0.6054, "step": 32158 }, { "epoch": 0.9389214913433185, "grad_norm": 0.7484671633814247, "learning_rate": 3.3933495539334954e-07, "loss": 0.6897, "step": 32159 }, { "epoch": 0.9389506875711658, "grad_norm": 0.6789703525983266, "learning_rate": 3.3917274939172754e-07, "loss": 0.6099, "step": 32160 }, { "epoch": 0.9389798837990132, "grad_norm": 0.7180929380252123, "learning_rate": 3.3901054339010544e-07, "loss": 0.6952, "step": 32161 }, { "epoch": 0.9390090800268606, "grad_norm": 0.7033939427386754, "learning_rate": 3.3884833738848345e-07, "loss": 0.6311, "step": 32162 }, { "epoch": 0.9390382762547079, "grad_norm": 0.6923533295371698, "learning_rate": 3.3868613138686135e-07, "loss": 0.5804, "step": 32163 }, { "epoch": 0.9390674724825553, "grad_norm": 0.787022900656772, "learning_rate": 3.385239253852393e-07, "loss": 0.6645, "step": 32164 }, { "epoch": 0.9390966687104026, "grad_norm": 0.7898972288231818, "learning_rate": 3.383617193836172e-07, "loss": 0.7403, "step": 32165 }, { "epoch": 0.93912586493825, "grad_norm": 0.7025180646113841, "learning_rate": 3.3819951338199516e-07, "loss": 0.6385, "step": 32166 }, { "epoch": 0.9391550611660974, "grad_norm": 0.7180625834254196, "learning_rate": 3.380373073803731e-07, "loss": 0.6062, "step": 32167 }, { "epoch": 0.9391842573939447, "grad_norm": 0.6824823188872225, "learning_rate": 3.37875101378751e-07, "loss": 0.5523, "step": 32168 }, { "epoch": 0.9392134536217921, "grad_norm": 0.7457380887014959, "learning_rate": 3.37712895377129e-07, "loss": 0.6489, "step": 32169 }, { "epoch": 0.9392426498496395, "grad_norm": 0.6833153297961468, "learning_rate": 3.375506893755069e-07, "loss": 0.5421, "step": 32170 }, { "epoch": 0.9392718460774868, "grad_norm": 0.7486666440909638, "learning_rate": 3.373884833738848e-07, "loss": 0.711, "step": 32171 }, { "epoch": 0.9393010423053342, "grad_norm": 0.7234712805167857, "learning_rate": 3.3722627737226283e-07, "loss": 0.6506, "step": 32172 }, { "epoch": 0.9393302385331815, "grad_norm": 0.7290835330548487, "learning_rate": 3.3706407137064073e-07, "loss": 0.6901, "step": 32173 }, { "epoch": 0.9393594347610289, "grad_norm": 0.7195125030156018, "learning_rate": 3.369018653690187e-07, "loss": 0.6481, "step": 32174 }, { "epoch": 0.9393886309888763, "grad_norm": 0.7803387444206833, "learning_rate": 3.367396593673966e-07, "loss": 0.6818, "step": 32175 }, { "epoch": 0.9394178272167236, "grad_norm": 0.7372108102403097, "learning_rate": 3.365774533657746e-07, "loss": 0.6615, "step": 32176 }, { "epoch": 0.939447023444571, "grad_norm": 0.7581311762429256, "learning_rate": 3.364152473641525e-07, "loss": 0.6556, "step": 32177 }, { "epoch": 0.9394762196724183, "grad_norm": 0.6438850877604362, "learning_rate": 3.362530413625304e-07, "loss": 0.5618, "step": 32178 }, { "epoch": 0.9395054159002657, "grad_norm": 0.6851207798324169, "learning_rate": 3.360908353609084e-07, "loss": 0.6114, "step": 32179 }, { "epoch": 0.9395346121281131, "grad_norm": 0.7222476038741092, "learning_rate": 3.359286293592863e-07, "loss": 0.5815, "step": 32180 }, { "epoch": 0.9395638083559604, "grad_norm": 0.7393917177523741, "learning_rate": 3.3576642335766426e-07, "loss": 0.663, "step": 32181 }, { "epoch": 0.9395930045838078, "grad_norm": 0.7382829371485485, "learning_rate": 3.356042173560422e-07, "loss": 0.6588, "step": 32182 }, { "epoch": 0.9396222008116552, "grad_norm": 0.7195197891945871, "learning_rate": 3.3544201135442017e-07, "loss": 0.6433, "step": 32183 }, { "epoch": 0.9396513970395025, "grad_norm": 0.7641003829855849, "learning_rate": 3.3527980535279807e-07, "loss": 0.6497, "step": 32184 }, { "epoch": 0.9396805932673499, "grad_norm": 0.7758311836043721, "learning_rate": 3.3511759935117597e-07, "loss": 0.6254, "step": 32185 }, { "epoch": 0.9397097894951972, "grad_norm": 0.6894520510493166, "learning_rate": 3.34955393349554e-07, "loss": 0.6469, "step": 32186 }, { "epoch": 0.9397389857230446, "grad_norm": 0.7361296966024202, "learning_rate": 3.347931873479319e-07, "loss": 0.6446, "step": 32187 }, { "epoch": 0.939768181950892, "grad_norm": 0.6603769450073075, "learning_rate": 3.346309813463099e-07, "loss": 0.5513, "step": 32188 }, { "epoch": 0.9397973781787393, "grad_norm": 0.7021488593822564, "learning_rate": 3.344687753446878e-07, "loss": 0.6154, "step": 32189 }, { "epoch": 0.9398265744065867, "grad_norm": 0.7741176756739502, "learning_rate": 3.3430656934306574e-07, "loss": 0.6329, "step": 32190 }, { "epoch": 0.939855770634434, "grad_norm": 0.7373876847107032, "learning_rate": 3.341443633414437e-07, "loss": 0.6369, "step": 32191 }, { "epoch": 0.9398849668622814, "grad_norm": 0.7414975675740917, "learning_rate": 3.339821573398216e-07, "loss": 0.6917, "step": 32192 }, { "epoch": 0.9399141630901288, "grad_norm": 0.7284614670324437, "learning_rate": 3.3381995133819954e-07, "loss": 0.664, "step": 32193 }, { "epoch": 0.9399433593179761, "grad_norm": 0.7981504040101268, "learning_rate": 3.3365774533657745e-07, "loss": 0.6824, "step": 32194 }, { "epoch": 0.9399725555458235, "grad_norm": 0.6520494064664334, "learning_rate": 3.3349553933495545e-07, "loss": 0.5658, "step": 32195 }, { "epoch": 0.9400017517736708, "grad_norm": 0.7419594650100314, "learning_rate": 3.3333333333333335e-07, "loss": 0.6214, "step": 32196 }, { "epoch": 0.9400309480015182, "grad_norm": 0.7070451129428473, "learning_rate": 3.3317112733171125e-07, "loss": 0.6275, "step": 32197 }, { "epoch": 0.9400601442293656, "grad_norm": 0.6856303827747777, "learning_rate": 3.3300892133008926e-07, "loss": 0.6031, "step": 32198 }, { "epoch": 0.9400893404572129, "grad_norm": 0.6640696500259401, "learning_rate": 3.3284671532846716e-07, "loss": 0.5526, "step": 32199 }, { "epoch": 0.9401185366850603, "grad_norm": 0.7175370387226101, "learning_rate": 3.326845093268451e-07, "loss": 0.5896, "step": 32200 }, { "epoch": 0.9401477329129077, "grad_norm": 1.1960592799404912, "learning_rate": 3.3252230332522307e-07, "loss": 0.6624, "step": 32201 }, { "epoch": 0.940176929140755, "grad_norm": 0.731913741149526, "learning_rate": 3.32360097323601e-07, "loss": 0.6984, "step": 32202 }, { "epoch": 0.9402061253686024, "grad_norm": 0.7438093336728208, "learning_rate": 3.321978913219789e-07, "loss": 0.6499, "step": 32203 }, { "epoch": 0.9402353215964497, "grad_norm": 0.7375644409875179, "learning_rate": 3.320356853203568e-07, "loss": 0.6878, "step": 32204 }, { "epoch": 0.9402645178242971, "grad_norm": 0.6707822877314951, "learning_rate": 3.3187347931873483e-07, "loss": 0.5472, "step": 32205 }, { "epoch": 0.9402937140521445, "grad_norm": 0.6361076172178839, "learning_rate": 3.3171127331711273e-07, "loss": 0.5517, "step": 32206 }, { "epoch": 0.9403229102799918, "grad_norm": 0.735271161363974, "learning_rate": 3.3154906731549074e-07, "loss": 0.6143, "step": 32207 }, { "epoch": 0.9403521065078392, "grad_norm": 0.7581028077098089, "learning_rate": 3.3138686131386864e-07, "loss": 0.698, "step": 32208 }, { "epoch": 0.9403813027356865, "grad_norm": 0.7101781586141438, "learning_rate": 3.312246553122466e-07, "loss": 0.6173, "step": 32209 }, { "epoch": 0.9404104989635339, "grad_norm": 0.7567078579681107, "learning_rate": 3.310624493106245e-07, "loss": 0.7118, "step": 32210 }, { "epoch": 0.9404396951913813, "grad_norm": 0.7779147483317718, "learning_rate": 3.3090024330900245e-07, "loss": 0.6907, "step": 32211 }, { "epoch": 0.9404688914192286, "grad_norm": 0.7027089565618412, "learning_rate": 3.307380373073804e-07, "loss": 0.5915, "step": 32212 }, { "epoch": 0.940498087647076, "grad_norm": 0.7234131376708657, "learning_rate": 3.305758313057583e-07, "loss": 0.6547, "step": 32213 }, { "epoch": 0.9405272838749233, "grad_norm": 0.7161251292710352, "learning_rate": 3.304136253041363e-07, "loss": 0.6107, "step": 32214 }, { "epoch": 0.9405564801027707, "grad_norm": 0.6889485851806431, "learning_rate": 3.302514193025142e-07, "loss": 0.5851, "step": 32215 }, { "epoch": 0.9405856763306181, "grad_norm": 0.7464657562223992, "learning_rate": 3.3008921330089217e-07, "loss": 0.6752, "step": 32216 }, { "epoch": 0.9406148725584654, "grad_norm": 0.766621683689287, "learning_rate": 3.299270072992701e-07, "loss": 0.6924, "step": 32217 }, { "epoch": 0.9406440687863128, "grad_norm": 0.772169775435965, "learning_rate": 3.29764801297648e-07, "loss": 0.673, "step": 32218 }, { "epoch": 0.9406732650141602, "grad_norm": 0.7320256991464773, "learning_rate": 3.29602595296026e-07, "loss": 0.6371, "step": 32219 }, { "epoch": 0.9407024612420075, "grad_norm": 0.684524055485719, "learning_rate": 3.2944038929440393e-07, "loss": 0.5373, "step": 32220 }, { "epoch": 0.9407316574698549, "grad_norm": 0.8037312807089511, "learning_rate": 3.292781832927819e-07, "loss": 0.7625, "step": 32221 }, { "epoch": 0.9407608536977022, "grad_norm": 0.7235677638309674, "learning_rate": 3.291159772911598e-07, "loss": 0.6287, "step": 32222 }, { "epoch": 0.9407900499255496, "grad_norm": 0.7108153776122573, "learning_rate": 3.289537712895378e-07, "loss": 0.654, "step": 32223 }, { "epoch": 0.940819246153397, "grad_norm": 0.7604263790399411, "learning_rate": 3.287915652879157e-07, "loss": 0.7318, "step": 32224 }, { "epoch": 0.9408484423812443, "grad_norm": 0.680791939835619, "learning_rate": 3.286293592862936e-07, "loss": 0.5793, "step": 32225 }, { "epoch": 0.9408776386090917, "grad_norm": 0.9420537999166404, "learning_rate": 3.284671532846716e-07, "loss": 0.7285, "step": 32226 }, { "epoch": 0.940906834836939, "grad_norm": 0.7598665837342635, "learning_rate": 3.283049472830495e-07, "loss": 0.6906, "step": 32227 }, { "epoch": 0.9409360310647864, "grad_norm": 0.6465794971682829, "learning_rate": 3.2814274128142745e-07, "loss": 0.5359, "step": 32228 }, { "epoch": 0.9409652272926338, "grad_norm": 0.7941582706374763, "learning_rate": 3.2798053527980535e-07, "loss": 0.695, "step": 32229 }, { "epoch": 0.9409944235204811, "grad_norm": 0.7991910400710982, "learning_rate": 3.278183292781833e-07, "loss": 0.5138, "step": 32230 }, { "epoch": 0.9410236197483285, "grad_norm": 0.7644158489093884, "learning_rate": 3.2765612327656126e-07, "loss": 0.6749, "step": 32231 }, { "epoch": 0.9410528159761758, "grad_norm": 0.7832271712427145, "learning_rate": 3.2749391727493916e-07, "loss": 0.7421, "step": 32232 }, { "epoch": 0.9410820122040232, "grad_norm": 0.7350327458825975, "learning_rate": 3.2733171127331717e-07, "loss": 0.6681, "step": 32233 }, { "epoch": 0.9411112084318706, "grad_norm": 0.7516086039349694, "learning_rate": 3.2716950527169507e-07, "loss": 0.6503, "step": 32234 }, { "epoch": 0.9411404046597179, "grad_norm": 0.7475318429395384, "learning_rate": 3.27007299270073e-07, "loss": 0.6872, "step": 32235 }, { "epoch": 0.9411696008875653, "grad_norm": 0.6996820268930407, "learning_rate": 3.26845093268451e-07, "loss": 0.6243, "step": 32236 }, { "epoch": 0.9411987971154127, "grad_norm": 0.7335644764460206, "learning_rate": 3.266828872668289e-07, "loss": 0.6524, "step": 32237 }, { "epoch": 0.94122799334326, "grad_norm": 0.7580041334067505, "learning_rate": 3.2652068126520683e-07, "loss": 0.6985, "step": 32238 }, { "epoch": 0.9412571895711074, "grad_norm": 0.7112017870000577, "learning_rate": 3.2635847526358473e-07, "loss": 0.6141, "step": 32239 }, { "epoch": 0.9412863857989547, "grad_norm": 0.7135797644961517, "learning_rate": 3.2619626926196274e-07, "loss": 0.6518, "step": 32240 }, { "epoch": 0.9413155820268021, "grad_norm": 0.708194406765105, "learning_rate": 3.2603406326034064e-07, "loss": 0.6594, "step": 32241 }, { "epoch": 0.9413447782546495, "grad_norm": 0.8062958084544939, "learning_rate": 3.2587185725871865e-07, "loss": 0.703, "step": 32242 }, { "epoch": 0.9413739744824968, "grad_norm": 0.6452497891748791, "learning_rate": 3.2570965125709655e-07, "loss": 0.5494, "step": 32243 }, { "epoch": 0.9414031707103442, "grad_norm": 0.7105914588887214, "learning_rate": 3.2554744525547445e-07, "loss": 0.6132, "step": 32244 }, { "epoch": 0.9414323669381915, "grad_norm": 0.7438316762208345, "learning_rate": 3.253852392538524e-07, "loss": 0.6328, "step": 32245 }, { "epoch": 0.9414615631660389, "grad_norm": 0.7124057615338901, "learning_rate": 3.2522303325223036e-07, "loss": 0.6149, "step": 32246 }, { "epoch": 0.9414907593938863, "grad_norm": 0.7045469292617306, "learning_rate": 3.250608272506083e-07, "loss": 0.5885, "step": 32247 }, { "epoch": 0.9415199556217336, "grad_norm": 0.7915832115790898, "learning_rate": 3.248986212489862e-07, "loss": 0.7826, "step": 32248 }, { "epoch": 0.941549151849581, "grad_norm": 0.6988051284520693, "learning_rate": 3.247364152473642e-07, "loss": 0.6175, "step": 32249 }, { "epoch": 0.9415783480774284, "grad_norm": 0.760239111629205, "learning_rate": 3.245742092457421e-07, "loss": 0.6863, "step": 32250 }, { "epoch": 0.9416075443052757, "grad_norm": 0.7263920445983176, "learning_rate": 3.2441200324412e-07, "loss": 0.6057, "step": 32251 }, { "epoch": 0.9416367405331231, "grad_norm": 0.6794950747848011, "learning_rate": 3.2424979724249803e-07, "loss": 0.5602, "step": 32252 }, { "epoch": 0.9416659367609704, "grad_norm": 0.6943762958337999, "learning_rate": 3.2408759124087593e-07, "loss": 0.6157, "step": 32253 }, { "epoch": 0.9416951329888178, "grad_norm": 0.7562609150434889, "learning_rate": 3.239253852392539e-07, "loss": 0.6624, "step": 32254 }, { "epoch": 0.9417243292166652, "grad_norm": 0.6971950717167387, "learning_rate": 3.2376317923763184e-07, "loss": 0.5889, "step": 32255 }, { "epoch": 0.9417535254445125, "grad_norm": 0.7835206630912179, "learning_rate": 3.2360097323600974e-07, "loss": 0.7267, "step": 32256 }, { "epoch": 0.9417827216723599, "grad_norm": 0.7097738337850916, "learning_rate": 3.234387672343877e-07, "loss": 0.667, "step": 32257 }, { "epoch": 0.9418119179002072, "grad_norm": 0.7636526375687493, "learning_rate": 3.232765612327656e-07, "loss": 0.7115, "step": 32258 }, { "epoch": 0.9418411141280546, "grad_norm": 0.7106509732768317, "learning_rate": 3.231143552311436e-07, "loss": 0.5953, "step": 32259 }, { "epoch": 0.941870310355902, "grad_norm": 0.6901654415832158, "learning_rate": 3.229521492295215e-07, "loss": 0.6075, "step": 32260 }, { "epoch": 0.9418995065837493, "grad_norm": 0.6817209492947196, "learning_rate": 3.227899432278995e-07, "loss": 0.5796, "step": 32261 }, { "epoch": 0.9419287028115968, "grad_norm": 0.701178401365132, "learning_rate": 3.226277372262774e-07, "loss": 0.6148, "step": 32262 }, { "epoch": 0.9419578990394442, "grad_norm": 0.7093782183947624, "learning_rate": 3.224655312246553e-07, "loss": 0.6121, "step": 32263 }, { "epoch": 0.9419870952672915, "grad_norm": 0.7530354363526974, "learning_rate": 3.2230332522303326e-07, "loss": 0.6405, "step": 32264 }, { "epoch": 0.9420162914951389, "grad_norm": 0.7557040890471127, "learning_rate": 3.221411192214112e-07, "loss": 0.6717, "step": 32265 }, { "epoch": 0.9420454877229862, "grad_norm": 0.6674679778075937, "learning_rate": 3.2197891321978917e-07, "loss": 0.5608, "step": 32266 }, { "epoch": 0.9420746839508336, "grad_norm": 0.7707098811049333, "learning_rate": 3.2181670721816707e-07, "loss": 0.747, "step": 32267 }, { "epoch": 0.942103880178681, "grad_norm": 0.7528079358521543, "learning_rate": 3.216545012165451e-07, "loss": 0.6513, "step": 32268 }, { "epoch": 0.9421330764065283, "grad_norm": 0.7552026853245372, "learning_rate": 3.21492295214923e-07, "loss": 0.6711, "step": 32269 }, { "epoch": 0.9421622726343757, "grad_norm": 0.717397993302757, "learning_rate": 3.213300892133009e-07, "loss": 0.6662, "step": 32270 }, { "epoch": 0.942191468862223, "grad_norm": 0.7833021546560808, "learning_rate": 3.211678832116789e-07, "loss": 0.5652, "step": 32271 }, { "epoch": 0.9422206650900704, "grad_norm": 0.748389810282119, "learning_rate": 3.210056772100568e-07, "loss": 0.6238, "step": 32272 }, { "epoch": 0.9422498613179178, "grad_norm": 0.7340633091173834, "learning_rate": 3.2084347120843474e-07, "loss": 0.6631, "step": 32273 }, { "epoch": 0.9422790575457651, "grad_norm": 0.7076147262529799, "learning_rate": 3.206812652068127e-07, "loss": 0.6113, "step": 32274 }, { "epoch": 0.9423082537736125, "grad_norm": 0.8338938124153162, "learning_rate": 3.2051905920519065e-07, "loss": 0.6499, "step": 32275 }, { "epoch": 0.9423374500014599, "grad_norm": 0.7630713779605343, "learning_rate": 3.2035685320356855e-07, "loss": 0.7226, "step": 32276 }, { "epoch": 0.9423666462293072, "grad_norm": 0.6370785862312137, "learning_rate": 3.2019464720194645e-07, "loss": 0.5264, "step": 32277 }, { "epoch": 0.9423958424571546, "grad_norm": 0.7863289243084182, "learning_rate": 3.2003244120032446e-07, "loss": 0.7424, "step": 32278 }, { "epoch": 0.9424250386850019, "grad_norm": 0.7430030559639174, "learning_rate": 3.1987023519870236e-07, "loss": 0.6535, "step": 32279 }, { "epoch": 0.9424542349128493, "grad_norm": 0.6790241988019122, "learning_rate": 3.1970802919708037e-07, "loss": 0.5832, "step": 32280 }, { "epoch": 0.9424834311406967, "grad_norm": 0.7579267573094778, "learning_rate": 3.1954582319545827e-07, "loss": 0.6897, "step": 32281 }, { "epoch": 0.942512627368544, "grad_norm": 0.6680478879448727, "learning_rate": 3.193836171938362e-07, "loss": 0.5819, "step": 32282 }, { "epoch": 0.9425418235963914, "grad_norm": 0.6647514640048864, "learning_rate": 3.192214111922141e-07, "loss": 0.5736, "step": 32283 }, { "epoch": 0.9425710198242387, "grad_norm": 0.7881288898479788, "learning_rate": 3.190592051905921e-07, "loss": 0.6347, "step": 32284 }, { "epoch": 0.9426002160520861, "grad_norm": 0.7422878234813532, "learning_rate": 3.1889699918897003e-07, "loss": 0.6575, "step": 32285 }, { "epoch": 0.9426294122799335, "grad_norm": 0.7522900232949908, "learning_rate": 3.1873479318734793e-07, "loss": 0.6627, "step": 32286 }, { "epoch": 0.9426586085077808, "grad_norm": 0.8855150865869511, "learning_rate": 3.1857258718572594e-07, "loss": 0.7898, "step": 32287 }, { "epoch": 0.9426878047356282, "grad_norm": 0.725211837109818, "learning_rate": 3.1841038118410384e-07, "loss": 0.6175, "step": 32288 }, { "epoch": 0.9427170009634755, "grad_norm": 0.7079165645229698, "learning_rate": 3.1824817518248174e-07, "loss": 0.623, "step": 32289 }, { "epoch": 0.9427461971913229, "grad_norm": 0.7271120438075054, "learning_rate": 3.1808596918085975e-07, "loss": 0.6536, "step": 32290 }, { "epoch": 0.9427753934191703, "grad_norm": 0.7632298358872214, "learning_rate": 3.1792376317923765e-07, "loss": 0.6234, "step": 32291 }, { "epoch": 0.9428045896470176, "grad_norm": 0.6960469759205723, "learning_rate": 3.177615571776156e-07, "loss": 0.5851, "step": 32292 }, { "epoch": 0.942833785874865, "grad_norm": 0.7367153862811163, "learning_rate": 3.175993511759935e-07, "loss": 0.682, "step": 32293 }, { "epoch": 0.9428629821027124, "grad_norm": 0.7156328464792441, "learning_rate": 3.174371451743715e-07, "loss": 0.5718, "step": 32294 }, { "epoch": 0.9428921783305597, "grad_norm": 0.731810758831873, "learning_rate": 3.172749391727494e-07, "loss": 0.6393, "step": 32295 }, { "epoch": 0.9429213745584071, "grad_norm": 0.7432293570481514, "learning_rate": 3.171127331711273e-07, "loss": 0.6746, "step": 32296 }, { "epoch": 0.9429505707862544, "grad_norm": 0.7549057104501958, "learning_rate": 3.169505271695053e-07, "loss": 0.7295, "step": 32297 }, { "epoch": 0.9429797670141018, "grad_norm": 0.6749040910605129, "learning_rate": 3.167883211678832e-07, "loss": 0.5295, "step": 32298 }, { "epoch": 0.9430089632419492, "grad_norm": 0.7108988758575401, "learning_rate": 3.1662611516626117e-07, "loss": 0.6848, "step": 32299 }, { "epoch": 0.9430381594697965, "grad_norm": 0.7090138123852184, "learning_rate": 3.164639091646391e-07, "loss": 0.5847, "step": 32300 }, { "epoch": 0.9430673556976439, "grad_norm": 0.7454571974477633, "learning_rate": 3.163017031630171e-07, "loss": 0.6901, "step": 32301 }, { "epoch": 0.9430965519254912, "grad_norm": 0.6794844828643392, "learning_rate": 3.16139497161395e-07, "loss": 0.6001, "step": 32302 }, { "epoch": 0.9431257481533386, "grad_norm": 0.7206317528500528, "learning_rate": 3.1597729115977293e-07, "loss": 0.6554, "step": 32303 }, { "epoch": 0.943154944381186, "grad_norm": 0.7641672982487866, "learning_rate": 3.158150851581509e-07, "loss": 0.7012, "step": 32304 }, { "epoch": 0.9431841406090333, "grad_norm": 0.7913363641795572, "learning_rate": 3.156528791565288e-07, "loss": 0.678, "step": 32305 }, { "epoch": 0.9432133368368807, "grad_norm": 0.669731849589029, "learning_rate": 3.154906731549068e-07, "loss": 0.5734, "step": 32306 }, { "epoch": 0.943242533064728, "grad_norm": 0.8074341481450122, "learning_rate": 3.153284671532847e-07, "loss": 0.6764, "step": 32307 }, { "epoch": 0.9432717292925754, "grad_norm": 0.731297243145908, "learning_rate": 3.1516626115166265e-07, "loss": 0.577, "step": 32308 }, { "epoch": 0.9433009255204228, "grad_norm": 0.6735000031522148, "learning_rate": 3.150040551500406e-07, "loss": 0.5548, "step": 32309 }, { "epoch": 0.9433301217482701, "grad_norm": 0.7602550681753883, "learning_rate": 3.148418491484185e-07, "loss": 0.6574, "step": 32310 }, { "epoch": 0.9433593179761175, "grad_norm": 0.757121861885899, "learning_rate": 3.1467964314679646e-07, "loss": 0.7028, "step": 32311 }, { "epoch": 0.9433885142039649, "grad_norm": 0.7503898072724609, "learning_rate": 3.1451743714517436e-07, "loss": 0.6427, "step": 32312 }, { "epoch": 0.9434177104318122, "grad_norm": 0.9956370887412694, "learning_rate": 3.1435523114355237e-07, "loss": 0.5988, "step": 32313 }, { "epoch": 0.9434469066596596, "grad_norm": 0.8482362503916422, "learning_rate": 3.1419302514193027e-07, "loss": 0.6941, "step": 32314 }, { "epoch": 0.9434761028875069, "grad_norm": 0.7061883037745249, "learning_rate": 3.140308191403083e-07, "loss": 0.6142, "step": 32315 }, { "epoch": 0.9435052991153543, "grad_norm": 0.7381003669566824, "learning_rate": 3.138686131386862e-07, "loss": 0.6182, "step": 32316 }, { "epoch": 0.9435344953432017, "grad_norm": 0.7636586179119451, "learning_rate": 3.137064071370641e-07, "loss": 0.6981, "step": 32317 }, { "epoch": 0.943563691571049, "grad_norm": 0.7836165301547345, "learning_rate": 3.1354420113544203e-07, "loss": 0.7459, "step": 32318 }, { "epoch": 0.9435928877988964, "grad_norm": 0.7253066343635138, "learning_rate": 3.1338199513382e-07, "loss": 0.6045, "step": 32319 }, { "epoch": 0.9436220840267437, "grad_norm": 0.7732050689909048, "learning_rate": 3.1321978913219794e-07, "loss": 0.644, "step": 32320 }, { "epoch": 0.9436512802545911, "grad_norm": 0.6907951123654514, "learning_rate": 3.1305758313057584e-07, "loss": 0.5987, "step": 32321 }, { "epoch": 0.9436804764824385, "grad_norm": 0.7641738172543456, "learning_rate": 3.1289537712895374e-07, "loss": 0.6142, "step": 32322 }, { "epoch": 0.9437096727102858, "grad_norm": 0.8063144204247061, "learning_rate": 3.1273317112733175e-07, "loss": 0.6834, "step": 32323 }, { "epoch": 0.9437388689381332, "grad_norm": 0.7580750067542734, "learning_rate": 3.1257096512570965e-07, "loss": 0.6469, "step": 32324 }, { "epoch": 0.9437680651659806, "grad_norm": 0.7288419865330983, "learning_rate": 3.124087591240876e-07, "loss": 0.6668, "step": 32325 }, { "epoch": 0.9437972613938279, "grad_norm": 0.6781439426378282, "learning_rate": 3.1224655312246556e-07, "loss": 0.5845, "step": 32326 }, { "epoch": 0.9438264576216753, "grad_norm": 0.7390244873524515, "learning_rate": 3.120843471208435e-07, "loss": 0.6601, "step": 32327 }, { "epoch": 0.9438556538495226, "grad_norm": 0.7051623161801622, "learning_rate": 3.119221411192214e-07, "loss": 0.6319, "step": 32328 }, { "epoch": 0.94388485007737, "grad_norm": 0.7273012714375152, "learning_rate": 3.1175993511759936e-07, "loss": 0.644, "step": 32329 }, { "epoch": 0.9439140463052174, "grad_norm": 0.690721326091823, "learning_rate": 3.115977291159773e-07, "loss": 0.5563, "step": 32330 }, { "epoch": 0.9439432425330647, "grad_norm": 0.6595887614103065, "learning_rate": 3.1143552311435527e-07, "loss": 0.5632, "step": 32331 }, { "epoch": 0.9439724387609121, "grad_norm": 0.7578684292312915, "learning_rate": 3.1127331711273317e-07, "loss": 0.6443, "step": 32332 }, { "epoch": 0.9440016349887594, "grad_norm": 0.718662675216568, "learning_rate": 3.111111111111111e-07, "loss": 0.6093, "step": 32333 }, { "epoch": 0.9440308312166068, "grad_norm": 0.7505581593819298, "learning_rate": 3.109489051094891e-07, "loss": 0.6892, "step": 32334 }, { "epoch": 0.9440600274444542, "grad_norm": 0.6443495104577243, "learning_rate": 3.1078669910786703e-07, "loss": 0.5127, "step": 32335 }, { "epoch": 0.9440892236723015, "grad_norm": 0.7347025746457786, "learning_rate": 3.10624493106245e-07, "loss": 0.6382, "step": 32336 }, { "epoch": 0.9441184199001489, "grad_norm": 0.7467865163882905, "learning_rate": 3.104622871046229e-07, "loss": 0.7011, "step": 32337 }, { "epoch": 0.9441476161279962, "grad_norm": 0.7213916079003229, "learning_rate": 3.1030008110300084e-07, "loss": 0.6062, "step": 32338 }, { "epoch": 0.9441768123558436, "grad_norm": 0.7978625062755825, "learning_rate": 3.1013787510137874e-07, "loss": 0.783, "step": 32339 }, { "epoch": 0.944206008583691, "grad_norm": 0.7136896470440646, "learning_rate": 3.099756690997567e-07, "loss": 0.6206, "step": 32340 }, { "epoch": 0.9442352048115383, "grad_norm": 0.7168826583774599, "learning_rate": 3.0981346309813465e-07, "loss": 0.6517, "step": 32341 }, { "epoch": 0.9442644010393857, "grad_norm": 0.7487597371229562, "learning_rate": 3.096512570965126e-07, "loss": 0.7273, "step": 32342 }, { "epoch": 0.944293597267233, "grad_norm": 0.6989860437118871, "learning_rate": 3.0948905109489056e-07, "loss": 0.6427, "step": 32343 }, { "epoch": 0.9443227934950804, "grad_norm": 0.7016749939505165, "learning_rate": 3.093268450932685e-07, "loss": 0.5686, "step": 32344 }, { "epoch": 0.9443519897229278, "grad_norm": 0.6962586335580029, "learning_rate": 3.091646390916464e-07, "loss": 0.6194, "step": 32345 }, { "epoch": 0.9443811859507751, "grad_norm": 0.6625754976090712, "learning_rate": 3.0900243309002437e-07, "loss": 0.5884, "step": 32346 }, { "epoch": 0.9444103821786225, "grad_norm": 0.7053518988194565, "learning_rate": 3.0884022708840227e-07, "loss": 0.6281, "step": 32347 }, { "epoch": 0.9444395784064699, "grad_norm": 0.7067405747020971, "learning_rate": 3.086780210867802e-07, "loss": 0.5474, "step": 32348 }, { "epoch": 0.9444687746343172, "grad_norm": 0.6941563268191324, "learning_rate": 3.085158150851582e-07, "loss": 0.5896, "step": 32349 }, { "epoch": 0.9444979708621646, "grad_norm": 0.7098150595542149, "learning_rate": 3.0835360908353613e-07, "loss": 0.5785, "step": 32350 }, { "epoch": 0.9445271670900119, "grad_norm": 0.771179850520632, "learning_rate": 3.081914030819141e-07, "loss": 0.6833, "step": 32351 }, { "epoch": 0.9445563633178593, "grad_norm": 0.7464398400859097, "learning_rate": 3.08029197080292e-07, "loss": 0.664, "step": 32352 }, { "epoch": 0.9445855595457067, "grad_norm": 0.7491989494373419, "learning_rate": 3.0786699107866994e-07, "loss": 0.6315, "step": 32353 }, { "epoch": 0.944614755773554, "grad_norm": 0.6750284037161332, "learning_rate": 3.077047850770479e-07, "loss": 0.5303, "step": 32354 }, { "epoch": 0.9446439520014014, "grad_norm": 0.6754232529384886, "learning_rate": 3.075425790754258e-07, "loss": 0.5709, "step": 32355 }, { "epoch": 0.9446731482292487, "grad_norm": 0.6749182243152512, "learning_rate": 3.0738037307380375e-07, "loss": 0.5758, "step": 32356 }, { "epoch": 0.9447023444570961, "grad_norm": 0.71256451812128, "learning_rate": 3.072181670721817e-07, "loss": 0.6494, "step": 32357 }, { "epoch": 0.9447315406849435, "grad_norm": 0.7143926953470137, "learning_rate": 3.070559610705596e-07, "loss": 0.6537, "step": 32358 }, { "epoch": 0.9447607369127908, "grad_norm": 0.7454828475038038, "learning_rate": 3.0689375506893756e-07, "loss": 0.7263, "step": 32359 }, { "epoch": 0.9447899331406382, "grad_norm": 0.7364035032799972, "learning_rate": 3.067315490673155e-07, "loss": 0.637, "step": 32360 }, { "epoch": 0.9448191293684856, "grad_norm": 0.7563719249466826, "learning_rate": 3.0656934306569346e-07, "loss": 0.6562, "step": 32361 }, { "epoch": 0.9448483255963329, "grad_norm": 0.6994334029734043, "learning_rate": 3.064071370640714e-07, "loss": 0.5927, "step": 32362 }, { "epoch": 0.9448775218241803, "grad_norm": 0.7460709032991072, "learning_rate": 3.0624493106244937e-07, "loss": 0.623, "step": 32363 }, { "epoch": 0.9449067180520276, "grad_norm": 0.7321068461689929, "learning_rate": 3.0608272506082727e-07, "loss": 0.6426, "step": 32364 }, { "epoch": 0.944935914279875, "grad_norm": 0.7002240928900981, "learning_rate": 3.059205190592052e-07, "loss": 0.6037, "step": 32365 }, { "epoch": 0.9449651105077224, "grad_norm": 0.6788701514295158, "learning_rate": 3.0575831305758313e-07, "loss": 0.5784, "step": 32366 }, { "epoch": 0.9449943067355697, "grad_norm": 0.7161229201445353, "learning_rate": 3.055961070559611e-07, "loss": 0.7149, "step": 32367 }, { "epoch": 0.9450235029634171, "grad_norm": 0.7587337689620491, "learning_rate": 3.0543390105433904e-07, "loss": 0.6684, "step": 32368 }, { "epoch": 0.9450526991912644, "grad_norm": 0.7342211719474548, "learning_rate": 3.05271695052717e-07, "loss": 0.6301, "step": 32369 }, { "epoch": 0.9450818954191118, "grad_norm": 0.7375734046345891, "learning_rate": 3.0510948905109494e-07, "loss": 0.69, "step": 32370 }, { "epoch": 0.9451110916469592, "grad_norm": 0.7134663016056194, "learning_rate": 3.0494728304947284e-07, "loss": 0.5858, "step": 32371 }, { "epoch": 0.9451402878748065, "grad_norm": 0.7375587440137695, "learning_rate": 3.047850770478508e-07, "loss": 0.6094, "step": 32372 }, { "epoch": 0.9451694841026539, "grad_norm": 0.6891891505704136, "learning_rate": 3.0462287104622875e-07, "loss": 0.5396, "step": 32373 }, { "epoch": 0.9451986803305013, "grad_norm": 0.6551507589739263, "learning_rate": 3.0446066504460665e-07, "loss": 0.5283, "step": 32374 }, { "epoch": 0.9452278765583486, "grad_norm": 0.7543796958446229, "learning_rate": 3.042984590429846e-07, "loss": 0.6923, "step": 32375 }, { "epoch": 0.945257072786196, "grad_norm": 0.7063725113917546, "learning_rate": 3.0413625304136256e-07, "loss": 0.6295, "step": 32376 }, { "epoch": 0.9452862690140433, "grad_norm": 0.711804952777452, "learning_rate": 3.039740470397405e-07, "loss": 0.6391, "step": 32377 }, { "epoch": 0.9453154652418907, "grad_norm": 0.7290137639992983, "learning_rate": 3.038118410381184e-07, "loss": 0.6485, "step": 32378 }, { "epoch": 0.9453446614697381, "grad_norm": 0.7576130645285515, "learning_rate": 3.0364963503649637e-07, "loss": 0.6624, "step": 32379 }, { "epoch": 0.9453738576975854, "grad_norm": 0.7720859306113661, "learning_rate": 3.034874290348743e-07, "loss": 0.7386, "step": 32380 }, { "epoch": 0.9454030539254328, "grad_norm": 0.7247980728919609, "learning_rate": 3.033252230332523e-07, "loss": 0.6105, "step": 32381 }, { "epoch": 0.9454322501532803, "grad_norm": 0.66684424005392, "learning_rate": 3.031630170316302e-07, "loss": 0.5668, "step": 32382 }, { "epoch": 0.9454614463811276, "grad_norm": 0.6586339761315695, "learning_rate": 3.0300081103000813e-07, "loss": 0.5907, "step": 32383 }, { "epoch": 0.945490642608975, "grad_norm": 0.7170929791298947, "learning_rate": 3.0283860502838603e-07, "loss": 0.5926, "step": 32384 }, { "epoch": 0.9455198388368223, "grad_norm": 0.7803944052286705, "learning_rate": 3.02676399026764e-07, "loss": 0.6463, "step": 32385 }, { "epoch": 0.9455490350646697, "grad_norm": 0.7277976968912802, "learning_rate": 3.0251419302514194e-07, "loss": 0.6163, "step": 32386 }, { "epoch": 0.9455782312925171, "grad_norm": 0.7241320145623363, "learning_rate": 3.023519870235199e-07, "loss": 0.6487, "step": 32387 }, { "epoch": 0.9456074275203644, "grad_norm": 0.7111372764914454, "learning_rate": 3.0218978102189785e-07, "loss": 0.6058, "step": 32388 }, { "epoch": 0.9456366237482118, "grad_norm": 0.7370934642128598, "learning_rate": 3.020275750202758e-07, "loss": 0.6177, "step": 32389 }, { "epoch": 0.9456658199760591, "grad_norm": 0.7325823940200547, "learning_rate": 3.0186536901865376e-07, "loss": 0.6306, "step": 32390 }, { "epoch": 0.9456950162039065, "grad_norm": 0.749854149590482, "learning_rate": 3.0170316301703166e-07, "loss": 0.6225, "step": 32391 }, { "epoch": 0.9457242124317539, "grad_norm": 0.8347691709117545, "learning_rate": 3.015409570154096e-07, "loss": 0.7174, "step": 32392 }, { "epoch": 0.9457534086596012, "grad_norm": 0.6406886355604949, "learning_rate": 3.013787510137875e-07, "loss": 0.4727, "step": 32393 }, { "epoch": 0.9457826048874486, "grad_norm": 0.7519065750037144, "learning_rate": 3.0121654501216546e-07, "loss": 0.566, "step": 32394 }, { "epoch": 0.945811801115296, "grad_norm": 0.7231628043546688, "learning_rate": 3.010543390105434e-07, "loss": 0.6596, "step": 32395 }, { "epoch": 0.9458409973431433, "grad_norm": 0.7618020959354216, "learning_rate": 3.0089213300892137e-07, "loss": 0.7248, "step": 32396 }, { "epoch": 0.9458701935709907, "grad_norm": 0.7494622140249079, "learning_rate": 3.007299270072993e-07, "loss": 0.64, "step": 32397 }, { "epoch": 0.945899389798838, "grad_norm": 0.7513307140988857, "learning_rate": 3.0056772100567723e-07, "loss": 0.7089, "step": 32398 }, { "epoch": 0.9459285860266854, "grad_norm": 0.7797054735705693, "learning_rate": 3.004055150040552e-07, "loss": 0.6834, "step": 32399 }, { "epoch": 0.9459577822545328, "grad_norm": 0.7460267164381407, "learning_rate": 3.0024330900243314e-07, "loss": 0.6676, "step": 32400 }, { "epoch": 0.9459869784823801, "grad_norm": 0.7485123257176098, "learning_rate": 3.0008110300081104e-07, "loss": 0.7, "step": 32401 }, { "epoch": 0.9460161747102275, "grad_norm": 1.0624209682304546, "learning_rate": 2.99918896999189e-07, "loss": 0.6695, "step": 32402 }, { "epoch": 0.9460453709380748, "grad_norm": 0.7782599846837617, "learning_rate": 2.9975669099756694e-07, "loss": 0.6926, "step": 32403 }, { "epoch": 0.9460745671659222, "grad_norm": 0.7252805494894446, "learning_rate": 2.9959448499594484e-07, "loss": 0.6135, "step": 32404 }, { "epoch": 0.9461037633937696, "grad_norm": 0.7239000621796179, "learning_rate": 2.994322789943228e-07, "loss": 0.6294, "step": 32405 }, { "epoch": 0.9461329596216169, "grad_norm": 0.7010964264352175, "learning_rate": 2.9927007299270075e-07, "loss": 0.6402, "step": 32406 }, { "epoch": 0.9461621558494643, "grad_norm": 0.7518652199056911, "learning_rate": 2.991078669910787e-07, "loss": 0.7471, "step": 32407 }, { "epoch": 0.9461913520773116, "grad_norm": 0.7507298430323497, "learning_rate": 2.9894566098945666e-07, "loss": 0.6923, "step": 32408 }, { "epoch": 0.946220548305159, "grad_norm": 0.6946287519358955, "learning_rate": 2.9878345498783456e-07, "loss": 0.5555, "step": 32409 }, { "epoch": 0.9462497445330064, "grad_norm": 0.7207283672644735, "learning_rate": 2.986212489862125e-07, "loss": 0.628, "step": 32410 }, { "epoch": 0.9462789407608537, "grad_norm": 0.7281631299528946, "learning_rate": 2.984590429845904e-07, "loss": 0.6543, "step": 32411 }, { "epoch": 0.9463081369887011, "grad_norm": 0.7350063681922389, "learning_rate": 2.9829683698296837e-07, "loss": 0.646, "step": 32412 }, { "epoch": 0.9463373332165484, "grad_norm": 0.649497853701401, "learning_rate": 2.981346309813463e-07, "loss": 0.5545, "step": 32413 }, { "epoch": 0.9463665294443958, "grad_norm": 0.7310504337648251, "learning_rate": 2.979724249797243e-07, "loss": 0.6531, "step": 32414 }, { "epoch": 0.9463957256722432, "grad_norm": 0.8916385976189413, "learning_rate": 2.9781021897810223e-07, "loss": 0.6263, "step": 32415 }, { "epoch": 0.9464249219000905, "grad_norm": 0.7210215844388898, "learning_rate": 2.976480129764802e-07, "loss": 0.6648, "step": 32416 }, { "epoch": 0.9464541181279379, "grad_norm": 0.72863856947977, "learning_rate": 2.974858069748581e-07, "loss": 0.5885, "step": 32417 }, { "epoch": 0.9464833143557853, "grad_norm": 0.7235370293833004, "learning_rate": 2.9732360097323604e-07, "loss": 0.6349, "step": 32418 }, { "epoch": 0.9465125105836326, "grad_norm": 0.741289117026466, "learning_rate": 2.97161394971614e-07, "loss": 0.686, "step": 32419 }, { "epoch": 0.94654170681148, "grad_norm": 0.7588719872532151, "learning_rate": 2.969991889699919e-07, "loss": 0.6957, "step": 32420 }, { "epoch": 0.9465709030393273, "grad_norm": 0.7152838918810781, "learning_rate": 2.9683698296836985e-07, "loss": 0.6252, "step": 32421 }, { "epoch": 0.9466000992671747, "grad_norm": 0.7485245780783581, "learning_rate": 2.966747769667478e-07, "loss": 0.6264, "step": 32422 }, { "epoch": 0.9466292954950221, "grad_norm": 0.7206310964087441, "learning_rate": 2.9651257096512576e-07, "loss": 0.6239, "step": 32423 }, { "epoch": 0.9466584917228694, "grad_norm": 0.7320240543080062, "learning_rate": 2.9635036496350366e-07, "loss": 0.6492, "step": 32424 }, { "epoch": 0.9466876879507168, "grad_norm": 0.7682996966254565, "learning_rate": 2.961881589618816e-07, "loss": 0.7261, "step": 32425 }, { "epoch": 0.9467168841785641, "grad_norm": 0.786181880021919, "learning_rate": 2.9602595296025956e-07, "loss": 0.6747, "step": 32426 }, { "epoch": 0.9467460804064115, "grad_norm": 0.6990611641546274, "learning_rate": 2.958637469586375e-07, "loss": 0.5815, "step": 32427 }, { "epoch": 0.9467752766342589, "grad_norm": 0.7164642546494187, "learning_rate": 2.957015409570154e-07, "loss": 0.6366, "step": 32428 }, { "epoch": 0.9468044728621062, "grad_norm": 0.7210261202145735, "learning_rate": 2.9553933495539337e-07, "loss": 0.6363, "step": 32429 }, { "epoch": 0.9468336690899536, "grad_norm": 0.6420789107690879, "learning_rate": 2.953771289537713e-07, "loss": 0.5002, "step": 32430 }, { "epoch": 0.946862865317801, "grad_norm": 0.6748938560297044, "learning_rate": 2.9521492295214923e-07, "loss": 0.5621, "step": 32431 }, { "epoch": 0.9468920615456483, "grad_norm": 0.7734122277043218, "learning_rate": 2.950527169505272e-07, "loss": 0.6865, "step": 32432 }, { "epoch": 0.9469212577734957, "grad_norm": 0.6735705069911744, "learning_rate": 2.9489051094890514e-07, "loss": 0.557, "step": 32433 }, { "epoch": 0.946950454001343, "grad_norm": 0.6853078108614798, "learning_rate": 2.947283049472831e-07, "loss": 0.5722, "step": 32434 }, { "epoch": 0.9469796502291904, "grad_norm": 0.7140659701123068, "learning_rate": 2.9456609894566104e-07, "loss": 0.6288, "step": 32435 }, { "epoch": 0.9470088464570378, "grad_norm": 0.7118926819626835, "learning_rate": 2.9440389294403894e-07, "loss": 0.6354, "step": 32436 }, { "epoch": 0.9470380426848851, "grad_norm": 0.7216567962520326, "learning_rate": 2.942416869424169e-07, "loss": 0.6245, "step": 32437 }, { "epoch": 0.9470672389127325, "grad_norm": 0.7224893044964964, "learning_rate": 2.940794809407948e-07, "loss": 0.5759, "step": 32438 }, { "epoch": 0.9470964351405798, "grad_norm": 0.7056399829923815, "learning_rate": 2.9391727493917275e-07, "loss": 0.6157, "step": 32439 }, { "epoch": 0.9471256313684272, "grad_norm": 0.6610275668565763, "learning_rate": 2.937550689375507e-07, "loss": 0.5904, "step": 32440 }, { "epoch": 0.9471548275962746, "grad_norm": 0.6240498767891473, "learning_rate": 2.9359286293592866e-07, "loss": 0.4994, "step": 32441 }, { "epoch": 0.9471840238241219, "grad_norm": 0.7511298860008724, "learning_rate": 2.934306569343066e-07, "loss": 0.7142, "step": 32442 }, { "epoch": 0.9472132200519693, "grad_norm": 0.7798734521873197, "learning_rate": 2.932684509326845e-07, "loss": 0.6602, "step": 32443 }, { "epoch": 0.9472424162798166, "grad_norm": 0.6793461803785289, "learning_rate": 2.9310624493106247e-07, "loss": 0.5878, "step": 32444 }, { "epoch": 0.947271612507664, "grad_norm": 0.6799476621589228, "learning_rate": 2.929440389294404e-07, "loss": 0.5836, "step": 32445 }, { "epoch": 0.9473008087355114, "grad_norm": 0.7398264390586919, "learning_rate": 2.927818329278184e-07, "loss": 0.6596, "step": 32446 }, { "epoch": 0.9473300049633587, "grad_norm": 0.7131208118295755, "learning_rate": 2.926196269261963e-07, "loss": 0.6274, "step": 32447 }, { "epoch": 0.9473592011912061, "grad_norm": 0.7414158959761115, "learning_rate": 2.9245742092457423e-07, "loss": 0.6386, "step": 32448 }, { "epoch": 0.9473883974190535, "grad_norm": 0.7435034884353032, "learning_rate": 2.922952149229522e-07, "loss": 0.7158, "step": 32449 }, { "epoch": 0.9474175936469008, "grad_norm": 0.7192182340343272, "learning_rate": 2.921330089213301e-07, "loss": 0.5883, "step": 32450 }, { "epoch": 0.9474467898747482, "grad_norm": 0.7283364674476616, "learning_rate": 2.9197080291970804e-07, "loss": 0.6032, "step": 32451 }, { "epoch": 0.9474759861025955, "grad_norm": 0.8014972508119637, "learning_rate": 2.91808596918086e-07, "loss": 0.6672, "step": 32452 }, { "epoch": 0.9475051823304429, "grad_norm": 0.6560187315442466, "learning_rate": 2.9164639091646395e-07, "loss": 0.5887, "step": 32453 }, { "epoch": 0.9475343785582903, "grad_norm": 0.7200484663282293, "learning_rate": 2.914841849148419e-07, "loss": 0.5941, "step": 32454 }, { "epoch": 0.9475635747861376, "grad_norm": 0.7169259140147939, "learning_rate": 2.913219789132198e-07, "loss": 0.621, "step": 32455 }, { "epoch": 0.947592771013985, "grad_norm": 0.7254490874696117, "learning_rate": 2.9115977291159776e-07, "loss": 0.664, "step": 32456 }, { "epoch": 0.9476219672418323, "grad_norm": 0.7343955685783972, "learning_rate": 2.9099756690997566e-07, "loss": 0.6705, "step": 32457 }, { "epoch": 0.9476511634696797, "grad_norm": 0.706489255594329, "learning_rate": 2.908353609083536e-07, "loss": 0.5874, "step": 32458 }, { "epoch": 0.9476803596975271, "grad_norm": 0.706489467188055, "learning_rate": 2.9067315490673157e-07, "loss": 0.5984, "step": 32459 }, { "epoch": 0.9477095559253744, "grad_norm": 0.7305836205272688, "learning_rate": 2.905109489051095e-07, "loss": 0.6045, "step": 32460 }, { "epoch": 0.9477387521532218, "grad_norm": 0.7484308249126764, "learning_rate": 2.9034874290348747e-07, "loss": 0.6925, "step": 32461 }, { "epoch": 0.9477679483810691, "grad_norm": 0.7699692343380269, "learning_rate": 2.9018653690186543e-07, "loss": 0.6701, "step": 32462 }, { "epoch": 0.9477971446089165, "grad_norm": 0.6798140835777274, "learning_rate": 2.9002433090024333e-07, "loss": 0.5734, "step": 32463 }, { "epoch": 0.9478263408367639, "grad_norm": 0.7610198291223578, "learning_rate": 2.898621248986213e-07, "loss": 0.7102, "step": 32464 }, { "epoch": 0.9478555370646112, "grad_norm": 0.7234036196149044, "learning_rate": 2.896999188969992e-07, "loss": 0.6622, "step": 32465 }, { "epoch": 0.9478847332924586, "grad_norm": 0.750896889145269, "learning_rate": 2.8953771289537714e-07, "loss": 0.6727, "step": 32466 }, { "epoch": 0.947913929520306, "grad_norm": 0.7011088265134904, "learning_rate": 2.893755068937551e-07, "loss": 0.6065, "step": 32467 }, { "epoch": 0.9479431257481533, "grad_norm": 0.700930035209924, "learning_rate": 2.8921330089213304e-07, "loss": 0.5876, "step": 32468 }, { "epoch": 0.9479723219760007, "grad_norm": 0.721116447938526, "learning_rate": 2.89051094890511e-07, "loss": 0.6279, "step": 32469 }, { "epoch": 0.948001518203848, "grad_norm": 0.7392047999351471, "learning_rate": 2.888888888888889e-07, "loss": 0.6599, "step": 32470 }, { "epoch": 0.9480307144316954, "grad_norm": 0.7171858423302628, "learning_rate": 2.8872668288726685e-07, "loss": 0.6679, "step": 32471 }, { "epoch": 0.9480599106595428, "grad_norm": 0.6993644282141873, "learning_rate": 2.885644768856448e-07, "loss": 0.6008, "step": 32472 }, { "epoch": 0.9480891068873901, "grad_norm": 0.6799409138040963, "learning_rate": 2.8840227088402276e-07, "loss": 0.5826, "step": 32473 }, { "epoch": 0.9481183031152375, "grad_norm": 0.741032594849843, "learning_rate": 2.8824006488240066e-07, "loss": 0.6793, "step": 32474 }, { "epoch": 0.9481474993430848, "grad_norm": 0.7882535879494508, "learning_rate": 2.880778588807786e-07, "loss": 0.696, "step": 32475 }, { "epoch": 0.9481766955709322, "grad_norm": 0.6930403752224327, "learning_rate": 2.879156528791565e-07, "loss": 0.6119, "step": 32476 }, { "epoch": 0.9482058917987796, "grad_norm": 0.7509330846458985, "learning_rate": 2.8775344687753447e-07, "loss": 0.636, "step": 32477 }, { "epoch": 0.9482350880266269, "grad_norm": 0.6963867780137662, "learning_rate": 2.875912408759124e-07, "loss": 0.5766, "step": 32478 }, { "epoch": 0.9482642842544743, "grad_norm": 0.8078972682854358, "learning_rate": 2.874290348742904e-07, "loss": 0.6515, "step": 32479 }, { "epoch": 0.9482934804823216, "grad_norm": 0.7704582735032414, "learning_rate": 2.8726682887266833e-07, "loss": 0.7052, "step": 32480 }, { "epoch": 0.948322676710169, "grad_norm": 0.7039019957999412, "learning_rate": 2.871046228710463e-07, "loss": 0.6542, "step": 32481 }, { "epoch": 0.9483518729380164, "grad_norm": 0.7205327125831118, "learning_rate": 2.869424168694242e-07, "loss": 0.6606, "step": 32482 }, { "epoch": 0.9483810691658637, "grad_norm": 0.7508976123604231, "learning_rate": 2.8678021086780214e-07, "loss": 0.6442, "step": 32483 }, { "epoch": 0.9484102653937111, "grad_norm": 0.7604955999384914, "learning_rate": 2.8661800486618004e-07, "loss": 0.6622, "step": 32484 }, { "epoch": 0.9484394616215585, "grad_norm": 0.7810895787385979, "learning_rate": 2.86455798864558e-07, "loss": 0.7838, "step": 32485 }, { "epoch": 0.9484686578494058, "grad_norm": 0.6799123859991891, "learning_rate": 2.8629359286293595e-07, "loss": 0.5693, "step": 32486 }, { "epoch": 0.9484978540772532, "grad_norm": 0.7087367670633613, "learning_rate": 2.861313868613139e-07, "loss": 0.5915, "step": 32487 }, { "epoch": 0.9485270503051005, "grad_norm": 0.7600243480907347, "learning_rate": 2.8596918085969186e-07, "loss": 0.6711, "step": 32488 }, { "epoch": 0.9485562465329479, "grad_norm": 0.7360794039739145, "learning_rate": 2.8580697485806976e-07, "loss": 0.6855, "step": 32489 }, { "epoch": 0.9485854427607953, "grad_norm": 0.6974505308742157, "learning_rate": 2.856447688564477e-07, "loss": 0.6267, "step": 32490 }, { "epoch": 0.9486146389886426, "grad_norm": 0.7065923543699616, "learning_rate": 2.8548256285482567e-07, "loss": 0.6469, "step": 32491 }, { "epoch": 0.94864383521649, "grad_norm": 0.7684193495037109, "learning_rate": 2.8532035685320357e-07, "loss": 0.6895, "step": 32492 }, { "epoch": 0.9486730314443373, "grad_norm": 0.7115303140795641, "learning_rate": 2.851581508515815e-07, "loss": 0.5789, "step": 32493 }, { "epoch": 0.9487022276721847, "grad_norm": 0.6644893472947793, "learning_rate": 2.849959448499595e-07, "loss": 0.5471, "step": 32494 }, { "epoch": 0.9487314239000321, "grad_norm": 0.7553344236230697, "learning_rate": 2.8483373884833743e-07, "loss": 0.5424, "step": 32495 }, { "epoch": 0.9487606201278794, "grad_norm": 0.7837795333163128, "learning_rate": 2.8467153284671533e-07, "loss": 0.6525, "step": 32496 }, { "epoch": 0.9487898163557268, "grad_norm": 0.9033612084697549, "learning_rate": 2.845093268450933e-07, "loss": 0.6311, "step": 32497 }, { "epoch": 0.9488190125835742, "grad_norm": 0.7397133014707328, "learning_rate": 2.8434712084347124e-07, "loss": 0.7056, "step": 32498 }, { "epoch": 0.9488482088114215, "grad_norm": 0.718877792601949, "learning_rate": 2.841849148418492e-07, "loss": 0.6846, "step": 32499 }, { "epoch": 0.9488774050392689, "grad_norm": 0.6942894536745071, "learning_rate": 2.8402270884022714e-07, "loss": 0.6146, "step": 32500 }, { "epoch": 0.9489066012671162, "grad_norm": 0.7403425750580571, "learning_rate": 2.8386050283860505e-07, "loss": 0.6335, "step": 32501 }, { "epoch": 0.9489357974949636, "grad_norm": 0.7052606798496811, "learning_rate": 2.83698296836983e-07, "loss": 0.6465, "step": 32502 }, { "epoch": 0.9489649937228111, "grad_norm": 0.7643047053919154, "learning_rate": 2.835360908353609e-07, "loss": 0.6907, "step": 32503 }, { "epoch": 0.9489941899506584, "grad_norm": 0.6691855172554185, "learning_rate": 2.8337388483373885e-07, "loss": 0.573, "step": 32504 }, { "epoch": 0.9490233861785058, "grad_norm": 0.7691111473759719, "learning_rate": 2.832116788321168e-07, "loss": 0.6284, "step": 32505 }, { "epoch": 0.9490525824063532, "grad_norm": 0.7860557191116531, "learning_rate": 2.8304947283049476e-07, "loss": 0.6326, "step": 32506 }, { "epoch": 0.9490817786342005, "grad_norm": 0.7576680741037467, "learning_rate": 2.828872668288727e-07, "loss": 0.6777, "step": 32507 }, { "epoch": 0.9491109748620479, "grad_norm": 0.7228548587420282, "learning_rate": 2.8272506082725067e-07, "loss": 0.6627, "step": 32508 }, { "epoch": 0.9491401710898952, "grad_norm": 0.690534221434906, "learning_rate": 2.8256285482562857e-07, "loss": 0.5634, "step": 32509 }, { "epoch": 0.9491693673177426, "grad_norm": 0.6869335795375121, "learning_rate": 2.824006488240065e-07, "loss": 0.5742, "step": 32510 }, { "epoch": 0.94919856354559, "grad_norm": 0.629586894605021, "learning_rate": 2.822384428223844e-07, "loss": 0.5095, "step": 32511 }, { "epoch": 0.9492277597734373, "grad_norm": 0.7325482708576864, "learning_rate": 2.820762368207624e-07, "loss": 0.6464, "step": 32512 }, { "epoch": 0.9492569560012847, "grad_norm": 0.70444779416216, "learning_rate": 2.8191403081914033e-07, "loss": 0.6196, "step": 32513 }, { "epoch": 0.949286152229132, "grad_norm": 0.724718711518011, "learning_rate": 2.817518248175183e-07, "loss": 0.6667, "step": 32514 }, { "epoch": 0.9493153484569794, "grad_norm": 0.6654850315671169, "learning_rate": 2.8158961881589624e-07, "loss": 0.5235, "step": 32515 }, { "epoch": 0.9493445446848268, "grad_norm": 0.691046990904321, "learning_rate": 2.8142741281427414e-07, "loss": 0.573, "step": 32516 }, { "epoch": 0.9493737409126741, "grad_norm": 0.6879244309831141, "learning_rate": 2.812652068126521e-07, "loss": 0.6085, "step": 32517 }, { "epoch": 0.9494029371405215, "grad_norm": 0.7280580631099886, "learning_rate": 2.8110300081103005e-07, "loss": 0.6881, "step": 32518 }, { "epoch": 0.9494321333683688, "grad_norm": 0.843780553512674, "learning_rate": 2.8094079480940795e-07, "loss": 0.8062, "step": 32519 }, { "epoch": 0.9494613295962162, "grad_norm": 0.6649821477633285, "learning_rate": 2.807785888077859e-07, "loss": 0.5334, "step": 32520 }, { "epoch": 0.9494905258240636, "grad_norm": 0.7103318571653651, "learning_rate": 2.8061638280616386e-07, "loss": 0.6282, "step": 32521 }, { "epoch": 0.9495197220519109, "grad_norm": 0.8418022410320704, "learning_rate": 2.8045417680454176e-07, "loss": 0.7614, "step": 32522 }, { "epoch": 0.9495489182797583, "grad_norm": 0.7354309784553044, "learning_rate": 2.802919708029197e-07, "loss": 0.7359, "step": 32523 }, { "epoch": 0.9495781145076057, "grad_norm": 0.8241226034794756, "learning_rate": 2.8012976480129767e-07, "loss": 0.6147, "step": 32524 }, { "epoch": 0.949607310735453, "grad_norm": 0.7174590429828088, "learning_rate": 2.799675587996756e-07, "loss": 0.6705, "step": 32525 }, { "epoch": 0.9496365069633004, "grad_norm": 0.7071453151869325, "learning_rate": 2.798053527980536e-07, "loss": 0.6339, "step": 32526 }, { "epoch": 0.9496657031911477, "grad_norm": 0.7091615558543268, "learning_rate": 2.796431467964315e-07, "loss": 0.6342, "step": 32527 }, { "epoch": 0.9496948994189951, "grad_norm": 0.6903817460736019, "learning_rate": 2.7948094079480943e-07, "loss": 0.6068, "step": 32528 }, { "epoch": 0.9497240956468425, "grad_norm": 0.7359270318654185, "learning_rate": 2.793187347931874e-07, "loss": 0.6539, "step": 32529 }, { "epoch": 0.9497532918746898, "grad_norm": 0.7316655793589392, "learning_rate": 2.791565287915653e-07, "loss": 0.5991, "step": 32530 }, { "epoch": 0.9497824881025372, "grad_norm": 0.6836185238928341, "learning_rate": 2.7899432278994324e-07, "loss": 0.6048, "step": 32531 }, { "epoch": 0.9498116843303845, "grad_norm": 0.7181957773757622, "learning_rate": 2.788321167883212e-07, "loss": 0.6289, "step": 32532 }, { "epoch": 0.9498408805582319, "grad_norm": 0.727540684573033, "learning_rate": 2.7866991078669915e-07, "loss": 0.7046, "step": 32533 }, { "epoch": 0.9498700767860793, "grad_norm": 0.7688380078399761, "learning_rate": 2.785077047850771e-07, "loss": 0.749, "step": 32534 }, { "epoch": 0.9498992730139266, "grad_norm": 0.6765818414740484, "learning_rate": 2.78345498783455e-07, "loss": 0.5541, "step": 32535 }, { "epoch": 0.949928469241774, "grad_norm": 0.6700722169866036, "learning_rate": 2.7818329278183295e-07, "loss": 0.5497, "step": 32536 }, { "epoch": 0.9499576654696213, "grad_norm": 0.7004533427486783, "learning_rate": 2.780210867802109e-07, "loss": 0.6526, "step": 32537 }, { "epoch": 0.9499868616974687, "grad_norm": 0.7435903068265542, "learning_rate": 2.778588807785888e-07, "loss": 0.6756, "step": 32538 }, { "epoch": 0.9500160579253161, "grad_norm": 0.7278101967517092, "learning_rate": 2.7769667477696676e-07, "loss": 0.6734, "step": 32539 }, { "epoch": 0.9500452541531634, "grad_norm": 0.7129350396200358, "learning_rate": 2.775344687753447e-07, "loss": 0.6334, "step": 32540 }, { "epoch": 0.9500744503810108, "grad_norm": 0.7102241557763803, "learning_rate": 2.7737226277372267e-07, "loss": 0.6265, "step": 32541 }, { "epoch": 0.9501036466088582, "grad_norm": 0.7043214138106906, "learning_rate": 2.7721005677210057e-07, "loss": 0.5959, "step": 32542 }, { "epoch": 0.9501328428367055, "grad_norm": 0.7995263759148704, "learning_rate": 2.770478507704785e-07, "loss": 0.5624, "step": 32543 }, { "epoch": 0.9501620390645529, "grad_norm": 0.6936571034637555, "learning_rate": 2.768856447688565e-07, "loss": 0.6069, "step": 32544 }, { "epoch": 0.9501912352924002, "grad_norm": 0.7194895615950357, "learning_rate": 2.7672343876723443e-07, "loss": 0.5933, "step": 32545 }, { "epoch": 0.9502204315202476, "grad_norm": 0.8124462422648924, "learning_rate": 2.7656123276561233e-07, "loss": 0.6927, "step": 32546 }, { "epoch": 0.950249627748095, "grad_norm": 0.733710971205824, "learning_rate": 2.763990267639903e-07, "loss": 0.6648, "step": 32547 }, { "epoch": 0.9502788239759423, "grad_norm": 0.6851997955884904, "learning_rate": 2.762368207623682e-07, "loss": 0.5873, "step": 32548 }, { "epoch": 0.9503080202037897, "grad_norm": 0.7631628344108978, "learning_rate": 2.7607461476074614e-07, "loss": 0.6622, "step": 32549 }, { "epoch": 0.950337216431637, "grad_norm": 0.7387701051590045, "learning_rate": 2.759124087591241e-07, "loss": 0.6676, "step": 32550 }, { "epoch": 0.9503664126594844, "grad_norm": 0.7197703434085249, "learning_rate": 2.7575020275750205e-07, "loss": 0.6098, "step": 32551 }, { "epoch": 0.9503956088873318, "grad_norm": 0.7244744845484322, "learning_rate": 2.7558799675588e-07, "loss": 0.666, "step": 32552 }, { "epoch": 0.9504248051151791, "grad_norm": 0.7643940797381938, "learning_rate": 2.7542579075425796e-07, "loss": 0.6329, "step": 32553 }, { "epoch": 0.9504540013430265, "grad_norm": 0.7780704106601373, "learning_rate": 2.7526358475263586e-07, "loss": 0.7404, "step": 32554 }, { "epoch": 0.9504831975708739, "grad_norm": 0.7306884203274957, "learning_rate": 2.751013787510138e-07, "loss": 0.6665, "step": 32555 }, { "epoch": 0.9505123937987212, "grad_norm": 0.751515680669224, "learning_rate": 2.7493917274939177e-07, "loss": 0.6136, "step": 32556 }, { "epoch": 0.9505415900265686, "grad_norm": 0.6677427366934051, "learning_rate": 2.7477696674776967e-07, "loss": 0.5824, "step": 32557 }, { "epoch": 0.9505707862544159, "grad_norm": 0.726311655028058, "learning_rate": 2.746147607461476e-07, "loss": 0.6846, "step": 32558 }, { "epoch": 0.9505999824822633, "grad_norm": 0.6646892912978981, "learning_rate": 2.744525547445256e-07, "loss": 0.5674, "step": 32559 }, { "epoch": 0.9506291787101107, "grad_norm": 0.6854531573853743, "learning_rate": 2.7429034874290353e-07, "loss": 0.5846, "step": 32560 }, { "epoch": 0.950658374937958, "grad_norm": 0.7034687565127273, "learning_rate": 2.741281427412815e-07, "loss": 0.6213, "step": 32561 }, { "epoch": 0.9506875711658054, "grad_norm": 0.7210187272114231, "learning_rate": 2.739659367396594e-07, "loss": 0.6097, "step": 32562 }, { "epoch": 0.9507167673936527, "grad_norm": 0.7229585331576917, "learning_rate": 2.7380373073803734e-07, "loss": 0.6255, "step": 32563 }, { "epoch": 0.9507459636215001, "grad_norm": 0.7195935582610873, "learning_rate": 2.736415247364153e-07, "loss": 0.6433, "step": 32564 }, { "epoch": 0.9507751598493475, "grad_norm": 0.7214591086699979, "learning_rate": 2.734793187347932e-07, "loss": 0.6278, "step": 32565 }, { "epoch": 0.9508043560771948, "grad_norm": 0.776527424812942, "learning_rate": 2.7331711273317115e-07, "loss": 0.6394, "step": 32566 }, { "epoch": 0.9508335523050422, "grad_norm": 0.7738487427853894, "learning_rate": 2.731549067315491e-07, "loss": 0.7362, "step": 32567 }, { "epoch": 0.9508627485328895, "grad_norm": 0.7140941806702902, "learning_rate": 2.72992700729927e-07, "loss": 0.5584, "step": 32568 }, { "epoch": 0.9508919447607369, "grad_norm": 0.7087001993718068, "learning_rate": 2.7283049472830495e-07, "loss": 0.6581, "step": 32569 }, { "epoch": 0.9509211409885843, "grad_norm": 0.6955265246030153, "learning_rate": 2.726682887266829e-07, "loss": 0.6342, "step": 32570 }, { "epoch": 0.9509503372164316, "grad_norm": 0.7318488670525952, "learning_rate": 2.7250608272506086e-07, "loss": 0.6334, "step": 32571 }, { "epoch": 0.950979533444279, "grad_norm": 0.6978869633392463, "learning_rate": 2.723438767234388e-07, "loss": 0.5996, "step": 32572 }, { "epoch": 0.9510087296721264, "grad_norm": 0.7428196517902868, "learning_rate": 2.721816707218167e-07, "loss": 0.6683, "step": 32573 }, { "epoch": 0.9510379258999737, "grad_norm": 0.7095464412812842, "learning_rate": 2.7201946472019467e-07, "loss": 0.6248, "step": 32574 }, { "epoch": 0.9510671221278211, "grad_norm": 0.677578033742475, "learning_rate": 2.7185725871857257e-07, "loss": 0.6059, "step": 32575 }, { "epoch": 0.9510963183556684, "grad_norm": 0.79944586276635, "learning_rate": 2.716950527169505e-07, "loss": 0.6769, "step": 32576 }, { "epoch": 0.9511255145835158, "grad_norm": 0.700069716559279, "learning_rate": 2.715328467153285e-07, "loss": 0.5713, "step": 32577 }, { "epoch": 0.9511547108113632, "grad_norm": 0.742139541271523, "learning_rate": 2.7137064071370643e-07, "loss": 0.6726, "step": 32578 }, { "epoch": 0.9511839070392105, "grad_norm": 0.7123414304369633, "learning_rate": 2.712084347120844e-07, "loss": 0.6011, "step": 32579 }, { "epoch": 0.9512131032670579, "grad_norm": 0.7510332702543077, "learning_rate": 2.7104622871046234e-07, "loss": 0.5875, "step": 32580 }, { "epoch": 0.9512422994949052, "grad_norm": 0.6609864516980756, "learning_rate": 2.7088402270884024e-07, "loss": 0.5615, "step": 32581 }, { "epoch": 0.9512714957227526, "grad_norm": 0.7042953563847211, "learning_rate": 2.707218167072182e-07, "loss": 0.6365, "step": 32582 }, { "epoch": 0.9513006919506, "grad_norm": 0.7113949409379751, "learning_rate": 2.705596107055961e-07, "loss": 0.6349, "step": 32583 }, { "epoch": 0.9513298881784473, "grad_norm": 0.7306687748364055, "learning_rate": 2.7039740470397405e-07, "loss": 0.6457, "step": 32584 }, { "epoch": 0.9513590844062947, "grad_norm": 0.729523322772792, "learning_rate": 2.70235198702352e-07, "loss": 0.6491, "step": 32585 }, { "epoch": 0.951388280634142, "grad_norm": 0.7638092912163045, "learning_rate": 2.7007299270072996e-07, "loss": 0.6752, "step": 32586 }, { "epoch": 0.9514174768619894, "grad_norm": 0.7047537205050495, "learning_rate": 2.699107866991079e-07, "loss": 0.5644, "step": 32587 }, { "epoch": 0.9514466730898368, "grad_norm": 0.7160431806957867, "learning_rate": 2.697485806974858e-07, "loss": 0.638, "step": 32588 }, { "epoch": 0.9514758693176841, "grad_norm": 0.730287625882055, "learning_rate": 2.6958637469586377e-07, "loss": 0.6436, "step": 32589 }, { "epoch": 0.9515050655455315, "grad_norm": 0.7013275833908502, "learning_rate": 2.694241686942417e-07, "loss": 0.6046, "step": 32590 }, { "epoch": 0.9515342617733789, "grad_norm": 0.6850214038932078, "learning_rate": 2.692619626926197e-07, "loss": 0.5847, "step": 32591 }, { "epoch": 0.9515634580012262, "grad_norm": 0.7988707535376295, "learning_rate": 2.690997566909976e-07, "loss": 0.6669, "step": 32592 }, { "epoch": 0.9515926542290736, "grad_norm": 0.7342024555457803, "learning_rate": 2.6893755068937553e-07, "loss": 0.659, "step": 32593 }, { "epoch": 0.9516218504569209, "grad_norm": 0.641802836403749, "learning_rate": 2.6877534468775343e-07, "loss": 0.5303, "step": 32594 }, { "epoch": 0.9516510466847683, "grad_norm": 0.9131085742586406, "learning_rate": 2.686131386861314e-07, "loss": 0.758, "step": 32595 }, { "epoch": 0.9516802429126157, "grad_norm": 0.7279020031071293, "learning_rate": 2.6845093268450934e-07, "loss": 0.6829, "step": 32596 }, { "epoch": 0.951709439140463, "grad_norm": 0.6798669844698534, "learning_rate": 2.682887266828873e-07, "loss": 0.5534, "step": 32597 }, { "epoch": 0.9517386353683104, "grad_norm": 0.7068591207010767, "learning_rate": 2.6812652068126525e-07, "loss": 0.6417, "step": 32598 }, { "epoch": 0.9517678315961577, "grad_norm": 0.7106758114644697, "learning_rate": 2.679643146796432e-07, "loss": 0.6208, "step": 32599 }, { "epoch": 0.9517970278240051, "grad_norm": 0.7659669511929635, "learning_rate": 2.678021086780211e-07, "loss": 0.6553, "step": 32600 }, { "epoch": 0.9518262240518525, "grad_norm": 0.7845593172157266, "learning_rate": 2.6763990267639905e-07, "loss": 0.6296, "step": 32601 }, { "epoch": 0.9518554202796998, "grad_norm": 0.6954557973150517, "learning_rate": 2.6747769667477696e-07, "loss": 0.5699, "step": 32602 }, { "epoch": 0.9518846165075472, "grad_norm": 0.7843328629040286, "learning_rate": 2.673154906731549e-07, "loss": 0.7464, "step": 32603 }, { "epoch": 0.9519138127353945, "grad_norm": 0.753753944281747, "learning_rate": 2.6715328467153286e-07, "loss": 0.64, "step": 32604 }, { "epoch": 0.9519430089632419, "grad_norm": 0.6357713519332967, "learning_rate": 2.669910786699108e-07, "loss": 0.5277, "step": 32605 }, { "epoch": 0.9519722051910893, "grad_norm": 0.7680726536010715, "learning_rate": 2.6682887266828877e-07, "loss": 0.7434, "step": 32606 }, { "epoch": 0.9520014014189366, "grad_norm": 0.6942034242765949, "learning_rate": 2.666666666666667e-07, "loss": 0.5963, "step": 32607 }, { "epoch": 0.952030597646784, "grad_norm": 0.7132961554384172, "learning_rate": 2.665044606650446e-07, "loss": 0.5576, "step": 32608 }, { "epoch": 0.9520597938746314, "grad_norm": 0.7416688649424181, "learning_rate": 2.663422546634226e-07, "loss": 0.5972, "step": 32609 }, { "epoch": 0.9520889901024787, "grad_norm": 0.7551478353375352, "learning_rate": 2.661800486618005e-07, "loss": 0.6538, "step": 32610 }, { "epoch": 0.9521181863303261, "grad_norm": 0.7810713475233922, "learning_rate": 2.6601784266017843e-07, "loss": 0.7034, "step": 32611 }, { "epoch": 0.9521473825581734, "grad_norm": 0.7999402722613236, "learning_rate": 2.658556366585564e-07, "loss": 0.7162, "step": 32612 }, { "epoch": 0.9521765787860208, "grad_norm": 0.7719208108783779, "learning_rate": 2.6569343065693434e-07, "loss": 0.5913, "step": 32613 }, { "epoch": 0.9522057750138682, "grad_norm": 0.7713597044730649, "learning_rate": 2.6553122465531224e-07, "loss": 0.7458, "step": 32614 }, { "epoch": 0.9522349712417155, "grad_norm": 0.6262142727307733, "learning_rate": 2.653690186536902e-07, "loss": 0.5035, "step": 32615 }, { "epoch": 0.9522641674695629, "grad_norm": 0.7535673411690006, "learning_rate": 2.6520681265206815e-07, "loss": 0.678, "step": 32616 }, { "epoch": 0.9522933636974102, "grad_norm": 0.713838627454536, "learning_rate": 2.650446066504461e-07, "loss": 0.6356, "step": 32617 }, { "epoch": 0.9523225599252576, "grad_norm": 0.6839623656722826, "learning_rate": 2.6488240064882406e-07, "loss": 0.6396, "step": 32618 }, { "epoch": 0.952351756153105, "grad_norm": 0.712025317181936, "learning_rate": 2.6472019464720196e-07, "loss": 0.6433, "step": 32619 }, { "epoch": 0.9523809523809523, "grad_norm": 0.6754296149297958, "learning_rate": 2.645579886455799e-07, "loss": 0.593, "step": 32620 }, { "epoch": 0.9524101486087997, "grad_norm": 0.7035840644622754, "learning_rate": 2.643957826439578e-07, "loss": 0.6218, "step": 32621 }, { "epoch": 0.952439344836647, "grad_norm": 0.7772365067023779, "learning_rate": 2.6423357664233577e-07, "loss": 0.6992, "step": 32622 }, { "epoch": 0.9524685410644945, "grad_norm": 0.8041787044159531, "learning_rate": 2.640713706407137e-07, "loss": 0.7572, "step": 32623 }, { "epoch": 0.9524977372923419, "grad_norm": 0.6671486406490033, "learning_rate": 2.639091646390917e-07, "loss": 0.5515, "step": 32624 }, { "epoch": 0.9525269335201892, "grad_norm": 0.7430720583456923, "learning_rate": 2.6374695863746963e-07, "loss": 0.6291, "step": 32625 }, { "epoch": 0.9525561297480366, "grad_norm": 0.6887785741462492, "learning_rate": 2.635847526358476e-07, "loss": 0.5422, "step": 32626 }, { "epoch": 0.952585325975884, "grad_norm": 0.780489699710854, "learning_rate": 2.634225466342255e-07, "loss": 0.6575, "step": 32627 }, { "epoch": 0.9526145222037313, "grad_norm": 0.6826892878308432, "learning_rate": 2.6326034063260344e-07, "loss": 0.6089, "step": 32628 }, { "epoch": 0.9526437184315787, "grad_norm": 0.714551113250357, "learning_rate": 2.6309813463098134e-07, "loss": 0.6333, "step": 32629 }, { "epoch": 0.952672914659426, "grad_norm": 0.7313283366377277, "learning_rate": 2.629359286293593e-07, "loss": 0.6465, "step": 32630 }, { "epoch": 0.9527021108872734, "grad_norm": 0.7255807366398178, "learning_rate": 2.6277372262773725e-07, "loss": 0.6198, "step": 32631 }, { "epoch": 0.9527313071151208, "grad_norm": 0.7167632757764164, "learning_rate": 2.626115166261152e-07, "loss": 0.6502, "step": 32632 }, { "epoch": 0.9527605033429681, "grad_norm": 0.6897623095630335, "learning_rate": 2.6244931062449315e-07, "loss": 0.6334, "step": 32633 }, { "epoch": 0.9527896995708155, "grad_norm": 0.7601586504875865, "learning_rate": 2.6228710462287106e-07, "loss": 0.6902, "step": 32634 }, { "epoch": 0.9528188957986629, "grad_norm": 0.714275415837605, "learning_rate": 2.62124898621249e-07, "loss": 0.6444, "step": 32635 }, { "epoch": 0.9528480920265102, "grad_norm": 0.6865502189634562, "learning_rate": 2.6196269261962696e-07, "loss": 0.5567, "step": 32636 }, { "epoch": 0.9528772882543576, "grad_norm": 0.676286425236237, "learning_rate": 2.6180048661800486e-07, "loss": 0.5805, "step": 32637 }, { "epoch": 0.9529064844822049, "grad_norm": 0.6961094783881335, "learning_rate": 2.616382806163828e-07, "loss": 0.6105, "step": 32638 }, { "epoch": 0.9529356807100523, "grad_norm": 0.7537060881935566, "learning_rate": 2.6147607461476077e-07, "loss": 0.6888, "step": 32639 }, { "epoch": 0.9529648769378997, "grad_norm": 0.7290534789685841, "learning_rate": 2.6131386861313867e-07, "loss": 0.6754, "step": 32640 }, { "epoch": 0.952994073165747, "grad_norm": 0.7423769452225447, "learning_rate": 2.6115166261151663e-07, "loss": 0.7118, "step": 32641 }, { "epoch": 0.9530232693935944, "grad_norm": 0.7767837131676795, "learning_rate": 2.609894566098946e-07, "loss": 0.7283, "step": 32642 }, { "epoch": 0.9530524656214417, "grad_norm": 0.7333429193759048, "learning_rate": 2.6082725060827253e-07, "loss": 0.6724, "step": 32643 }, { "epoch": 0.9530816618492891, "grad_norm": 0.7062908327976472, "learning_rate": 2.606650446066505e-07, "loss": 0.5998, "step": 32644 }, { "epoch": 0.9531108580771365, "grad_norm": 0.735140021772414, "learning_rate": 2.6050283860502844e-07, "loss": 0.6686, "step": 32645 }, { "epoch": 0.9531400543049838, "grad_norm": 0.8328079109950183, "learning_rate": 2.6034063260340634e-07, "loss": 0.636, "step": 32646 }, { "epoch": 0.9531692505328312, "grad_norm": 0.6611566592074918, "learning_rate": 2.601784266017843e-07, "loss": 0.5254, "step": 32647 }, { "epoch": 0.9531984467606786, "grad_norm": 0.7428124613083885, "learning_rate": 2.600162206001622e-07, "loss": 0.6511, "step": 32648 }, { "epoch": 0.9532276429885259, "grad_norm": 0.7358685769424251, "learning_rate": 2.5985401459854015e-07, "loss": 0.6502, "step": 32649 }, { "epoch": 0.9532568392163733, "grad_norm": 0.7000017627781445, "learning_rate": 2.596918085969181e-07, "loss": 0.6196, "step": 32650 }, { "epoch": 0.9532860354442206, "grad_norm": 0.7096159051475316, "learning_rate": 2.5952960259529606e-07, "loss": 0.6335, "step": 32651 }, { "epoch": 0.953315231672068, "grad_norm": 0.7127261452537482, "learning_rate": 2.59367396593674e-07, "loss": 0.6472, "step": 32652 }, { "epoch": 0.9533444278999154, "grad_norm": 0.674858062305475, "learning_rate": 2.592051905920519e-07, "loss": 0.547, "step": 32653 }, { "epoch": 0.9533736241277627, "grad_norm": 0.6870386517035846, "learning_rate": 2.5904298459042987e-07, "loss": 0.5823, "step": 32654 }, { "epoch": 0.9534028203556101, "grad_norm": 0.7156794116743666, "learning_rate": 2.588807785888078e-07, "loss": 0.6605, "step": 32655 }, { "epoch": 0.9534320165834574, "grad_norm": 0.6962721781448453, "learning_rate": 2.587185725871857e-07, "loss": 0.6191, "step": 32656 }, { "epoch": 0.9534612128113048, "grad_norm": 0.6918603693749195, "learning_rate": 2.585563665855637e-07, "loss": 0.5898, "step": 32657 }, { "epoch": 0.9534904090391522, "grad_norm": 0.7576487865468181, "learning_rate": 2.5839416058394163e-07, "loss": 0.7006, "step": 32658 }, { "epoch": 0.9535196052669995, "grad_norm": 0.699128134680324, "learning_rate": 2.582319545823196e-07, "loss": 0.6385, "step": 32659 }, { "epoch": 0.9535488014948469, "grad_norm": 0.7627625309689866, "learning_rate": 2.580697485806975e-07, "loss": 0.7663, "step": 32660 }, { "epoch": 0.9535779977226942, "grad_norm": 0.7071483117899342, "learning_rate": 2.5790754257907544e-07, "loss": 0.6076, "step": 32661 }, { "epoch": 0.9536071939505416, "grad_norm": 0.6974503518186919, "learning_rate": 2.577453365774534e-07, "loss": 0.6094, "step": 32662 }, { "epoch": 0.953636390178389, "grad_norm": 0.6847566665085488, "learning_rate": 2.5758313057583135e-07, "loss": 0.579, "step": 32663 }, { "epoch": 0.9536655864062363, "grad_norm": 0.6826443265349812, "learning_rate": 2.5742092457420925e-07, "loss": 0.5993, "step": 32664 }, { "epoch": 0.9536947826340837, "grad_norm": 0.6820289064317779, "learning_rate": 2.572587185725872e-07, "loss": 0.5551, "step": 32665 }, { "epoch": 0.953723978861931, "grad_norm": 0.7213763155580236, "learning_rate": 2.5709651257096516e-07, "loss": 0.6483, "step": 32666 }, { "epoch": 0.9537531750897784, "grad_norm": 0.6740393625960549, "learning_rate": 2.5693430656934306e-07, "loss": 0.572, "step": 32667 }, { "epoch": 0.9537823713176258, "grad_norm": 0.8520575614969972, "learning_rate": 2.56772100567721e-07, "loss": 0.645, "step": 32668 }, { "epoch": 0.9538115675454731, "grad_norm": 0.7133965053692478, "learning_rate": 2.5660989456609896e-07, "loss": 0.5613, "step": 32669 }, { "epoch": 0.9538407637733205, "grad_norm": 0.7055974071304575, "learning_rate": 2.564476885644769e-07, "loss": 0.6277, "step": 32670 }, { "epoch": 0.9538699600011679, "grad_norm": 0.7522690654169767, "learning_rate": 2.5628548256285487e-07, "loss": 0.7057, "step": 32671 }, { "epoch": 0.9538991562290152, "grad_norm": 0.7773005427504305, "learning_rate": 2.561232765612328e-07, "loss": 0.6783, "step": 32672 }, { "epoch": 0.9539283524568626, "grad_norm": 0.7493073039876215, "learning_rate": 2.5596107055961073e-07, "loss": 0.6862, "step": 32673 }, { "epoch": 0.9539575486847099, "grad_norm": 0.7515749502909724, "learning_rate": 2.557988645579887e-07, "loss": 0.5884, "step": 32674 }, { "epoch": 0.9539867449125573, "grad_norm": 0.7378082976049329, "learning_rate": 2.556366585563666e-07, "loss": 0.6809, "step": 32675 }, { "epoch": 0.9540159411404047, "grad_norm": 0.74478452676427, "learning_rate": 2.5547445255474454e-07, "loss": 0.6789, "step": 32676 }, { "epoch": 0.954045137368252, "grad_norm": 0.7152981129092718, "learning_rate": 2.553122465531225e-07, "loss": 0.6032, "step": 32677 }, { "epoch": 0.9540743335960994, "grad_norm": 0.7352632342518616, "learning_rate": 2.5515004055150044e-07, "loss": 0.6917, "step": 32678 }, { "epoch": 0.9541035298239467, "grad_norm": 0.6884015464465316, "learning_rate": 2.549878345498784e-07, "loss": 0.5998, "step": 32679 }, { "epoch": 0.9541327260517941, "grad_norm": 0.7572761569760108, "learning_rate": 2.548256285482563e-07, "loss": 0.6923, "step": 32680 }, { "epoch": 0.9541619222796415, "grad_norm": 0.7275306998774378, "learning_rate": 2.5466342254663425e-07, "loss": 0.7069, "step": 32681 }, { "epoch": 0.9541911185074888, "grad_norm": 0.6797946810538044, "learning_rate": 2.545012165450122e-07, "loss": 0.5385, "step": 32682 }, { "epoch": 0.9542203147353362, "grad_norm": 0.7336739037165363, "learning_rate": 2.543390105433901e-07, "loss": 0.6635, "step": 32683 }, { "epoch": 0.9542495109631836, "grad_norm": 0.7321153335871154, "learning_rate": 2.5417680454176806e-07, "loss": 0.6559, "step": 32684 }, { "epoch": 0.9542787071910309, "grad_norm": 0.7137092284443541, "learning_rate": 2.54014598540146e-07, "loss": 0.6037, "step": 32685 }, { "epoch": 0.9543079034188783, "grad_norm": 0.8190541462383784, "learning_rate": 2.538523925385239e-07, "loss": 0.7107, "step": 32686 }, { "epoch": 0.9543370996467256, "grad_norm": 0.737688364412386, "learning_rate": 2.5369018653690187e-07, "loss": 0.6822, "step": 32687 }, { "epoch": 0.954366295874573, "grad_norm": 0.6806699132154727, "learning_rate": 2.535279805352798e-07, "loss": 0.564, "step": 32688 }, { "epoch": 0.9543954921024204, "grad_norm": 0.7282413441835032, "learning_rate": 2.533657745336578e-07, "loss": 0.6056, "step": 32689 }, { "epoch": 0.9544246883302677, "grad_norm": 0.640818626926589, "learning_rate": 2.5320356853203573e-07, "loss": 0.548, "step": 32690 }, { "epoch": 0.9544538845581151, "grad_norm": 0.75049077378884, "learning_rate": 2.5304136253041363e-07, "loss": 0.684, "step": 32691 }, { "epoch": 0.9544830807859624, "grad_norm": 0.708323506280619, "learning_rate": 2.528791565287916e-07, "loss": 0.5432, "step": 32692 }, { "epoch": 0.9545122770138098, "grad_norm": 0.6955018571457752, "learning_rate": 2.527169505271695e-07, "loss": 0.594, "step": 32693 }, { "epoch": 0.9545414732416572, "grad_norm": 0.6866984436767734, "learning_rate": 2.5255474452554744e-07, "loss": 0.6184, "step": 32694 }, { "epoch": 0.9545706694695045, "grad_norm": 0.7080751918791498, "learning_rate": 2.523925385239254e-07, "loss": 0.5891, "step": 32695 }, { "epoch": 0.9545998656973519, "grad_norm": 0.7124435121303211, "learning_rate": 2.5223033252230335e-07, "loss": 0.6074, "step": 32696 }, { "epoch": 0.9546290619251993, "grad_norm": 0.7426455295496914, "learning_rate": 2.520681265206813e-07, "loss": 0.6692, "step": 32697 }, { "epoch": 0.9546582581530466, "grad_norm": 0.6933221183987559, "learning_rate": 2.5190592051905926e-07, "loss": 0.6321, "step": 32698 }, { "epoch": 0.954687454380894, "grad_norm": 0.6702258780948339, "learning_rate": 2.5174371451743716e-07, "loss": 0.5539, "step": 32699 }, { "epoch": 0.9547166506087413, "grad_norm": 0.768560679811375, "learning_rate": 2.515815085158151e-07, "loss": 0.7536, "step": 32700 }, { "epoch": 0.9547458468365887, "grad_norm": 0.6581433329999951, "learning_rate": 2.5141930251419306e-07, "loss": 0.5406, "step": 32701 }, { "epoch": 0.9547750430644361, "grad_norm": 0.7414816837891378, "learning_rate": 2.5125709651257097e-07, "loss": 0.6902, "step": 32702 }, { "epoch": 0.9548042392922834, "grad_norm": 0.7358918302648941, "learning_rate": 2.510948905109489e-07, "loss": 0.6558, "step": 32703 }, { "epoch": 0.9548334355201308, "grad_norm": 0.7440133480140408, "learning_rate": 2.5093268450932687e-07, "loss": 0.6378, "step": 32704 }, { "epoch": 0.9548626317479781, "grad_norm": 0.7639816467667703, "learning_rate": 2.5077047850770483e-07, "loss": 0.6501, "step": 32705 }, { "epoch": 0.9548918279758255, "grad_norm": 0.7978375266504528, "learning_rate": 2.5060827250608273e-07, "loss": 0.7484, "step": 32706 }, { "epoch": 0.9549210242036729, "grad_norm": 0.7483391255074082, "learning_rate": 2.504460665044607e-07, "loss": 0.6398, "step": 32707 }, { "epoch": 0.9549502204315202, "grad_norm": 0.7041079217861362, "learning_rate": 2.5028386050283864e-07, "loss": 0.578, "step": 32708 }, { "epoch": 0.9549794166593676, "grad_norm": 0.6686505610290232, "learning_rate": 2.501216545012166e-07, "loss": 0.5366, "step": 32709 }, { "epoch": 0.955008612887215, "grad_norm": 0.714636816470048, "learning_rate": 2.499594484995945e-07, "loss": 0.6149, "step": 32710 }, { "epoch": 0.9550378091150623, "grad_norm": 0.6694063028913826, "learning_rate": 2.4979724249797244e-07, "loss": 0.5687, "step": 32711 }, { "epoch": 0.9550670053429097, "grad_norm": 0.6992610474739821, "learning_rate": 2.496350364963504e-07, "loss": 0.5734, "step": 32712 }, { "epoch": 0.955096201570757, "grad_norm": 0.6854353927867219, "learning_rate": 2.494728304947283e-07, "loss": 0.6158, "step": 32713 }, { "epoch": 0.9551253977986044, "grad_norm": 0.6860382215932966, "learning_rate": 2.4931062449310625e-07, "loss": 0.6215, "step": 32714 }, { "epoch": 0.9551545940264518, "grad_norm": 0.7653596796378307, "learning_rate": 2.491484184914842e-07, "loss": 0.7463, "step": 32715 }, { "epoch": 0.9551837902542991, "grad_norm": 0.7317011138106057, "learning_rate": 2.4898621248986216e-07, "loss": 0.6699, "step": 32716 }, { "epoch": 0.9552129864821465, "grad_norm": 0.9327226156836328, "learning_rate": 2.488240064882401e-07, "loss": 0.6593, "step": 32717 }, { "epoch": 0.9552421827099938, "grad_norm": 0.7266340747564842, "learning_rate": 2.48661800486618e-07, "loss": 0.6539, "step": 32718 }, { "epoch": 0.9552713789378412, "grad_norm": 0.7373099036696059, "learning_rate": 2.4849959448499597e-07, "loss": 0.6187, "step": 32719 }, { "epoch": 0.9553005751656886, "grad_norm": 0.7817978857694353, "learning_rate": 2.4833738848337387e-07, "loss": 0.6706, "step": 32720 }, { "epoch": 0.9553297713935359, "grad_norm": 0.7519584018666927, "learning_rate": 2.481751824817518e-07, "loss": 0.6732, "step": 32721 }, { "epoch": 0.9553589676213833, "grad_norm": 0.7457881500411935, "learning_rate": 2.480129764801298e-07, "loss": 0.6147, "step": 32722 }, { "epoch": 0.9553881638492306, "grad_norm": 0.7134087603364192, "learning_rate": 2.4785077047850773e-07, "loss": 0.6174, "step": 32723 }, { "epoch": 0.955417360077078, "grad_norm": 0.6970252512332844, "learning_rate": 2.476885644768857e-07, "loss": 0.6215, "step": 32724 }, { "epoch": 0.9554465563049254, "grad_norm": 0.7395048876308481, "learning_rate": 2.4752635847526364e-07, "loss": 0.583, "step": 32725 }, { "epoch": 0.9554757525327727, "grad_norm": 0.7380574957153422, "learning_rate": 2.4736415247364154e-07, "loss": 0.6156, "step": 32726 }, { "epoch": 0.9555049487606201, "grad_norm": 0.6949043699201387, "learning_rate": 2.472019464720195e-07, "loss": 0.5452, "step": 32727 }, { "epoch": 0.9555341449884674, "grad_norm": 0.7525618996793012, "learning_rate": 2.4703974047039745e-07, "loss": 0.7056, "step": 32728 }, { "epoch": 0.9555633412163148, "grad_norm": 0.7327937451989658, "learning_rate": 2.4687753446877535e-07, "loss": 0.5959, "step": 32729 }, { "epoch": 0.9555925374441622, "grad_norm": 0.7391903753277201, "learning_rate": 2.467153284671533e-07, "loss": 0.6676, "step": 32730 }, { "epoch": 0.9556217336720095, "grad_norm": 0.7299285243974327, "learning_rate": 2.4655312246553126e-07, "loss": 0.633, "step": 32731 }, { "epoch": 0.9556509298998569, "grad_norm": 0.7003199157663624, "learning_rate": 2.4639091646390916e-07, "loss": 0.5699, "step": 32732 }, { "epoch": 0.9556801261277043, "grad_norm": 0.8031610733065532, "learning_rate": 2.462287104622871e-07, "loss": 0.6558, "step": 32733 }, { "epoch": 0.9557093223555516, "grad_norm": 0.6683662531818981, "learning_rate": 2.4606650446066507e-07, "loss": 0.5752, "step": 32734 }, { "epoch": 0.955738518583399, "grad_norm": 0.6898771247076106, "learning_rate": 2.45904298459043e-07, "loss": 0.5676, "step": 32735 }, { "epoch": 0.9557677148112463, "grad_norm": 0.6867525564936381, "learning_rate": 2.4574209245742097e-07, "loss": 0.6118, "step": 32736 }, { "epoch": 0.9557969110390937, "grad_norm": 0.7555296418584602, "learning_rate": 2.455798864557989e-07, "loss": 0.6914, "step": 32737 }, { "epoch": 0.9558261072669411, "grad_norm": 0.7230287483304788, "learning_rate": 2.4541768045417683e-07, "loss": 0.5893, "step": 32738 }, { "epoch": 0.9558553034947884, "grad_norm": 0.7422000372048828, "learning_rate": 2.4525547445255473e-07, "loss": 0.6472, "step": 32739 }, { "epoch": 0.9558844997226358, "grad_norm": 0.7713807831114518, "learning_rate": 2.450932684509327e-07, "loss": 0.6317, "step": 32740 }, { "epoch": 0.9559136959504831, "grad_norm": 0.7372267253974782, "learning_rate": 2.4493106244931064e-07, "loss": 0.6546, "step": 32741 }, { "epoch": 0.9559428921783305, "grad_norm": 0.6851434086162681, "learning_rate": 2.447688564476886e-07, "loss": 0.587, "step": 32742 }, { "epoch": 0.9559720884061779, "grad_norm": 0.6973295135812244, "learning_rate": 2.4460665044606654e-07, "loss": 0.5812, "step": 32743 }, { "epoch": 0.9560012846340253, "grad_norm": 0.7406030609498783, "learning_rate": 2.444444444444445e-07, "loss": 0.6184, "step": 32744 }, { "epoch": 0.9560304808618727, "grad_norm": 0.7431370624158589, "learning_rate": 2.442822384428224e-07, "loss": 0.6642, "step": 32745 }, { "epoch": 0.9560596770897201, "grad_norm": 0.7700444698577034, "learning_rate": 2.4412003244120035e-07, "loss": 0.6927, "step": 32746 }, { "epoch": 0.9560888733175674, "grad_norm": 0.7834861677349095, "learning_rate": 2.4395782643957825e-07, "loss": 0.6496, "step": 32747 }, { "epoch": 0.9561180695454148, "grad_norm": 0.6841813306415583, "learning_rate": 2.437956204379562e-07, "loss": 0.6185, "step": 32748 }, { "epoch": 0.9561472657732621, "grad_norm": 0.7855833754131136, "learning_rate": 2.4363341443633416e-07, "loss": 0.7084, "step": 32749 }, { "epoch": 0.9561764620011095, "grad_norm": 0.7408004206332748, "learning_rate": 2.434712084347121e-07, "loss": 0.6352, "step": 32750 }, { "epoch": 0.9562056582289569, "grad_norm": 0.7056353839480698, "learning_rate": 2.4330900243309007e-07, "loss": 0.5424, "step": 32751 }, { "epoch": 0.9562348544568042, "grad_norm": 0.7216256138360216, "learning_rate": 2.4314679643146797e-07, "loss": 0.666, "step": 32752 }, { "epoch": 0.9562640506846516, "grad_norm": 0.7590288216840241, "learning_rate": 2.429845904298459e-07, "loss": 0.6912, "step": 32753 }, { "epoch": 0.956293246912499, "grad_norm": 0.7529769471632374, "learning_rate": 2.428223844282239e-07, "loss": 0.6803, "step": 32754 }, { "epoch": 0.9563224431403463, "grad_norm": 0.7239884941129381, "learning_rate": 2.4266017842660183e-07, "loss": 0.6744, "step": 32755 }, { "epoch": 0.9563516393681937, "grad_norm": 0.7693396682030683, "learning_rate": 2.4249797242497973e-07, "loss": 0.731, "step": 32756 }, { "epoch": 0.956380835596041, "grad_norm": 0.7407523554907093, "learning_rate": 2.423357664233577e-07, "loss": 0.6537, "step": 32757 }, { "epoch": 0.9564100318238884, "grad_norm": 0.6954584350156995, "learning_rate": 2.421735604217356e-07, "loss": 0.642, "step": 32758 }, { "epoch": 0.9564392280517358, "grad_norm": 0.8038792710350631, "learning_rate": 2.4201135442011354e-07, "loss": 0.7929, "step": 32759 }, { "epoch": 0.9564684242795831, "grad_norm": 0.6850588196510922, "learning_rate": 2.418491484184915e-07, "loss": 0.5424, "step": 32760 }, { "epoch": 0.9564976205074305, "grad_norm": 0.7968418522569661, "learning_rate": 2.4168694241686945e-07, "loss": 0.798, "step": 32761 }, { "epoch": 0.9565268167352778, "grad_norm": 0.6403155565436468, "learning_rate": 2.415247364152474e-07, "loss": 0.5165, "step": 32762 }, { "epoch": 0.9565560129631252, "grad_norm": 0.7398614902156303, "learning_rate": 2.4136253041362536e-07, "loss": 0.6409, "step": 32763 }, { "epoch": 0.9565852091909726, "grad_norm": 0.742294311546087, "learning_rate": 2.4120032441200326e-07, "loss": 0.6766, "step": 32764 }, { "epoch": 0.9566144054188199, "grad_norm": 0.7797260818470494, "learning_rate": 2.410381184103812e-07, "loss": 0.6558, "step": 32765 }, { "epoch": 0.9566436016466673, "grad_norm": 0.7254498847953105, "learning_rate": 2.408759124087591e-07, "loss": 0.6244, "step": 32766 }, { "epoch": 0.9566727978745146, "grad_norm": 0.6914228724322181, "learning_rate": 2.4071370640713707e-07, "loss": 0.6306, "step": 32767 }, { "epoch": 0.956701994102362, "grad_norm": 0.6903573152303459, "learning_rate": 2.40551500405515e-07, "loss": 0.599, "step": 32768 }, { "epoch": 0.9567311903302094, "grad_norm": 0.7447343287913332, "learning_rate": 2.40389294403893e-07, "loss": 0.7104, "step": 32769 }, { "epoch": 0.9567603865580567, "grad_norm": 0.7386891859976138, "learning_rate": 2.4022708840227093e-07, "loss": 0.6723, "step": 32770 }, { "epoch": 0.9567895827859041, "grad_norm": 0.7209195014163501, "learning_rate": 2.400648824006489e-07, "loss": 0.6474, "step": 32771 }, { "epoch": 0.9568187790137515, "grad_norm": 0.6958208616581932, "learning_rate": 2.399026763990268e-07, "loss": 0.5962, "step": 32772 }, { "epoch": 0.9568479752415988, "grad_norm": 0.6828091902217313, "learning_rate": 2.3974047039740474e-07, "loss": 0.5571, "step": 32773 }, { "epoch": 0.9568771714694462, "grad_norm": 0.6933413728613417, "learning_rate": 2.3957826439578264e-07, "loss": 0.6107, "step": 32774 }, { "epoch": 0.9569063676972935, "grad_norm": 0.8146392298001964, "learning_rate": 2.394160583941606e-07, "loss": 0.7186, "step": 32775 }, { "epoch": 0.9569355639251409, "grad_norm": 0.7445470960951937, "learning_rate": 2.3925385239253855e-07, "loss": 0.691, "step": 32776 }, { "epoch": 0.9569647601529883, "grad_norm": 0.6761240100507333, "learning_rate": 2.390916463909165e-07, "loss": 0.5598, "step": 32777 }, { "epoch": 0.9569939563808356, "grad_norm": 0.7173019365530362, "learning_rate": 2.389294403892944e-07, "loss": 0.6497, "step": 32778 }, { "epoch": 0.957023152608683, "grad_norm": 0.691037997677188, "learning_rate": 2.3876723438767235e-07, "loss": 0.5915, "step": 32779 }, { "epoch": 0.9570523488365303, "grad_norm": 0.739031603495318, "learning_rate": 2.386050283860503e-07, "loss": 0.6368, "step": 32780 }, { "epoch": 0.9570815450643777, "grad_norm": 0.7459343694758328, "learning_rate": 2.3844282238442826e-07, "loss": 0.6827, "step": 32781 }, { "epoch": 0.9571107412922251, "grad_norm": 0.7058515702725238, "learning_rate": 2.382806163828062e-07, "loss": 0.5971, "step": 32782 }, { "epoch": 0.9571399375200724, "grad_norm": 0.7592599764273532, "learning_rate": 2.3811841038118414e-07, "loss": 0.6526, "step": 32783 }, { "epoch": 0.9571691337479198, "grad_norm": 0.7191421588767447, "learning_rate": 2.3795620437956207e-07, "loss": 0.6117, "step": 32784 }, { "epoch": 0.9571983299757671, "grad_norm": 0.7008348461931035, "learning_rate": 2.3779399837794e-07, "loss": 0.6184, "step": 32785 }, { "epoch": 0.9572275262036145, "grad_norm": 0.6953862061514886, "learning_rate": 2.3763179237631792e-07, "loss": 0.599, "step": 32786 }, { "epoch": 0.9572567224314619, "grad_norm": 0.6998125958718964, "learning_rate": 2.3746958637469588e-07, "loss": 0.6553, "step": 32787 }, { "epoch": 0.9572859186593092, "grad_norm": 0.7735234595347662, "learning_rate": 2.3730738037307383e-07, "loss": 0.6039, "step": 32788 }, { "epoch": 0.9573151148871566, "grad_norm": 0.7186431211474241, "learning_rate": 2.3714517437145176e-07, "loss": 0.58, "step": 32789 }, { "epoch": 0.957344311115004, "grad_norm": 0.7356190929592263, "learning_rate": 2.3698296836982971e-07, "loss": 0.6758, "step": 32790 }, { "epoch": 0.9573735073428513, "grad_norm": 0.7384495307824018, "learning_rate": 2.3682076236820761e-07, "loss": 0.6609, "step": 32791 }, { "epoch": 0.9574027035706987, "grad_norm": 0.7639211525658287, "learning_rate": 2.3665855636658557e-07, "loss": 0.711, "step": 32792 }, { "epoch": 0.957431899798546, "grad_norm": 0.7048158733045728, "learning_rate": 2.3649635036496352e-07, "loss": 0.5429, "step": 32793 }, { "epoch": 0.9574610960263934, "grad_norm": 0.7671499937749544, "learning_rate": 2.3633414436334145e-07, "loss": 0.7337, "step": 32794 }, { "epoch": 0.9574902922542408, "grad_norm": 0.7392049226144941, "learning_rate": 2.361719383617194e-07, "loss": 0.6289, "step": 32795 }, { "epoch": 0.9575194884820881, "grad_norm": 0.6832072896205812, "learning_rate": 2.3600973236009736e-07, "loss": 0.5851, "step": 32796 }, { "epoch": 0.9575486847099355, "grad_norm": 0.7765836533933281, "learning_rate": 2.358475263584753e-07, "loss": 0.6592, "step": 32797 }, { "epoch": 0.9575778809377828, "grad_norm": 0.6784219764446948, "learning_rate": 2.356853203568532e-07, "loss": 0.5782, "step": 32798 }, { "epoch": 0.9576070771656302, "grad_norm": 0.7642458107138975, "learning_rate": 2.3552311435523117e-07, "loss": 0.6995, "step": 32799 }, { "epoch": 0.9576362733934776, "grad_norm": 0.7457372943704915, "learning_rate": 2.353609083536091e-07, "loss": 0.6133, "step": 32800 }, { "epoch": 0.9576654696213249, "grad_norm": 0.6804139175489595, "learning_rate": 2.3519870235198705e-07, "loss": 0.5623, "step": 32801 }, { "epoch": 0.9576946658491723, "grad_norm": 0.7286853977651168, "learning_rate": 2.35036496350365e-07, "loss": 0.6468, "step": 32802 }, { "epoch": 0.9577238620770196, "grad_norm": 0.7246914885387633, "learning_rate": 2.3487429034874293e-07, "loss": 0.6287, "step": 32803 }, { "epoch": 0.957753058304867, "grad_norm": 0.7217184855852123, "learning_rate": 2.3471208434712086e-07, "loss": 0.6552, "step": 32804 }, { "epoch": 0.9577822545327144, "grad_norm": 0.763813777429898, "learning_rate": 2.3454987834549878e-07, "loss": 0.6913, "step": 32805 }, { "epoch": 0.9578114507605617, "grad_norm": 0.7818678537384822, "learning_rate": 2.3438767234387674e-07, "loss": 0.6554, "step": 32806 }, { "epoch": 0.9578406469884091, "grad_norm": 0.7004934414174019, "learning_rate": 2.342254663422547e-07, "loss": 0.6452, "step": 32807 }, { "epoch": 0.9578698432162565, "grad_norm": 0.7187247343037757, "learning_rate": 2.3406326034063262e-07, "loss": 0.6297, "step": 32808 }, { "epoch": 0.9578990394441038, "grad_norm": 0.7054092212029096, "learning_rate": 2.3390105433901057e-07, "loss": 0.5847, "step": 32809 }, { "epoch": 0.9579282356719512, "grad_norm": 0.6821757763400806, "learning_rate": 2.3373884833738853e-07, "loss": 0.6004, "step": 32810 }, { "epoch": 0.9579574318997985, "grad_norm": 0.7463531587069493, "learning_rate": 2.3357664233576643e-07, "loss": 0.6282, "step": 32811 }, { "epoch": 0.9579866281276459, "grad_norm": 0.7380046177552951, "learning_rate": 2.3341443633414438e-07, "loss": 0.6592, "step": 32812 }, { "epoch": 0.9580158243554933, "grad_norm": 0.884452281596537, "learning_rate": 2.332522303325223e-07, "loss": 0.7288, "step": 32813 }, { "epoch": 0.9580450205833406, "grad_norm": 0.6789308170524883, "learning_rate": 2.3309002433090026e-07, "loss": 0.5782, "step": 32814 }, { "epoch": 0.958074216811188, "grad_norm": 0.7040780729512179, "learning_rate": 2.3292781832927822e-07, "loss": 0.6107, "step": 32815 }, { "epoch": 0.9581034130390353, "grad_norm": 0.8766462021130331, "learning_rate": 2.3276561232765614e-07, "loss": 0.698, "step": 32816 }, { "epoch": 0.9581326092668827, "grad_norm": 0.7622652685954889, "learning_rate": 2.326034063260341e-07, "loss": 0.6278, "step": 32817 }, { "epoch": 0.9581618054947301, "grad_norm": 0.7189147602373411, "learning_rate": 2.32441200324412e-07, "loss": 0.5966, "step": 32818 }, { "epoch": 0.9581910017225774, "grad_norm": 0.6869013226345715, "learning_rate": 2.3227899432278995e-07, "loss": 0.6304, "step": 32819 }, { "epoch": 0.9582201979504248, "grad_norm": 0.6702578358669068, "learning_rate": 2.321167883211679e-07, "loss": 0.5816, "step": 32820 }, { "epoch": 0.9582493941782722, "grad_norm": 0.7284891696533778, "learning_rate": 2.3195458231954583e-07, "loss": 0.6414, "step": 32821 }, { "epoch": 0.9582785904061195, "grad_norm": 0.7982095227557087, "learning_rate": 2.317923763179238e-07, "loss": 0.7005, "step": 32822 }, { "epoch": 0.9583077866339669, "grad_norm": 0.7405900983379875, "learning_rate": 2.3163017031630174e-07, "loss": 0.6555, "step": 32823 }, { "epoch": 0.9583369828618142, "grad_norm": 0.7619872891367874, "learning_rate": 2.3146796431467964e-07, "loss": 0.5991, "step": 32824 }, { "epoch": 0.9583661790896616, "grad_norm": 0.7126747359579826, "learning_rate": 2.313057583130576e-07, "loss": 0.6522, "step": 32825 }, { "epoch": 0.958395375317509, "grad_norm": 0.6441134627312688, "learning_rate": 2.3114355231143555e-07, "loss": 0.5448, "step": 32826 }, { "epoch": 0.9584245715453563, "grad_norm": 0.6772684083544096, "learning_rate": 2.3098134630981348e-07, "loss": 0.5625, "step": 32827 }, { "epoch": 0.9584537677732037, "grad_norm": 0.679566723817082, "learning_rate": 2.3081914030819143e-07, "loss": 0.5153, "step": 32828 }, { "epoch": 0.958482964001051, "grad_norm": 0.7122019071832276, "learning_rate": 2.3065693430656938e-07, "loss": 0.564, "step": 32829 }, { "epoch": 0.9585121602288984, "grad_norm": 0.7763330344878266, "learning_rate": 2.304947283049473e-07, "loss": 0.5945, "step": 32830 }, { "epoch": 0.9585413564567458, "grad_norm": 1.1334881370118073, "learning_rate": 2.3033252230332524e-07, "loss": 0.6385, "step": 32831 }, { "epoch": 0.9585705526845931, "grad_norm": 0.7531362323798209, "learning_rate": 2.3017031630170317e-07, "loss": 0.7356, "step": 32832 }, { "epoch": 0.9585997489124405, "grad_norm": 0.7149839918017082, "learning_rate": 2.3000811030008112e-07, "loss": 0.5966, "step": 32833 }, { "epoch": 0.9586289451402878, "grad_norm": 0.6613124692618921, "learning_rate": 2.2984590429845907e-07, "loss": 0.5616, "step": 32834 }, { "epoch": 0.9586581413681352, "grad_norm": 0.815792532793975, "learning_rate": 2.29683698296837e-07, "loss": 0.7276, "step": 32835 }, { "epoch": 0.9586873375959826, "grad_norm": 0.7058679582206129, "learning_rate": 2.2952149229521496e-07, "loss": 0.6244, "step": 32836 }, { "epoch": 0.9587165338238299, "grad_norm": 0.7335194407574044, "learning_rate": 2.2935928629359286e-07, "loss": 0.6084, "step": 32837 }, { "epoch": 0.9587457300516773, "grad_norm": 0.6896063165536351, "learning_rate": 2.291970802919708e-07, "loss": 0.576, "step": 32838 }, { "epoch": 0.9587749262795247, "grad_norm": 0.6953554617651821, "learning_rate": 2.2903487429034876e-07, "loss": 0.5797, "step": 32839 }, { "epoch": 0.958804122507372, "grad_norm": 0.7105173073959066, "learning_rate": 2.288726682887267e-07, "loss": 0.6166, "step": 32840 }, { "epoch": 0.9588333187352194, "grad_norm": 0.7163142525426794, "learning_rate": 2.2871046228710465e-07, "loss": 0.6472, "step": 32841 }, { "epoch": 0.9588625149630667, "grad_norm": 0.7650038852414858, "learning_rate": 2.285482562854826e-07, "loss": 0.6412, "step": 32842 }, { "epoch": 0.9588917111909141, "grad_norm": 0.7151776839048573, "learning_rate": 2.2838605028386053e-07, "loss": 0.5934, "step": 32843 }, { "epoch": 0.9589209074187615, "grad_norm": 0.7166925482170716, "learning_rate": 2.2822384428223845e-07, "loss": 0.6851, "step": 32844 }, { "epoch": 0.9589501036466088, "grad_norm": 0.7096482725963775, "learning_rate": 2.2806163828061638e-07, "loss": 0.5861, "step": 32845 }, { "epoch": 0.9589792998744562, "grad_norm": 0.7144074430165054, "learning_rate": 2.2789943227899434e-07, "loss": 0.6556, "step": 32846 }, { "epoch": 0.9590084961023035, "grad_norm": 0.696964179238745, "learning_rate": 2.277372262773723e-07, "loss": 0.6013, "step": 32847 }, { "epoch": 0.9590376923301509, "grad_norm": 0.7212268783704303, "learning_rate": 2.2757502027575022e-07, "loss": 0.6653, "step": 32848 }, { "epoch": 0.9590668885579983, "grad_norm": 0.776414479678267, "learning_rate": 2.2741281427412817e-07, "loss": 0.6852, "step": 32849 }, { "epoch": 0.9590960847858456, "grad_norm": 0.6760416862973293, "learning_rate": 2.2725060827250607e-07, "loss": 0.5956, "step": 32850 }, { "epoch": 0.959125281013693, "grad_norm": 0.7457629195027615, "learning_rate": 2.2708840227088403e-07, "loss": 0.7513, "step": 32851 }, { "epoch": 0.9591544772415403, "grad_norm": 0.7356338477321471, "learning_rate": 2.2692619626926198e-07, "loss": 0.6753, "step": 32852 }, { "epoch": 0.9591836734693877, "grad_norm": 0.7880688899192769, "learning_rate": 2.2676399026763993e-07, "loss": 0.6735, "step": 32853 }, { "epoch": 0.9592128696972351, "grad_norm": 0.7320664766813635, "learning_rate": 2.2660178426601786e-07, "loss": 0.6189, "step": 32854 }, { "epoch": 0.9592420659250824, "grad_norm": 0.6831703405818292, "learning_rate": 2.2643957826439581e-07, "loss": 0.5745, "step": 32855 }, { "epoch": 0.9592712621529298, "grad_norm": 0.723901718988231, "learning_rate": 2.2627737226277377e-07, "loss": 0.6416, "step": 32856 }, { "epoch": 0.9593004583807772, "grad_norm": 0.7730926934386954, "learning_rate": 2.2611516626115167e-07, "loss": 0.695, "step": 32857 }, { "epoch": 0.9593296546086245, "grad_norm": 0.7282559088826726, "learning_rate": 2.2595296025952962e-07, "loss": 0.712, "step": 32858 }, { "epoch": 0.9593588508364719, "grad_norm": 0.7141813379839053, "learning_rate": 2.2579075425790755e-07, "loss": 0.6297, "step": 32859 }, { "epoch": 0.9593880470643192, "grad_norm": 0.7439367795724056, "learning_rate": 2.256285482562855e-07, "loss": 0.7033, "step": 32860 }, { "epoch": 0.9594172432921666, "grad_norm": 0.7247500641134647, "learning_rate": 2.2546634225466346e-07, "loss": 0.666, "step": 32861 }, { "epoch": 0.959446439520014, "grad_norm": 0.7300013840701733, "learning_rate": 2.2530413625304139e-07, "loss": 0.6651, "step": 32862 }, { "epoch": 0.9594756357478613, "grad_norm": 0.7077403489404623, "learning_rate": 2.2514193025141931e-07, "loss": 0.6173, "step": 32863 }, { "epoch": 0.9595048319757087, "grad_norm": 0.7087084681232538, "learning_rate": 2.2497972424979724e-07, "loss": 0.6088, "step": 32864 }, { "epoch": 0.9595340282035562, "grad_norm": 0.7695289274467737, "learning_rate": 2.248175182481752e-07, "loss": 0.6745, "step": 32865 }, { "epoch": 0.9595632244314035, "grad_norm": 0.7867674592744989, "learning_rate": 2.2465531224655315e-07, "loss": 0.6722, "step": 32866 }, { "epoch": 0.9595924206592509, "grad_norm": 0.7728994384266393, "learning_rate": 2.2449310624493108e-07, "loss": 0.6911, "step": 32867 }, { "epoch": 0.9596216168870982, "grad_norm": 0.749021647279549, "learning_rate": 2.2433090024330903e-07, "loss": 0.6595, "step": 32868 }, { "epoch": 0.9596508131149456, "grad_norm": 0.7312021673612872, "learning_rate": 2.2416869424168698e-07, "loss": 0.7004, "step": 32869 }, { "epoch": 0.959680009342793, "grad_norm": 0.701006933352744, "learning_rate": 2.2400648824006488e-07, "loss": 0.5785, "step": 32870 }, { "epoch": 0.9597092055706403, "grad_norm": 0.7036448644023003, "learning_rate": 2.2384428223844284e-07, "loss": 0.5942, "step": 32871 }, { "epoch": 0.9597384017984877, "grad_norm": 0.7661072382562424, "learning_rate": 2.2368207623682077e-07, "loss": 0.6801, "step": 32872 }, { "epoch": 0.959767598026335, "grad_norm": 0.7237240711328705, "learning_rate": 2.2351987023519872e-07, "loss": 0.6305, "step": 32873 }, { "epoch": 0.9597967942541824, "grad_norm": 0.7541126144134973, "learning_rate": 2.2335766423357667e-07, "loss": 0.6432, "step": 32874 }, { "epoch": 0.9598259904820298, "grad_norm": 0.7254406042356956, "learning_rate": 2.231954582319546e-07, "loss": 0.6453, "step": 32875 }, { "epoch": 0.9598551867098771, "grad_norm": 0.6823789179234917, "learning_rate": 2.2303325223033255e-07, "loss": 0.5359, "step": 32876 }, { "epoch": 0.9598843829377245, "grad_norm": 0.7464939481906282, "learning_rate": 2.2287104622871046e-07, "loss": 0.6821, "step": 32877 }, { "epoch": 0.9599135791655719, "grad_norm": 0.7358624982884361, "learning_rate": 2.227088402270884e-07, "loss": 0.6406, "step": 32878 }, { "epoch": 0.9599427753934192, "grad_norm": 0.6803166232358766, "learning_rate": 2.2254663422546636e-07, "loss": 0.5897, "step": 32879 }, { "epoch": 0.9599719716212666, "grad_norm": 0.7634629773047558, "learning_rate": 2.223844282238443e-07, "loss": 0.6802, "step": 32880 }, { "epoch": 0.9600011678491139, "grad_norm": 0.733519059609725, "learning_rate": 2.2222222222222224e-07, "loss": 0.6384, "step": 32881 }, { "epoch": 0.9600303640769613, "grad_norm": 0.7346602741759829, "learning_rate": 2.220600162206002e-07, "loss": 0.6768, "step": 32882 }, { "epoch": 0.9600595603048087, "grad_norm": 0.759592009992305, "learning_rate": 2.218978102189781e-07, "loss": 0.5492, "step": 32883 }, { "epoch": 0.960088756532656, "grad_norm": 0.7324949569591729, "learning_rate": 2.2173560421735605e-07, "loss": 0.6175, "step": 32884 }, { "epoch": 0.9601179527605034, "grad_norm": 0.6702031604575978, "learning_rate": 2.21573398215734e-07, "loss": 0.5714, "step": 32885 }, { "epoch": 0.9601471489883507, "grad_norm": 0.6626896793673638, "learning_rate": 2.2141119221411193e-07, "loss": 0.5548, "step": 32886 }, { "epoch": 0.9601763452161981, "grad_norm": 0.752144262159765, "learning_rate": 2.212489862124899e-07, "loss": 0.624, "step": 32887 }, { "epoch": 0.9602055414440455, "grad_norm": 0.7029045883331162, "learning_rate": 2.2108678021086784e-07, "loss": 0.6629, "step": 32888 }, { "epoch": 0.9602347376718928, "grad_norm": 0.718887092755059, "learning_rate": 2.2092457420924577e-07, "loss": 0.6473, "step": 32889 }, { "epoch": 0.9602639338997402, "grad_norm": 0.7756937559642739, "learning_rate": 2.207623682076237e-07, "loss": 0.6527, "step": 32890 }, { "epoch": 0.9602931301275875, "grad_norm": 0.7068228394866936, "learning_rate": 2.2060016220600162e-07, "loss": 0.6112, "step": 32891 }, { "epoch": 0.9603223263554349, "grad_norm": 0.7422038710307258, "learning_rate": 2.2043795620437958e-07, "loss": 0.6846, "step": 32892 }, { "epoch": 0.9603515225832823, "grad_norm": 0.6672664949736924, "learning_rate": 2.2027575020275753e-07, "loss": 0.5539, "step": 32893 }, { "epoch": 0.9603807188111296, "grad_norm": 0.7193850949039164, "learning_rate": 2.2011354420113546e-07, "loss": 0.64, "step": 32894 }, { "epoch": 0.960409915038977, "grad_norm": 0.73294256780637, "learning_rate": 2.1995133819951341e-07, "loss": 0.5987, "step": 32895 }, { "epoch": 0.9604391112668244, "grad_norm": 0.7244199510754807, "learning_rate": 2.1978913219789131e-07, "loss": 0.6289, "step": 32896 }, { "epoch": 0.9604683074946717, "grad_norm": 0.7107604635911123, "learning_rate": 2.1962692619626927e-07, "loss": 0.6381, "step": 32897 }, { "epoch": 0.9604975037225191, "grad_norm": 0.7378617771556201, "learning_rate": 2.1946472019464722e-07, "loss": 0.6734, "step": 32898 }, { "epoch": 0.9605266999503664, "grad_norm": 0.7254249377912353, "learning_rate": 2.1930251419302515e-07, "loss": 0.6244, "step": 32899 }, { "epoch": 0.9605558961782138, "grad_norm": 0.7553448803126432, "learning_rate": 2.191403081914031e-07, "loss": 0.6582, "step": 32900 }, { "epoch": 0.9605850924060612, "grad_norm": 0.7512885889511963, "learning_rate": 2.1897810218978106e-07, "loss": 0.6606, "step": 32901 }, { "epoch": 0.9606142886339085, "grad_norm": 0.6994289706312954, "learning_rate": 2.1881589618815898e-07, "loss": 0.5772, "step": 32902 }, { "epoch": 0.9606434848617559, "grad_norm": 0.7152228431279208, "learning_rate": 2.186536901865369e-07, "loss": 0.6145, "step": 32903 }, { "epoch": 0.9606726810896032, "grad_norm": 0.6799421778995711, "learning_rate": 2.1849148418491484e-07, "loss": 0.5124, "step": 32904 }, { "epoch": 0.9607018773174506, "grad_norm": 0.6476230098947696, "learning_rate": 2.183292781832928e-07, "loss": 0.5537, "step": 32905 }, { "epoch": 0.960731073545298, "grad_norm": 0.7359465017223993, "learning_rate": 2.1816707218167075e-07, "loss": 0.615, "step": 32906 }, { "epoch": 0.9607602697731453, "grad_norm": 0.6533757316397715, "learning_rate": 2.1800486618004867e-07, "loss": 0.5491, "step": 32907 }, { "epoch": 0.9607894660009927, "grad_norm": 0.7288578046385554, "learning_rate": 2.1784266017842663e-07, "loss": 0.6383, "step": 32908 }, { "epoch": 0.96081866222884, "grad_norm": 0.7523913502422658, "learning_rate": 2.1768045417680456e-07, "loss": 0.6686, "step": 32909 }, { "epoch": 0.9608478584566874, "grad_norm": 0.6878338904796532, "learning_rate": 2.1751824817518248e-07, "loss": 0.5648, "step": 32910 }, { "epoch": 0.9608770546845348, "grad_norm": 0.6561944933377956, "learning_rate": 2.1735604217356044e-07, "loss": 0.5366, "step": 32911 }, { "epoch": 0.9609062509123821, "grad_norm": 0.6727558441521065, "learning_rate": 2.171938361719384e-07, "loss": 0.5825, "step": 32912 }, { "epoch": 0.9609354471402295, "grad_norm": 0.7085255092253792, "learning_rate": 2.1703163017031632e-07, "loss": 0.6288, "step": 32913 }, { "epoch": 0.9609646433680769, "grad_norm": 0.7248228012478959, "learning_rate": 2.1686942416869427e-07, "loss": 0.6205, "step": 32914 }, { "epoch": 0.9609938395959242, "grad_norm": 0.6532310343957641, "learning_rate": 2.1670721816707223e-07, "loss": 0.5288, "step": 32915 }, { "epoch": 0.9610230358237716, "grad_norm": 0.7043096581091683, "learning_rate": 2.1654501216545013e-07, "loss": 0.5991, "step": 32916 }, { "epoch": 0.9610522320516189, "grad_norm": 1.1439329383000565, "learning_rate": 2.1638280616382808e-07, "loss": 0.6691, "step": 32917 }, { "epoch": 0.9610814282794663, "grad_norm": 0.7164797370829665, "learning_rate": 2.16220600162206e-07, "loss": 0.6023, "step": 32918 }, { "epoch": 0.9611106245073137, "grad_norm": 0.6917406592732424, "learning_rate": 2.1605839416058396e-07, "loss": 0.5924, "step": 32919 }, { "epoch": 0.961139820735161, "grad_norm": 0.7125001862283296, "learning_rate": 2.1589618815896192e-07, "loss": 0.6325, "step": 32920 }, { "epoch": 0.9611690169630084, "grad_norm": 0.7073898026357617, "learning_rate": 2.1573398215733984e-07, "loss": 0.5951, "step": 32921 }, { "epoch": 0.9611982131908557, "grad_norm": 0.7111623089227601, "learning_rate": 2.155717761557178e-07, "loss": 0.5932, "step": 32922 }, { "epoch": 0.9612274094187031, "grad_norm": 0.8113277188704033, "learning_rate": 2.154095701540957e-07, "loss": 0.7871, "step": 32923 }, { "epoch": 0.9612566056465505, "grad_norm": 0.6917107788975623, "learning_rate": 2.1524736415247365e-07, "loss": 0.604, "step": 32924 }, { "epoch": 0.9612858018743978, "grad_norm": 0.709497957897302, "learning_rate": 2.150851581508516e-07, "loss": 0.6521, "step": 32925 }, { "epoch": 0.9613149981022452, "grad_norm": 0.7729848715213663, "learning_rate": 2.1492295214922953e-07, "loss": 0.6941, "step": 32926 }, { "epoch": 0.9613441943300925, "grad_norm": 0.6745161524098899, "learning_rate": 2.147607461476075e-07, "loss": 0.587, "step": 32927 }, { "epoch": 0.9613733905579399, "grad_norm": 0.7048527509647955, "learning_rate": 2.1459854014598544e-07, "loss": 0.6455, "step": 32928 }, { "epoch": 0.9614025867857873, "grad_norm": 0.7102448681015564, "learning_rate": 2.1443633414436334e-07, "loss": 0.6189, "step": 32929 }, { "epoch": 0.9614317830136346, "grad_norm": 0.7012258888710979, "learning_rate": 2.142741281427413e-07, "loss": 0.6004, "step": 32930 }, { "epoch": 0.961460979241482, "grad_norm": 0.7400119301976326, "learning_rate": 2.1411192214111922e-07, "loss": 0.6738, "step": 32931 }, { "epoch": 0.9614901754693294, "grad_norm": 0.7467727810472624, "learning_rate": 2.1394971613949718e-07, "loss": 0.7301, "step": 32932 }, { "epoch": 0.9615193716971767, "grad_norm": 0.7573124800114892, "learning_rate": 2.1378751013787513e-07, "loss": 0.7238, "step": 32933 }, { "epoch": 0.9615485679250241, "grad_norm": 0.6866099164675492, "learning_rate": 2.1362530413625306e-07, "loss": 0.5895, "step": 32934 }, { "epoch": 0.9615777641528714, "grad_norm": 0.7621081166666953, "learning_rate": 2.13463098134631e-07, "loss": 0.6966, "step": 32935 }, { "epoch": 0.9616069603807188, "grad_norm": 0.748354079320247, "learning_rate": 2.133008921330089e-07, "loss": 0.6736, "step": 32936 }, { "epoch": 0.9616361566085662, "grad_norm": 0.728415377230074, "learning_rate": 2.1313868613138687e-07, "loss": 0.6504, "step": 32937 }, { "epoch": 0.9616653528364135, "grad_norm": 0.7120463325882955, "learning_rate": 2.1297648012976482e-07, "loss": 0.6039, "step": 32938 }, { "epoch": 0.9616945490642609, "grad_norm": 0.7187560549742975, "learning_rate": 2.1281427412814277e-07, "loss": 0.6526, "step": 32939 }, { "epoch": 0.9617237452921082, "grad_norm": 0.6843367626785736, "learning_rate": 2.126520681265207e-07, "loss": 0.5811, "step": 32940 }, { "epoch": 0.9617529415199556, "grad_norm": 0.741565340871451, "learning_rate": 2.1248986212489866e-07, "loss": 0.6429, "step": 32941 }, { "epoch": 0.961782137747803, "grad_norm": 0.68633042152012, "learning_rate": 2.1232765612327656e-07, "loss": 0.6086, "step": 32942 }, { "epoch": 0.9618113339756503, "grad_norm": 0.733830279574333, "learning_rate": 2.121654501216545e-07, "loss": 0.6399, "step": 32943 }, { "epoch": 0.9618405302034977, "grad_norm": 0.6979789305778596, "learning_rate": 2.1200324412003246e-07, "loss": 0.5791, "step": 32944 }, { "epoch": 0.961869726431345, "grad_norm": 0.6641520528367383, "learning_rate": 2.118410381184104e-07, "loss": 0.5365, "step": 32945 }, { "epoch": 0.9618989226591924, "grad_norm": 0.6720733086584737, "learning_rate": 2.1167883211678835e-07, "loss": 0.5821, "step": 32946 }, { "epoch": 0.9619281188870398, "grad_norm": 0.7230366933854538, "learning_rate": 2.115166261151663e-07, "loss": 0.604, "step": 32947 }, { "epoch": 0.9619573151148871, "grad_norm": 0.7461959251538934, "learning_rate": 2.1135442011354423e-07, "loss": 0.6886, "step": 32948 }, { "epoch": 0.9619865113427345, "grad_norm": 0.7614266646050839, "learning_rate": 2.1119221411192215e-07, "loss": 0.7798, "step": 32949 }, { "epoch": 0.9620157075705819, "grad_norm": 0.7231848693675926, "learning_rate": 2.1103000811030008e-07, "loss": 0.6046, "step": 32950 }, { "epoch": 0.9620449037984292, "grad_norm": 0.7010322468615658, "learning_rate": 2.1086780210867804e-07, "loss": 0.5978, "step": 32951 }, { "epoch": 0.9620741000262766, "grad_norm": 0.7388721441144261, "learning_rate": 2.10705596107056e-07, "loss": 0.6569, "step": 32952 }, { "epoch": 0.9621032962541239, "grad_norm": 0.6997014891094816, "learning_rate": 2.1054339010543392e-07, "loss": 0.6147, "step": 32953 }, { "epoch": 0.9621324924819713, "grad_norm": 0.7330697555605719, "learning_rate": 2.1038118410381187e-07, "loss": 0.618, "step": 32954 }, { "epoch": 0.9621616887098187, "grad_norm": 0.7092252269305488, "learning_rate": 2.1021897810218977e-07, "loss": 0.5923, "step": 32955 }, { "epoch": 0.962190884937666, "grad_norm": 0.6842473806570278, "learning_rate": 2.1005677210056773e-07, "loss": 0.6087, "step": 32956 }, { "epoch": 0.9622200811655134, "grad_norm": 0.7306127819422125, "learning_rate": 2.0989456609894568e-07, "loss": 0.6557, "step": 32957 }, { "epoch": 0.9622492773933607, "grad_norm": 0.7097120595704982, "learning_rate": 2.097323600973236e-07, "loss": 0.6516, "step": 32958 }, { "epoch": 0.9622784736212081, "grad_norm": 0.7244120943952802, "learning_rate": 2.0957015409570156e-07, "loss": 0.6324, "step": 32959 }, { "epoch": 0.9623076698490555, "grad_norm": 0.7183217719467676, "learning_rate": 2.0940794809407951e-07, "loss": 0.6847, "step": 32960 }, { "epoch": 0.9623368660769028, "grad_norm": 0.8527753830149076, "learning_rate": 2.0924574209245744e-07, "loss": 0.6417, "step": 32961 }, { "epoch": 0.9623660623047502, "grad_norm": 0.7730777871056591, "learning_rate": 2.0908353609083537e-07, "loss": 0.6495, "step": 32962 }, { "epoch": 0.9623952585325976, "grad_norm": 0.688301703747222, "learning_rate": 2.089213300892133e-07, "loss": 0.6019, "step": 32963 }, { "epoch": 0.9624244547604449, "grad_norm": 0.70255190204178, "learning_rate": 2.0875912408759125e-07, "loss": 0.6459, "step": 32964 }, { "epoch": 0.9624536509882923, "grad_norm": 0.7111806866723062, "learning_rate": 2.085969180859692e-07, "loss": 0.6481, "step": 32965 }, { "epoch": 0.9624828472161396, "grad_norm": 0.7080435405392015, "learning_rate": 2.0843471208434716e-07, "loss": 0.5767, "step": 32966 }, { "epoch": 0.962512043443987, "grad_norm": 0.7322578038839732, "learning_rate": 2.0827250608272509e-07, "loss": 0.683, "step": 32967 }, { "epoch": 0.9625412396718344, "grad_norm": 0.6585858295152669, "learning_rate": 2.08110300081103e-07, "loss": 0.5285, "step": 32968 }, { "epoch": 0.9625704358996817, "grad_norm": 0.7620031956865857, "learning_rate": 2.0794809407948094e-07, "loss": 0.6753, "step": 32969 }, { "epoch": 0.9625996321275291, "grad_norm": 0.7522702331104524, "learning_rate": 2.077858880778589e-07, "loss": 0.7248, "step": 32970 }, { "epoch": 0.9626288283553764, "grad_norm": 0.6867032984579854, "learning_rate": 2.0762368207623685e-07, "loss": 0.5858, "step": 32971 }, { "epoch": 0.9626580245832238, "grad_norm": 0.7201317441666176, "learning_rate": 2.0746147607461478e-07, "loss": 0.6173, "step": 32972 }, { "epoch": 0.9626872208110712, "grad_norm": 0.6860199927622828, "learning_rate": 2.0729927007299273e-07, "loss": 0.5907, "step": 32973 }, { "epoch": 0.9627164170389185, "grad_norm": 0.7317276367219198, "learning_rate": 2.0713706407137068e-07, "loss": 0.6701, "step": 32974 }, { "epoch": 0.9627456132667659, "grad_norm": 0.6843064383561126, "learning_rate": 2.0697485806974858e-07, "loss": 0.573, "step": 32975 }, { "epoch": 0.9627748094946132, "grad_norm": 0.7137041240391383, "learning_rate": 2.0681265206812654e-07, "loss": 0.5629, "step": 32976 }, { "epoch": 0.9628040057224606, "grad_norm": 0.76118017620567, "learning_rate": 2.0665044606650447e-07, "loss": 0.6918, "step": 32977 }, { "epoch": 0.962833201950308, "grad_norm": 0.738999898134228, "learning_rate": 2.0648824006488242e-07, "loss": 0.6532, "step": 32978 }, { "epoch": 0.9628623981781553, "grad_norm": 0.6964968948004414, "learning_rate": 2.0632603406326037e-07, "loss": 0.6052, "step": 32979 }, { "epoch": 0.9628915944060027, "grad_norm": 0.6653205747516708, "learning_rate": 2.061638280616383e-07, "loss": 0.57, "step": 32980 }, { "epoch": 0.96292079063385, "grad_norm": 0.7342550404818491, "learning_rate": 2.0600162206001625e-07, "loss": 0.6677, "step": 32981 }, { "epoch": 0.9629499868616974, "grad_norm": 0.7338797296159274, "learning_rate": 2.0583941605839415e-07, "loss": 0.5634, "step": 32982 }, { "epoch": 0.9629791830895448, "grad_norm": 0.7317969207968188, "learning_rate": 2.056772100567721e-07, "loss": 0.6747, "step": 32983 }, { "epoch": 0.9630083793173921, "grad_norm": 0.7016601282188951, "learning_rate": 2.0551500405515006e-07, "loss": 0.6409, "step": 32984 }, { "epoch": 0.9630375755452396, "grad_norm": 0.6918507678128512, "learning_rate": 2.05352798053528e-07, "loss": 0.6206, "step": 32985 }, { "epoch": 0.963066771773087, "grad_norm": 0.7091260570217425, "learning_rate": 2.0519059205190594e-07, "loss": 0.6566, "step": 32986 }, { "epoch": 0.9630959680009343, "grad_norm": 0.7049895609075657, "learning_rate": 2.050283860502839e-07, "loss": 0.6358, "step": 32987 }, { "epoch": 0.9631251642287817, "grad_norm": 0.6815885822863716, "learning_rate": 2.048661800486618e-07, "loss": 0.5986, "step": 32988 }, { "epoch": 0.963154360456629, "grad_norm": 0.8778074057965296, "learning_rate": 2.0470397404703975e-07, "loss": 0.5618, "step": 32989 }, { "epoch": 0.9631835566844764, "grad_norm": 0.7366489933522619, "learning_rate": 2.0454176804541768e-07, "loss": 0.6499, "step": 32990 }, { "epoch": 0.9632127529123238, "grad_norm": 0.7286824822984811, "learning_rate": 2.0437956204379563e-07, "loss": 0.6557, "step": 32991 }, { "epoch": 0.9632419491401711, "grad_norm": 0.7212523796599688, "learning_rate": 2.042173560421736e-07, "loss": 0.6133, "step": 32992 }, { "epoch": 0.9632711453680185, "grad_norm": 0.7003772771017479, "learning_rate": 2.0405515004055152e-07, "loss": 0.6339, "step": 32993 }, { "epoch": 0.9633003415958659, "grad_norm": 0.7178566240240213, "learning_rate": 2.0389294403892947e-07, "loss": 0.6617, "step": 32994 }, { "epoch": 0.9633295378237132, "grad_norm": 0.7496454946248988, "learning_rate": 2.037307380373074e-07, "loss": 0.7052, "step": 32995 }, { "epoch": 0.9633587340515606, "grad_norm": 0.7399291733111598, "learning_rate": 2.0356853203568532e-07, "loss": 0.6922, "step": 32996 }, { "epoch": 0.963387930279408, "grad_norm": 0.6923775589440133, "learning_rate": 2.0340632603406328e-07, "loss": 0.5925, "step": 32997 }, { "epoch": 0.9634171265072553, "grad_norm": 0.7988631457973248, "learning_rate": 2.0324412003244123e-07, "loss": 0.6714, "step": 32998 }, { "epoch": 0.9634463227351027, "grad_norm": 0.715919216923308, "learning_rate": 2.0308191403081916e-07, "loss": 0.6364, "step": 32999 }, { "epoch": 0.96347551896295, "grad_norm": 0.7512829818340617, "learning_rate": 2.029197080291971e-07, "loss": 0.6755, "step": 33000 }, { "epoch": 0.9635047151907974, "grad_norm": 0.7503728747058882, "learning_rate": 2.0275750202757501e-07, "loss": 0.6424, "step": 33001 }, { "epoch": 0.9635339114186448, "grad_norm": 0.6782967894623612, "learning_rate": 2.0259529602595297e-07, "loss": 0.6131, "step": 33002 }, { "epoch": 0.9635631076464921, "grad_norm": 0.7260232570363508, "learning_rate": 2.0243309002433092e-07, "loss": 0.5991, "step": 33003 }, { "epoch": 0.9635923038743395, "grad_norm": 0.7587876968063616, "learning_rate": 2.0227088402270885e-07, "loss": 0.6887, "step": 33004 }, { "epoch": 0.9636215001021868, "grad_norm": 0.7657640794339118, "learning_rate": 2.021086780210868e-07, "loss": 0.6862, "step": 33005 }, { "epoch": 0.9636506963300342, "grad_norm": 0.7124063974215068, "learning_rate": 2.0194647201946476e-07, "loss": 0.5611, "step": 33006 }, { "epoch": 0.9636798925578816, "grad_norm": 0.6887424381745515, "learning_rate": 2.0178426601784268e-07, "loss": 0.6094, "step": 33007 }, { "epoch": 0.9637090887857289, "grad_norm": 0.7188208656750548, "learning_rate": 2.016220600162206e-07, "loss": 0.6405, "step": 33008 }, { "epoch": 0.9637382850135763, "grad_norm": 0.7314352590380928, "learning_rate": 2.0145985401459854e-07, "loss": 0.6488, "step": 33009 }, { "epoch": 0.9637674812414236, "grad_norm": 0.6891496851146833, "learning_rate": 2.012976480129765e-07, "loss": 0.5937, "step": 33010 }, { "epoch": 0.963796677469271, "grad_norm": 1.122729117911252, "learning_rate": 2.0113544201135445e-07, "loss": 0.5518, "step": 33011 }, { "epoch": 0.9638258736971184, "grad_norm": 0.6885765415839177, "learning_rate": 2.0097323600973237e-07, "loss": 0.5905, "step": 33012 }, { "epoch": 0.9638550699249657, "grad_norm": 0.6560519547242761, "learning_rate": 2.0081103000811033e-07, "loss": 0.5445, "step": 33013 }, { "epoch": 0.9638842661528131, "grad_norm": 0.7126797929606328, "learning_rate": 2.0064882400648823e-07, "loss": 0.6594, "step": 33014 }, { "epoch": 0.9639134623806604, "grad_norm": 0.7542167071122629, "learning_rate": 2.0048661800486618e-07, "loss": 0.6271, "step": 33015 }, { "epoch": 0.9639426586085078, "grad_norm": 0.7734186142439952, "learning_rate": 2.0032441200324414e-07, "loss": 0.6638, "step": 33016 }, { "epoch": 0.9639718548363552, "grad_norm": 0.6532757831955094, "learning_rate": 2.0016220600162206e-07, "loss": 0.5244, "step": 33017 }, { "epoch": 0.9640010510642025, "grad_norm": 0.7046984190574691, "learning_rate": 2.0000000000000002e-07, "loss": 0.6501, "step": 33018 }, { "epoch": 0.9640302472920499, "grad_norm": 0.7475918501226416, "learning_rate": 1.9983779399837797e-07, "loss": 0.7215, "step": 33019 }, { "epoch": 0.9640594435198973, "grad_norm": 0.6781943879035114, "learning_rate": 1.996755879967559e-07, "loss": 0.5334, "step": 33020 }, { "epoch": 0.9640886397477446, "grad_norm": 0.820179693598953, "learning_rate": 1.9951338199513383e-07, "loss": 0.7234, "step": 33021 }, { "epoch": 0.964117835975592, "grad_norm": 0.7252075384041968, "learning_rate": 1.9935117599351178e-07, "loss": 0.627, "step": 33022 }, { "epoch": 0.9641470322034393, "grad_norm": 0.6750506955892321, "learning_rate": 1.991889699918897e-07, "loss": 0.6113, "step": 33023 }, { "epoch": 0.9641762284312867, "grad_norm": 0.8169983862714744, "learning_rate": 1.9902676399026766e-07, "loss": 0.7163, "step": 33024 }, { "epoch": 0.9642054246591341, "grad_norm": 0.8298387141796999, "learning_rate": 1.9886455798864562e-07, "loss": 0.6872, "step": 33025 }, { "epoch": 0.9642346208869814, "grad_norm": 0.7050946059869949, "learning_rate": 1.9870235198702354e-07, "loss": 0.5833, "step": 33026 }, { "epoch": 0.9642638171148288, "grad_norm": 0.7091149693380973, "learning_rate": 1.985401459854015e-07, "loss": 0.6228, "step": 33027 }, { "epoch": 0.9642930133426761, "grad_norm": 0.6994281213706788, "learning_rate": 1.983779399837794e-07, "loss": 0.6295, "step": 33028 }, { "epoch": 0.9643222095705235, "grad_norm": 0.7309073932990531, "learning_rate": 1.9821573398215735e-07, "loss": 0.6563, "step": 33029 }, { "epoch": 0.9643514057983709, "grad_norm": 0.7297330787145502, "learning_rate": 1.980535279805353e-07, "loss": 0.5817, "step": 33030 }, { "epoch": 0.9643806020262182, "grad_norm": 0.6877447952637696, "learning_rate": 1.9789132197891323e-07, "loss": 0.5769, "step": 33031 }, { "epoch": 0.9644097982540656, "grad_norm": 0.7245778733525282, "learning_rate": 1.9772911597729119e-07, "loss": 0.6091, "step": 33032 }, { "epoch": 0.964438994481913, "grad_norm": 0.7303439241587429, "learning_rate": 1.9756690997566914e-07, "loss": 0.641, "step": 33033 }, { "epoch": 0.9644681907097603, "grad_norm": 0.7390466567800336, "learning_rate": 1.9740470397404704e-07, "loss": 0.7091, "step": 33034 }, { "epoch": 0.9644973869376077, "grad_norm": 0.7662495354737585, "learning_rate": 1.97242497972425e-07, "loss": 0.7107, "step": 33035 }, { "epoch": 0.964526583165455, "grad_norm": 0.7680203401933571, "learning_rate": 1.9708029197080292e-07, "loss": 0.7458, "step": 33036 }, { "epoch": 0.9645557793933024, "grad_norm": 0.7138959296135882, "learning_rate": 1.9691808596918088e-07, "loss": 0.6473, "step": 33037 }, { "epoch": 0.9645849756211498, "grad_norm": 0.6656679493298366, "learning_rate": 1.9675587996755883e-07, "loss": 0.581, "step": 33038 }, { "epoch": 0.9646141718489971, "grad_norm": 0.7668425237530734, "learning_rate": 1.9659367396593676e-07, "loss": 0.7109, "step": 33039 }, { "epoch": 0.9646433680768445, "grad_norm": 0.729953178121373, "learning_rate": 1.964314679643147e-07, "loss": 0.6431, "step": 33040 }, { "epoch": 0.9646725643046918, "grad_norm": 0.7062327050961539, "learning_rate": 1.962692619626926e-07, "loss": 0.6356, "step": 33041 }, { "epoch": 0.9647017605325392, "grad_norm": 0.7199271141538431, "learning_rate": 1.9610705596107057e-07, "loss": 0.5974, "step": 33042 }, { "epoch": 0.9647309567603866, "grad_norm": 0.6631369997742254, "learning_rate": 1.9594484995944852e-07, "loss": 0.5683, "step": 33043 }, { "epoch": 0.9647601529882339, "grad_norm": 0.6991089190137932, "learning_rate": 1.9578264395782645e-07, "loss": 0.5741, "step": 33044 }, { "epoch": 0.9647893492160813, "grad_norm": 0.7465602982899441, "learning_rate": 1.956204379562044e-07, "loss": 0.6638, "step": 33045 }, { "epoch": 0.9648185454439286, "grad_norm": 0.7875011710449606, "learning_rate": 1.9545823195458235e-07, "loss": 0.7082, "step": 33046 }, { "epoch": 0.964847741671776, "grad_norm": 0.7278285056876719, "learning_rate": 1.9529602595296026e-07, "loss": 0.6554, "step": 33047 }, { "epoch": 0.9648769378996234, "grad_norm": 0.7565264355376896, "learning_rate": 1.951338199513382e-07, "loss": 0.6653, "step": 33048 }, { "epoch": 0.9649061341274707, "grad_norm": 0.7589074166254248, "learning_rate": 1.9497161394971614e-07, "loss": 0.6171, "step": 33049 }, { "epoch": 0.9649353303553181, "grad_norm": 0.7421535120971182, "learning_rate": 1.948094079480941e-07, "loss": 0.6318, "step": 33050 }, { "epoch": 0.9649645265831654, "grad_norm": 0.738479046961598, "learning_rate": 1.9464720194647204e-07, "loss": 0.6835, "step": 33051 }, { "epoch": 0.9649937228110128, "grad_norm": 0.7091066510245146, "learning_rate": 1.9448499594485e-07, "loss": 0.5745, "step": 33052 }, { "epoch": 0.9650229190388602, "grad_norm": 0.7004943848803723, "learning_rate": 1.9432278994322793e-07, "loss": 0.605, "step": 33053 }, { "epoch": 0.9650521152667075, "grad_norm": 0.7098615083426235, "learning_rate": 1.9416058394160585e-07, "loss": 0.6528, "step": 33054 }, { "epoch": 0.9650813114945549, "grad_norm": 0.6873904490648257, "learning_rate": 1.9399837793998378e-07, "loss": 0.5607, "step": 33055 }, { "epoch": 0.9651105077224023, "grad_norm": 0.723888749626621, "learning_rate": 1.9383617193836173e-07, "loss": 0.6327, "step": 33056 }, { "epoch": 0.9651397039502496, "grad_norm": 0.6940544322467771, "learning_rate": 1.936739659367397e-07, "loss": 0.6447, "step": 33057 }, { "epoch": 0.965168900178097, "grad_norm": 0.7698514511438629, "learning_rate": 1.9351175993511762e-07, "loss": 0.69, "step": 33058 }, { "epoch": 0.9651980964059443, "grad_norm": 0.6967047049968221, "learning_rate": 1.9334955393349557e-07, "loss": 0.6114, "step": 33059 }, { "epoch": 0.9652272926337917, "grad_norm": 0.7447888358476092, "learning_rate": 1.9318734793187347e-07, "loss": 0.6859, "step": 33060 }, { "epoch": 0.9652564888616391, "grad_norm": 0.7135378191588354, "learning_rate": 1.9302514193025142e-07, "loss": 0.6086, "step": 33061 }, { "epoch": 0.9652856850894864, "grad_norm": 0.749355794436825, "learning_rate": 1.9286293592862938e-07, "loss": 0.7292, "step": 33062 }, { "epoch": 0.9653148813173338, "grad_norm": 0.7457499004828368, "learning_rate": 1.927007299270073e-07, "loss": 0.6099, "step": 33063 }, { "epoch": 0.9653440775451811, "grad_norm": 0.676405098354772, "learning_rate": 1.9253852392538526e-07, "loss": 0.5692, "step": 33064 }, { "epoch": 0.9653732737730285, "grad_norm": 0.7227214639186378, "learning_rate": 1.9237631792376321e-07, "loss": 0.6283, "step": 33065 }, { "epoch": 0.9654024700008759, "grad_norm": 0.6906057060953936, "learning_rate": 1.9221411192214114e-07, "loss": 0.5817, "step": 33066 }, { "epoch": 0.9654316662287232, "grad_norm": 0.6905579198369233, "learning_rate": 1.9205190592051907e-07, "loss": 0.5809, "step": 33067 }, { "epoch": 0.9654608624565706, "grad_norm": 0.7214506274590271, "learning_rate": 1.91889699918897e-07, "loss": 0.6622, "step": 33068 }, { "epoch": 0.965490058684418, "grad_norm": 0.6852999772507343, "learning_rate": 1.9172749391727495e-07, "loss": 0.6043, "step": 33069 }, { "epoch": 0.9655192549122653, "grad_norm": 0.6996507571730785, "learning_rate": 1.915652879156529e-07, "loss": 0.5948, "step": 33070 }, { "epoch": 0.9655484511401127, "grad_norm": 0.7245044054348203, "learning_rate": 1.9140308191403083e-07, "loss": 0.6298, "step": 33071 }, { "epoch": 0.96557764736796, "grad_norm": 0.6980508762393313, "learning_rate": 1.9124087591240878e-07, "loss": 0.6283, "step": 33072 }, { "epoch": 0.9656068435958074, "grad_norm": 0.6908747114579405, "learning_rate": 1.9107866991078669e-07, "loss": 0.597, "step": 33073 }, { "epoch": 0.9656360398236548, "grad_norm": 0.7243767782092175, "learning_rate": 1.9091646390916464e-07, "loss": 0.6539, "step": 33074 }, { "epoch": 0.9656652360515021, "grad_norm": 0.6777891759853658, "learning_rate": 1.907542579075426e-07, "loss": 0.6018, "step": 33075 }, { "epoch": 0.9656944322793495, "grad_norm": 0.7257313930845398, "learning_rate": 1.9059205190592052e-07, "loss": 0.6131, "step": 33076 }, { "epoch": 0.9657236285071968, "grad_norm": 0.6820811782478092, "learning_rate": 1.9042984590429847e-07, "loss": 0.5427, "step": 33077 }, { "epoch": 0.9657528247350442, "grad_norm": 0.6674686177427032, "learning_rate": 1.9026763990267643e-07, "loss": 0.5572, "step": 33078 }, { "epoch": 0.9657820209628916, "grad_norm": 0.7099006374542288, "learning_rate": 1.9010543390105436e-07, "loss": 0.5638, "step": 33079 }, { "epoch": 0.9658112171907389, "grad_norm": 0.6955571419379413, "learning_rate": 1.8994322789943228e-07, "loss": 0.616, "step": 33080 }, { "epoch": 0.9658404134185863, "grad_norm": 0.8301569689553864, "learning_rate": 1.8978102189781024e-07, "loss": 0.7754, "step": 33081 }, { "epoch": 0.9658696096464336, "grad_norm": 0.7017213064706361, "learning_rate": 1.8961881589618816e-07, "loss": 0.6198, "step": 33082 }, { "epoch": 0.965898805874281, "grad_norm": 0.738847304088828, "learning_rate": 1.8945660989456612e-07, "loss": 0.6286, "step": 33083 }, { "epoch": 0.9659280021021284, "grad_norm": 0.7085186382224545, "learning_rate": 1.8929440389294407e-07, "loss": 0.6536, "step": 33084 }, { "epoch": 0.9659571983299757, "grad_norm": 0.7131033440429084, "learning_rate": 1.89132197891322e-07, "loss": 0.57, "step": 33085 }, { "epoch": 0.9659863945578231, "grad_norm": 0.7026765478664374, "learning_rate": 1.8896999188969995e-07, "loss": 0.631, "step": 33086 }, { "epoch": 0.9660155907856705, "grad_norm": 0.6829882776508647, "learning_rate": 1.8880778588807785e-07, "loss": 0.5744, "step": 33087 }, { "epoch": 0.9660447870135178, "grad_norm": 0.6771828859903146, "learning_rate": 1.886455798864558e-07, "loss": 0.5952, "step": 33088 }, { "epoch": 0.9660739832413652, "grad_norm": 0.6665646089734063, "learning_rate": 1.8848337388483376e-07, "loss": 0.5769, "step": 33089 }, { "epoch": 0.9661031794692125, "grad_norm": 0.691808763850668, "learning_rate": 1.883211678832117e-07, "loss": 0.5786, "step": 33090 }, { "epoch": 0.9661323756970599, "grad_norm": 0.7306748609057018, "learning_rate": 1.8815896188158964e-07, "loss": 0.6769, "step": 33091 }, { "epoch": 0.9661615719249073, "grad_norm": 0.6677824300850116, "learning_rate": 1.879967558799676e-07, "loss": 0.582, "step": 33092 }, { "epoch": 0.9661907681527546, "grad_norm": 0.7297950737759303, "learning_rate": 1.878345498783455e-07, "loss": 0.6058, "step": 33093 }, { "epoch": 0.966219964380602, "grad_norm": 0.7434977074847845, "learning_rate": 1.8767234387672345e-07, "loss": 0.655, "step": 33094 }, { "epoch": 0.9662491606084493, "grad_norm": 0.7333347818718056, "learning_rate": 1.8751013787510138e-07, "loss": 0.645, "step": 33095 }, { "epoch": 0.9662783568362967, "grad_norm": 0.709990790085407, "learning_rate": 1.8734793187347933e-07, "loss": 0.6521, "step": 33096 }, { "epoch": 0.9663075530641441, "grad_norm": 0.7732543691224986, "learning_rate": 1.871857258718573e-07, "loss": 0.6987, "step": 33097 }, { "epoch": 0.9663367492919914, "grad_norm": 0.7551715458741868, "learning_rate": 1.8702351987023521e-07, "loss": 0.7154, "step": 33098 }, { "epoch": 0.9663659455198388, "grad_norm": 0.6958727054018092, "learning_rate": 1.8686131386861317e-07, "loss": 0.5889, "step": 33099 }, { "epoch": 0.9663951417476861, "grad_norm": 0.676726299466354, "learning_rate": 1.8669910786699107e-07, "loss": 0.5845, "step": 33100 }, { "epoch": 0.9664243379755335, "grad_norm": 0.7096810741396262, "learning_rate": 1.8653690186536902e-07, "loss": 0.5754, "step": 33101 }, { "epoch": 0.9664535342033809, "grad_norm": 0.819348406864744, "learning_rate": 1.8637469586374698e-07, "loss": 0.6564, "step": 33102 }, { "epoch": 0.9664827304312282, "grad_norm": 0.6846177627954395, "learning_rate": 1.862124898621249e-07, "loss": 0.607, "step": 33103 }, { "epoch": 0.9665119266590756, "grad_norm": 0.6608265089334815, "learning_rate": 1.8605028386050286e-07, "loss": 0.5555, "step": 33104 }, { "epoch": 0.966541122886923, "grad_norm": 0.7439790699094341, "learning_rate": 1.858880778588808e-07, "loss": 0.6372, "step": 33105 }, { "epoch": 0.9665703191147704, "grad_norm": 0.815473449833598, "learning_rate": 1.857258718572587e-07, "loss": 0.6414, "step": 33106 }, { "epoch": 0.9665995153426178, "grad_norm": 0.7388388867707598, "learning_rate": 1.8556366585563667e-07, "loss": 0.7094, "step": 33107 }, { "epoch": 0.9666287115704651, "grad_norm": 0.7653183123429071, "learning_rate": 1.8540145985401462e-07, "loss": 0.6424, "step": 33108 }, { "epoch": 0.9666579077983125, "grad_norm": 0.7470377922238277, "learning_rate": 1.8523925385239255e-07, "loss": 0.7032, "step": 33109 }, { "epoch": 0.9666871040261599, "grad_norm": 0.6945099322438745, "learning_rate": 1.850770478507705e-07, "loss": 0.6029, "step": 33110 }, { "epoch": 0.9667163002540072, "grad_norm": 0.7292094251056684, "learning_rate": 1.8491484184914846e-07, "loss": 0.6528, "step": 33111 }, { "epoch": 0.9667454964818546, "grad_norm": 0.7447832469091108, "learning_rate": 1.8475263584752638e-07, "loss": 0.6729, "step": 33112 }, { "epoch": 0.966774692709702, "grad_norm": 0.7355679535229948, "learning_rate": 1.845904298459043e-07, "loss": 0.6425, "step": 33113 }, { "epoch": 0.9668038889375493, "grad_norm": 0.7244856462130457, "learning_rate": 1.8442822384428224e-07, "loss": 0.6334, "step": 33114 }, { "epoch": 0.9668330851653967, "grad_norm": 0.7435991019705075, "learning_rate": 1.842660178426602e-07, "loss": 0.6925, "step": 33115 }, { "epoch": 0.966862281393244, "grad_norm": 0.6720300421680518, "learning_rate": 1.8410381184103815e-07, "loss": 0.5584, "step": 33116 }, { "epoch": 0.9668914776210914, "grad_norm": 0.7296363066875947, "learning_rate": 1.8394160583941607e-07, "loss": 0.6554, "step": 33117 }, { "epoch": 0.9669206738489388, "grad_norm": 0.7880265909568189, "learning_rate": 1.8377939983779403e-07, "loss": 0.6977, "step": 33118 }, { "epoch": 0.9669498700767861, "grad_norm": 0.7163842667954433, "learning_rate": 1.8361719383617193e-07, "loss": 0.6065, "step": 33119 }, { "epoch": 0.9669790663046335, "grad_norm": 0.6788963901183697, "learning_rate": 1.8345498783454988e-07, "loss": 0.5771, "step": 33120 }, { "epoch": 0.9670082625324808, "grad_norm": 0.7293789609273856, "learning_rate": 1.8329278183292784e-07, "loss": 0.6499, "step": 33121 }, { "epoch": 0.9670374587603282, "grad_norm": 0.7921372653841342, "learning_rate": 1.8313057583130576e-07, "loss": 0.7071, "step": 33122 }, { "epoch": 0.9670666549881756, "grad_norm": 0.6817052644180385, "learning_rate": 1.8296836982968372e-07, "loss": 0.5733, "step": 33123 }, { "epoch": 0.9670958512160229, "grad_norm": 0.78286159820562, "learning_rate": 1.8280616382806167e-07, "loss": 0.6225, "step": 33124 }, { "epoch": 0.9671250474438703, "grad_norm": 0.727352576356906, "learning_rate": 1.826439578264396e-07, "loss": 0.6442, "step": 33125 }, { "epoch": 0.9671542436717177, "grad_norm": 0.7407357893281172, "learning_rate": 1.8248175182481753e-07, "loss": 0.6077, "step": 33126 }, { "epoch": 0.967183439899565, "grad_norm": 0.6987612975234468, "learning_rate": 1.8231954582319545e-07, "loss": 0.5945, "step": 33127 }, { "epoch": 0.9672126361274124, "grad_norm": 0.7124547165748399, "learning_rate": 1.821573398215734e-07, "loss": 0.6439, "step": 33128 }, { "epoch": 0.9672418323552597, "grad_norm": 0.7304158817225623, "learning_rate": 1.8199513381995136e-07, "loss": 0.6126, "step": 33129 }, { "epoch": 0.9672710285831071, "grad_norm": 0.7223501031790324, "learning_rate": 1.818329278183293e-07, "loss": 0.6425, "step": 33130 }, { "epoch": 0.9673002248109545, "grad_norm": 0.6602329421596591, "learning_rate": 1.8167072181670724e-07, "loss": 0.541, "step": 33131 }, { "epoch": 0.9673294210388018, "grad_norm": 0.7458165031813062, "learning_rate": 1.815085158150852e-07, "loss": 0.6017, "step": 33132 }, { "epoch": 0.9673586172666492, "grad_norm": 0.7513142072617027, "learning_rate": 1.813463098134631e-07, "loss": 0.6571, "step": 33133 }, { "epoch": 0.9673878134944965, "grad_norm": 0.688809702657074, "learning_rate": 1.8118410381184105e-07, "loss": 0.6503, "step": 33134 }, { "epoch": 0.9674170097223439, "grad_norm": 0.7020695033058706, "learning_rate": 1.81021897810219e-07, "loss": 0.6694, "step": 33135 }, { "epoch": 0.9674462059501913, "grad_norm": 0.7275711742776593, "learning_rate": 1.8085969180859693e-07, "loss": 0.7076, "step": 33136 }, { "epoch": 0.9674754021780386, "grad_norm": 0.7170559417493222, "learning_rate": 1.8069748580697489e-07, "loss": 0.6501, "step": 33137 }, { "epoch": 0.967504598405886, "grad_norm": 0.7427381251678482, "learning_rate": 1.8053527980535284e-07, "loss": 0.6819, "step": 33138 }, { "epoch": 0.9675337946337333, "grad_norm": 0.7163627698471748, "learning_rate": 1.8037307380373074e-07, "loss": 0.6543, "step": 33139 }, { "epoch": 0.9675629908615807, "grad_norm": 0.721982047656838, "learning_rate": 1.802108678021087e-07, "loss": 0.6176, "step": 33140 }, { "epoch": 0.9675921870894281, "grad_norm": 0.7011929586337656, "learning_rate": 1.8004866180048662e-07, "loss": 0.5462, "step": 33141 }, { "epoch": 0.9676213833172754, "grad_norm": 0.7087369056928808, "learning_rate": 1.7988645579886458e-07, "loss": 0.5899, "step": 33142 }, { "epoch": 0.9676505795451228, "grad_norm": 0.7201372965639032, "learning_rate": 1.7972424979724253e-07, "loss": 0.6156, "step": 33143 }, { "epoch": 0.9676797757729702, "grad_norm": 0.7072056304414188, "learning_rate": 1.7956204379562046e-07, "loss": 0.6088, "step": 33144 }, { "epoch": 0.9677089720008175, "grad_norm": 0.7176798387634812, "learning_rate": 1.793998377939984e-07, "loss": 0.6242, "step": 33145 }, { "epoch": 0.9677381682286649, "grad_norm": 0.7429711188661394, "learning_rate": 1.792376317923763e-07, "loss": 0.6591, "step": 33146 }, { "epoch": 0.9677673644565122, "grad_norm": 0.7342841215826086, "learning_rate": 1.7907542579075427e-07, "loss": 0.7, "step": 33147 }, { "epoch": 0.9677965606843596, "grad_norm": 0.70817671008038, "learning_rate": 1.7891321978913222e-07, "loss": 0.6244, "step": 33148 }, { "epoch": 0.967825756912207, "grad_norm": 0.7790897459216616, "learning_rate": 1.7875101378751015e-07, "loss": 0.7167, "step": 33149 }, { "epoch": 0.9678549531400543, "grad_norm": 0.6744317752201103, "learning_rate": 1.785888077858881e-07, "loss": 0.5869, "step": 33150 }, { "epoch": 0.9678841493679017, "grad_norm": 0.7377840508064304, "learning_rate": 1.7842660178426605e-07, "loss": 0.6615, "step": 33151 }, { "epoch": 0.967913345595749, "grad_norm": 0.7031088585320813, "learning_rate": 1.7826439578264396e-07, "loss": 0.6312, "step": 33152 }, { "epoch": 0.9679425418235964, "grad_norm": 0.6720021733742996, "learning_rate": 1.781021897810219e-07, "loss": 0.5663, "step": 33153 }, { "epoch": 0.9679717380514438, "grad_norm": 3.068025064064947, "learning_rate": 1.7793998377939984e-07, "loss": 0.5924, "step": 33154 }, { "epoch": 0.9680009342792911, "grad_norm": 0.6682099941523119, "learning_rate": 1.777777777777778e-07, "loss": 0.5589, "step": 33155 }, { "epoch": 0.9680301305071385, "grad_norm": 0.7037621813490151, "learning_rate": 1.7761557177615574e-07, "loss": 0.5769, "step": 33156 }, { "epoch": 0.9680593267349858, "grad_norm": 0.6735654717173203, "learning_rate": 1.7745336577453367e-07, "loss": 0.581, "step": 33157 }, { "epoch": 0.9680885229628332, "grad_norm": 0.7225364901012754, "learning_rate": 1.7729115977291163e-07, "loss": 0.6459, "step": 33158 }, { "epoch": 0.9681177191906806, "grad_norm": 0.8535473091665208, "learning_rate": 1.7712895377128953e-07, "loss": 0.6846, "step": 33159 }, { "epoch": 0.9681469154185279, "grad_norm": 0.7106006018204543, "learning_rate": 1.7696674776966748e-07, "loss": 0.6479, "step": 33160 }, { "epoch": 0.9681761116463753, "grad_norm": 0.6928881832193673, "learning_rate": 1.7680454176804543e-07, "loss": 0.6063, "step": 33161 }, { "epoch": 0.9682053078742227, "grad_norm": 0.7260133547551341, "learning_rate": 1.7664233576642336e-07, "loss": 0.6756, "step": 33162 }, { "epoch": 0.96823450410207, "grad_norm": 0.7249424224157984, "learning_rate": 1.7648012976480132e-07, "loss": 0.6301, "step": 33163 }, { "epoch": 0.9682637003299174, "grad_norm": 0.7134628695071502, "learning_rate": 1.7631792376317927e-07, "loss": 0.6559, "step": 33164 }, { "epoch": 0.9682928965577647, "grad_norm": 0.7286534859323079, "learning_rate": 1.7615571776155717e-07, "loss": 0.6628, "step": 33165 }, { "epoch": 0.9683220927856121, "grad_norm": 0.7572448306829461, "learning_rate": 1.7599351175993512e-07, "loss": 0.676, "step": 33166 }, { "epoch": 0.9683512890134595, "grad_norm": 0.6658735526114327, "learning_rate": 1.7583130575831308e-07, "loss": 0.5813, "step": 33167 }, { "epoch": 0.9683804852413068, "grad_norm": 0.6992753932117867, "learning_rate": 1.75669099756691e-07, "loss": 0.6361, "step": 33168 }, { "epoch": 0.9684096814691542, "grad_norm": 0.7340945193787839, "learning_rate": 1.7550689375506896e-07, "loss": 0.6928, "step": 33169 }, { "epoch": 0.9684388776970015, "grad_norm": 0.7369131436750451, "learning_rate": 1.753446877534469e-07, "loss": 0.657, "step": 33170 }, { "epoch": 0.9684680739248489, "grad_norm": 0.6879851340544916, "learning_rate": 1.7518248175182484e-07, "loss": 0.5988, "step": 33171 }, { "epoch": 0.9684972701526963, "grad_norm": 0.638814626452043, "learning_rate": 1.7502027575020277e-07, "loss": 0.5411, "step": 33172 }, { "epoch": 0.9685264663805436, "grad_norm": 0.7057931496967467, "learning_rate": 1.748580697485807e-07, "loss": 0.6171, "step": 33173 }, { "epoch": 0.968555662608391, "grad_norm": 0.7086284136692074, "learning_rate": 1.7469586374695865e-07, "loss": 0.6054, "step": 33174 }, { "epoch": 0.9685848588362383, "grad_norm": 0.6978494368035815, "learning_rate": 1.745336577453366e-07, "loss": 0.6121, "step": 33175 }, { "epoch": 0.9686140550640857, "grad_norm": 0.7069833612554447, "learning_rate": 1.7437145174371453e-07, "loss": 0.6287, "step": 33176 }, { "epoch": 0.9686432512919331, "grad_norm": 0.7236280486305047, "learning_rate": 1.7420924574209248e-07, "loss": 0.655, "step": 33177 }, { "epoch": 0.9686724475197804, "grad_norm": 0.7218151392207858, "learning_rate": 1.7404703974047038e-07, "loss": 0.6376, "step": 33178 }, { "epoch": 0.9687016437476278, "grad_norm": 0.7286454776556531, "learning_rate": 1.7388483373884834e-07, "loss": 0.6722, "step": 33179 }, { "epoch": 0.9687308399754752, "grad_norm": 0.7598912559704907, "learning_rate": 1.737226277372263e-07, "loss": 0.692, "step": 33180 }, { "epoch": 0.9687600362033225, "grad_norm": 0.6815047173423665, "learning_rate": 1.7356042173560422e-07, "loss": 0.5942, "step": 33181 }, { "epoch": 0.9687892324311699, "grad_norm": 0.7738832976454744, "learning_rate": 1.7339821573398217e-07, "loss": 0.7065, "step": 33182 }, { "epoch": 0.9688184286590172, "grad_norm": 0.6919418412411612, "learning_rate": 1.7323600973236013e-07, "loss": 0.5695, "step": 33183 }, { "epoch": 0.9688476248868646, "grad_norm": 0.6573291145235534, "learning_rate": 1.7307380373073806e-07, "loss": 0.5646, "step": 33184 }, { "epoch": 0.968876821114712, "grad_norm": 0.8156564955148636, "learning_rate": 1.7291159772911598e-07, "loss": 0.7745, "step": 33185 }, { "epoch": 0.9689060173425593, "grad_norm": 0.6991333950730754, "learning_rate": 1.727493917274939e-07, "loss": 0.6305, "step": 33186 }, { "epoch": 0.9689352135704067, "grad_norm": 0.9541228010682833, "learning_rate": 1.7258718572587186e-07, "loss": 0.7339, "step": 33187 }, { "epoch": 0.968964409798254, "grad_norm": 0.7063614258479382, "learning_rate": 1.7242497972424982e-07, "loss": 0.6518, "step": 33188 }, { "epoch": 0.9689936060261014, "grad_norm": 0.7587799532451024, "learning_rate": 1.7226277372262775e-07, "loss": 0.6856, "step": 33189 }, { "epoch": 0.9690228022539488, "grad_norm": 0.7355955510136085, "learning_rate": 1.721005677210057e-07, "loss": 0.6425, "step": 33190 }, { "epoch": 0.9690519984817961, "grad_norm": 0.7139844968345994, "learning_rate": 1.7193836171938365e-07, "loss": 0.6308, "step": 33191 }, { "epoch": 0.9690811947096435, "grad_norm": 0.7550603001409998, "learning_rate": 1.7177615571776155e-07, "loss": 0.702, "step": 33192 }, { "epoch": 0.9691103909374909, "grad_norm": 0.661287269776592, "learning_rate": 1.716139497161395e-07, "loss": 0.5448, "step": 33193 }, { "epoch": 0.9691395871653382, "grad_norm": 0.8004397680336501, "learning_rate": 1.7145174371451746e-07, "loss": 0.6158, "step": 33194 }, { "epoch": 0.9691687833931856, "grad_norm": 0.8507400937056708, "learning_rate": 1.712895377128954e-07, "loss": 0.7385, "step": 33195 }, { "epoch": 0.9691979796210329, "grad_norm": 0.7709790304945705, "learning_rate": 1.7112733171127334e-07, "loss": 0.6924, "step": 33196 }, { "epoch": 0.9692271758488803, "grad_norm": 0.6827488747496234, "learning_rate": 1.709651257096513e-07, "loss": 0.5941, "step": 33197 }, { "epoch": 0.9692563720767277, "grad_norm": 0.7065982567818214, "learning_rate": 1.708029197080292e-07, "loss": 0.6056, "step": 33198 }, { "epoch": 0.969285568304575, "grad_norm": 0.7007544711032903, "learning_rate": 1.7064071370640715e-07, "loss": 0.561, "step": 33199 }, { "epoch": 0.9693147645324224, "grad_norm": 0.6868387502388611, "learning_rate": 1.7047850770478508e-07, "loss": 0.5757, "step": 33200 }, { "epoch": 0.9693439607602697, "grad_norm": 0.7480708935421058, "learning_rate": 1.7031630170316303e-07, "loss": 0.6812, "step": 33201 }, { "epoch": 0.9693731569881171, "grad_norm": 0.6836042426611325, "learning_rate": 1.7015409570154099e-07, "loss": 0.5635, "step": 33202 }, { "epoch": 0.9694023532159645, "grad_norm": 0.7480339914688307, "learning_rate": 1.6999188969991891e-07, "loss": 0.6604, "step": 33203 }, { "epoch": 0.9694315494438118, "grad_norm": 0.7132708956499146, "learning_rate": 1.6982968369829687e-07, "loss": 0.6356, "step": 33204 }, { "epoch": 0.9694607456716592, "grad_norm": 0.7505651390872164, "learning_rate": 1.6966747769667477e-07, "loss": 0.6373, "step": 33205 }, { "epoch": 0.9694899418995065, "grad_norm": 0.6676273030863952, "learning_rate": 1.6950527169505272e-07, "loss": 0.5375, "step": 33206 }, { "epoch": 0.9695191381273539, "grad_norm": 0.7543369530758309, "learning_rate": 1.6934306569343068e-07, "loss": 0.7272, "step": 33207 }, { "epoch": 0.9695483343552013, "grad_norm": 0.7161443145235659, "learning_rate": 1.691808596918086e-07, "loss": 0.6397, "step": 33208 }, { "epoch": 0.9695775305830486, "grad_norm": 0.7229171236403343, "learning_rate": 1.6901865369018656e-07, "loss": 0.6331, "step": 33209 }, { "epoch": 0.969606726810896, "grad_norm": 0.7152829078659256, "learning_rate": 1.688564476885645e-07, "loss": 0.6331, "step": 33210 }, { "epoch": 0.9696359230387434, "grad_norm": 0.7528698096068497, "learning_rate": 1.686942416869424e-07, "loss": 0.7104, "step": 33211 }, { "epoch": 0.9696651192665907, "grad_norm": 0.7495418417089986, "learning_rate": 1.6853203568532037e-07, "loss": 0.7264, "step": 33212 }, { "epoch": 0.9696943154944381, "grad_norm": 0.6886254564389994, "learning_rate": 1.683698296836983e-07, "loss": 0.6197, "step": 33213 }, { "epoch": 0.9697235117222854, "grad_norm": 0.7299962475239995, "learning_rate": 1.6820762368207625e-07, "loss": 0.6063, "step": 33214 }, { "epoch": 0.9697527079501328, "grad_norm": 0.7661484843625642, "learning_rate": 1.680454176804542e-07, "loss": 0.6691, "step": 33215 }, { "epoch": 0.9697819041779802, "grad_norm": 0.7297114776852741, "learning_rate": 1.6788321167883213e-07, "loss": 0.6811, "step": 33216 }, { "epoch": 0.9698111004058275, "grad_norm": 0.7084455347017106, "learning_rate": 1.6772100567721008e-07, "loss": 0.5917, "step": 33217 }, { "epoch": 0.9698402966336749, "grad_norm": 0.797651822022785, "learning_rate": 1.6755879967558798e-07, "loss": 0.759, "step": 33218 }, { "epoch": 0.9698694928615222, "grad_norm": 0.7555829870684736, "learning_rate": 1.6739659367396594e-07, "loss": 0.7056, "step": 33219 }, { "epoch": 0.9698986890893696, "grad_norm": 0.7115238686141194, "learning_rate": 1.672343876723439e-07, "loss": 0.6007, "step": 33220 }, { "epoch": 0.969927885317217, "grad_norm": 0.6953503609893182, "learning_rate": 1.6707218167072185e-07, "loss": 0.6474, "step": 33221 }, { "epoch": 0.9699570815450643, "grad_norm": 0.706356867068491, "learning_rate": 1.6690997566909977e-07, "loss": 0.6219, "step": 33222 }, { "epoch": 0.9699862777729117, "grad_norm": 0.705825462459142, "learning_rate": 1.6674776966747773e-07, "loss": 0.6231, "step": 33223 }, { "epoch": 0.970015474000759, "grad_norm": 0.7577686280092093, "learning_rate": 1.6658556366585563e-07, "loss": 0.7157, "step": 33224 }, { "epoch": 0.9700446702286064, "grad_norm": 0.6581729951135171, "learning_rate": 1.6642335766423358e-07, "loss": 0.5342, "step": 33225 }, { "epoch": 0.9700738664564539, "grad_norm": 0.7436254952833448, "learning_rate": 1.6626115166261153e-07, "loss": 0.6387, "step": 33226 }, { "epoch": 0.9701030626843012, "grad_norm": 0.7554745001198001, "learning_rate": 1.6609894566098946e-07, "loss": 0.7314, "step": 33227 }, { "epoch": 0.9701322589121486, "grad_norm": 0.8004074024026212, "learning_rate": 1.6593673965936742e-07, "loss": 0.6809, "step": 33228 }, { "epoch": 0.970161455139996, "grad_norm": 0.6808774950387582, "learning_rate": 1.6577453365774537e-07, "loss": 0.5946, "step": 33229 }, { "epoch": 0.9701906513678433, "grad_norm": 0.7160961568709121, "learning_rate": 1.656123276561233e-07, "loss": 0.6621, "step": 33230 }, { "epoch": 0.9702198475956907, "grad_norm": 0.6521528253516055, "learning_rate": 1.6545012165450122e-07, "loss": 0.5472, "step": 33231 }, { "epoch": 0.970249043823538, "grad_norm": 0.6868237920416262, "learning_rate": 1.6528791565287915e-07, "loss": 0.5961, "step": 33232 }, { "epoch": 0.9702782400513854, "grad_norm": 0.7167588668870799, "learning_rate": 1.651257096512571e-07, "loss": 0.6474, "step": 33233 }, { "epoch": 0.9703074362792328, "grad_norm": 0.7477119929553762, "learning_rate": 1.6496350364963506e-07, "loss": 0.7021, "step": 33234 }, { "epoch": 0.9703366325070801, "grad_norm": 0.7820382213454797, "learning_rate": 1.64801297648013e-07, "loss": 0.7222, "step": 33235 }, { "epoch": 0.9703658287349275, "grad_norm": 0.7397024492487939, "learning_rate": 1.6463909164639094e-07, "loss": 0.6805, "step": 33236 }, { "epoch": 0.9703950249627749, "grad_norm": 0.704743298471251, "learning_rate": 1.644768856447689e-07, "loss": 0.6127, "step": 33237 }, { "epoch": 0.9704242211906222, "grad_norm": 0.7037938876594798, "learning_rate": 1.643146796431468e-07, "loss": 0.6063, "step": 33238 }, { "epoch": 0.9704534174184696, "grad_norm": 0.7446697264638472, "learning_rate": 1.6415247364152475e-07, "loss": 0.6721, "step": 33239 }, { "epoch": 0.9704826136463169, "grad_norm": 0.7084474474799032, "learning_rate": 1.6399026763990268e-07, "loss": 0.5939, "step": 33240 }, { "epoch": 0.9705118098741643, "grad_norm": 0.7027900449795474, "learning_rate": 1.6382806163828063e-07, "loss": 0.5653, "step": 33241 }, { "epoch": 0.9705410061020117, "grad_norm": 0.7104558359048376, "learning_rate": 1.6366585563665858e-07, "loss": 0.6668, "step": 33242 }, { "epoch": 0.970570202329859, "grad_norm": 0.763627409451262, "learning_rate": 1.635036496350365e-07, "loss": 0.7144, "step": 33243 }, { "epoch": 0.9705993985577064, "grad_norm": 0.7434577830141932, "learning_rate": 1.6334144363341444e-07, "loss": 0.6397, "step": 33244 }, { "epoch": 0.9706285947855537, "grad_norm": 0.7066071386527397, "learning_rate": 1.6317923763179237e-07, "loss": 0.6244, "step": 33245 }, { "epoch": 0.9706577910134011, "grad_norm": 0.7296046230845651, "learning_rate": 1.6301703163017032e-07, "loss": 0.6341, "step": 33246 }, { "epoch": 0.9706869872412485, "grad_norm": 0.744542591316687, "learning_rate": 1.6285482562854827e-07, "loss": 0.6563, "step": 33247 }, { "epoch": 0.9707161834690958, "grad_norm": 0.7668684010326201, "learning_rate": 1.626926196269262e-07, "loss": 0.6595, "step": 33248 }, { "epoch": 0.9707453796969432, "grad_norm": 0.6573598983430413, "learning_rate": 1.6253041362530416e-07, "loss": 0.5274, "step": 33249 }, { "epoch": 0.9707745759247906, "grad_norm": 0.7093681622160836, "learning_rate": 1.623682076236821e-07, "loss": 0.5954, "step": 33250 }, { "epoch": 0.9708037721526379, "grad_norm": 0.7953047036001147, "learning_rate": 1.6220600162206e-07, "loss": 0.6719, "step": 33251 }, { "epoch": 0.9708329683804853, "grad_norm": 0.7498178149947735, "learning_rate": 1.6204379562043796e-07, "loss": 0.6283, "step": 33252 }, { "epoch": 0.9708621646083326, "grad_norm": 0.75733796861392, "learning_rate": 1.6188158961881592e-07, "loss": 0.7082, "step": 33253 }, { "epoch": 0.97089136083618, "grad_norm": 0.6594500205460239, "learning_rate": 1.6171938361719385e-07, "loss": 0.5036, "step": 33254 }, { "epoch": 0.9709205570640274, "grad_norm": 0.7317897772013099, "learning_rate": 1.615571776155718e-07, "loss": 0.6693, "step": 33255 }, { "epoch": 0.9709497532918747, "grad_norm": 0.7031719785559554, "learning_rate": 1.6139497161394975e-07, "loss": 0.6233, "step": 33256 }, { "epoch": 0.9709789495197221, "grad_norm": 0.7214652645448418, "learning_rate": 1.6123276561232765e-07, "loss": 0.652, "step": 33257 }, { "epoch": 0.9710081457475694, "grad_norm": 0.7093279371541636, "learning_rate": 1.610705596107056e-07, "loss": 0.6248, "step": 33258 }, { "epoch": 0.9710373419754168, "grad_norm": 0.6842429248942652, "learning_rate": 1.6090835360908354e-07, "loss": 0.5481, "step": 33259 }, { "epoch": 0.9710665382032642, "grad_norm": 0.7212133787101005, "learning_rate": 1.607461476074615e-07, "loss": 0.645, "step": 33260 }, { "epoch": 0.9710957344311115, "grad_norm": 0.7265815146966174, "learning_rate": 1.6058394160583944e-07, "loss": 0.6444, "step": 33261 }, { "epoch": 0.9711249306589589, "grad_norm": 0.8309625097212131, "learning_rate": 1.6042173560421737e-07, "loss": 0.7785, "step": 33262 }, { "epoch": 0.9711541268868062, "grad_norm": 0.6942720133510536, "learning_rate": 1.6025952960259532e-07, "loss": 0.6128, "step": 33263 }, { "epoch": 0.9711833231146536, "grad_norm": 0.7312072164213915, "learning_rate": 1.6009732360097323e-07, "loss": 0.6089, "step": 33264 }, { "epoch": 0.971212519342501, "grad_norm": 0.785823276815304, "learning_rate": 1.5993511759935118e-07, "loss": 0.6902, "step": 33265 }, { "epoch": 0.9712417155703483, "grad_norm": 0.7281963051674896, "learning_rate": 1.5977291159772913e-07, "loss": 0.6756, "step": 33266 }, { "epoch": 0.9712709117981957, "grad_norm": 0.7443771990341991, "learning_rate": 1.5961070559610706e-07, "loss": 0.667, "step": 33267 }, { "epoch": 0.971300108026043, "grad_norm": 0.6996267141134823, "learning_rate": 1.5944849959448501e-07, "loss": 0.6456, "step": 33268 }, { "epoch": 0.9713293042538904, "grad_norm": 0.7524969769978549, "learning_rate": 1.5928629359286297e-07, "loss": 0.6795, "step": 33269 }, { "epoch": 0.9713585004817378, "grad_norm": 0.6887366094984776, "learning_rate": 1.5912408759124087e-07, "loss": 0.5609, "step": 33270 }, { "epoch": 0.9713876967095851, "grad_norm": 0.7025822044193558, "learning_rate": 1.5896188158961882e-07, "loss": 0.6069, "step": 33271 }, { "epoch": 0.9714168929374325, "grad_norm": 0.6875030240618313, "learning_rate": 1.5879967558799675e-07, "loss": 0.4971, "step": 33272 }, { "epoch": 0.9714460891652799, "grad_norm": 0.6675265720303576, "learning_rate": 1.586374695863747e-07, "loss": 0.547, "step": 33273 }, { "epoch": 0.9714752853931272, "grad_norm": 0.7084953755329298, "learning_rate": 1.5847526358475266e-07, "loss": 0.6726, "step": 33274 }, { "epoch": 0.9715044816209746, "grad_norm": 0.7127889839881393, "learning_rate": 1.5831305758313059e-07, "loss": 0.586, "step": 33275 }, { "epoch": 0.9715336778488219, "grad_norm": 0.6972628760729782, "learning_rate": 1.5815085158150854e-07, "loss": 0.5978, "step": 33276 }, { "epoch": 0.9715628740766693, "grad_norm": 0.7095645460637113, "learning_rate": 1.5798864557988647e-07, "loss": 0.6371, "step": 33277 }, { "epoch": 0.9715920703045167, "grad_norm": 0.7748772388672477, "learning_rate": 1.578264395782644e-07, "loss": 0.6299, "step": 33278 }, { "epoch": 0.971621266532364, "grad_norm": 0.7084275940506792, "learning_rate": 1.5766423357664235e-07, "loss": 0.642, "step": 33279 }, { "epoch": 0.9716504627602114, "grad_norm": 0.7462171118079155, "learning_rate": 1.575020275750203e-07, "loss": 0.6937, "step": 33280 }, { "epoch": 0.9716796589880587, "grad_norm": 0.7406662359055654, "learning_rate": 1.5733982157339823e-07, "loss": 0.6892, "step": 33281 }, { "epoch": 0.9717088552159061, "grad_norm": 0.7101741213551026, "learning_rate": 1.5717761557177618e-07, "loss": 0.5562, "step": 33282 }, { "epoch": 0.9717380514437535, "grad_norm": 0.7260823596517911, "learning_rate": 1.5701540957015414e-07, "loss": 0.7007, "step": 33283 }, { "epoch": 0.9717672476716008, "grad_norm": 0.6699844474340823, "learning_rate": 1.5685320356853204e-07, "loss": 0.5726, "step": 33284 }, { "epoch": 0.9717964438994482, "grad_norm": 0.721952960416989, "learning_rate": 1.5669099756691e-07, "loss": 0.6279, "step": 33285 }, { "epoch": 0.9718256401272956, "grad_norm": 0.7330693786296929, "learning_rate": 1.5652879156528792e-07, "loss": 0.6999, "step": 33286 }, { "epoch": 0.9718548363551429, "grad_norm": 0.7103361995292022, "learning_rate": 1.5636658556366587e-07, "loss": 0.618, "step": 33287 }, { "epoch": 0.9718840325829903, "grad_norm": 0.7004687830244483, "learning_rate": 1.562043795620438e-07, "loss": 0.6191, "step": 33288 }, { "epoch": 0.9719132288108376, "grad_norm": 0.7350558018367963, "learning_rate": 1.5604217356042175e-07, "loss": 0.6736, "step": 33289 }, { "epoch": 0.971942425038685, "grad_norm": 0.7779339101820536, "learning_rate": 1.5587996755879968e-07, "loss": 0.7148, "step": 33290 }, { "epoch": 0.9719716212665324, "grad_norm": 0.7309138782152294, "learning_rate": 1.5571776155717764e-07, "loss": 0.6186, "step": 33291 }, { "epoch": 0.9720008174943797, "grad_norm": 0.6995090368078606, "learning_rate": 1.5555555555555556e-07, "loss": 0.6312, "step": 33292 }, { "epoch": 0.9720300137222271, "grad_norm": 0.6998545249250041, "learning_rate": 1.5539334955393352e-07, "loss": 0.622, "step": 33293 }, { "epoch": 0.9720592099500744, "grad_norm": 0.7391703007069925, "learning_rate": 1.5523114355231144e-07, "loss": 0.6654, "step": 33294 }, { "epoch": 0.9720884061779218, "grad_norm": 0.7093584747578172, "learning_rate": 1.5506893755068937e-07, "loss": 0.6446, "step": 33295 }, { "epoch": 0.9721176024057692, "grad_norm": 0.6745648085321251, "learning_rate": 1.5490673154906733e-07, "loss": 0.6059, "step": 33296 }, { "epoch": 0.9721467986336165, "grad_norm": 0.7732051253879365, "learning_rate": 1.5474452554744528e-07, "loss": 0.7166, "step": 33297 }, { "epoch": 0.9721759948614639, "grad_norm": 0.7011474136959235, "learning_rate": 1.545823195458232e-07, "loss": 0.634, "step": 33298 }, { "epoch": 0.9722051910893112, "grad_norm": 0.7403195121112012, "learning_rate": 1.5442011354420113e-07, "loss": 0.7105, "step": 33299 }, { "epoch": 0.9722343873171586, "grad_norm": 0.6750279427749885, "learning_rate": 1.542579075425791e-07, "loss": 0.5797, "step": 33300 }, { "epoch": 0.972263583545006, "grad_norm": 0.774997381454947, "learning_rate": 1.5409570154095704e-07, "loss": 0.7593, "step": 33301 }, { "epoch": 0.9722927797728533, "grad_norm": 0.7161681855313327, "learning_rate": 1.5393349553933497e-07, "loss": 0.6389, "step": 33302 }, { "epoch": 0.9723219760007007, "grad_norm": 0.725580654922677, "learning_rate": 1.537712895377129e-07, "loss": 0.5994, "step": 33303 }, { "epoch": 0.972351172228548, "grad_norm": 0.7245619408226737, "learning_rate": 1.5360908353609085e-07, "loss": 0.6325, "step": 33304 }, { "epoch": 0.9723803684563954, "grad_norm": 0.6966921806292412, "learning_rate": 1.5344687753446878e-07, "loss": 0.5831, "step": 33305 }, { "epoch": 0.9724095646842428, "grad_norm": 0.7029088575804852, "learning_rate": 1.5328467153284673e-07, "loss": 0.6666, "step": 33306 }, { "epoch": 0.9724387609120901, "grad_norm": 0.7146411129001721, "learning_rate": 1.5312246553122469e-07, "loss": 0.581, "step": 33307 }, { "epoch": 0.9724679571399375, "grad_norm": 0.7306728455806416, "learning_rate": 1.529602595296026e-07, "loss": 0.5772, "step": 33308 }, { "epoch": 0.9724971533677849, "grad_norm": 0.7043510722396549, "learning_rate": 1.5279805352798054e-07, "loss": 0.5747, "step": 33309 }, { "epoch": 0.9725263495956322, "grad_norm": 0.6961911919522373, "learning_rate": 1.526358475263585e-07, "loss": 0.5876, "step": 33310 }, { "epoch": 0.9725555458234796, "grad_norm": 0.6872700399697765, "learning_rate": 1.5247364152473642e-07, "loss": 0.5466, "step": 33311 }, { "epoch": 0.972584742051327, "grad_norm": 0.7110956313859662, "learning_rate": 1.5231143552311438e-07, "loss": 0.606, "step": 33312 }, { "epoch": 0.9726139382791743, "grad_norm": 0.7128107413422096, "learning_rate": 1.521492295214923e-07, "loss": 0.6606, "step": 33313 }, { "epoch": 0.9726431345070217, "grad_norm": 0.764610377574281, "learning_rate": 1.5198702351987026e-07, "loss": 0.7191, "step": 33314 }, { "epoch": 0.972672330734869, "grad_norm": 0.7417270106901647, "learning_rate": 1.5182481751824818e-07, "loss": 0.6929, "step": 33315 }, { "epoch": 0.9727015269627164, "grad_norm": 0.7006552233983949, "learning_rate": 1.5166261151662614e-07, "loss": 0.5708, "step": 33316 }, { "epoch": 0.9727307231905638, "grad_norm": 0.6715620995125314, "learning_rate": 1.5150040551500407e-07, "loss": 0.5771, "step": 33317 }, { "epoch": 0.9727599194184111, "grad_norm": 0.7573702810945662, "learning_rate": 1.51338199513382e-07, "loss": 0.7309, "step": 33318 }, { "epoch": 0.9727891156462585, "grad_norm": 0.7121616580182583, "learning_rate": 1.5117599351175995e-07, "loss": 0.6088, "step": 33319 }, { "epoch": 0.9728183118741058, "grad_norm": 0.8611804844089505, "learning_rate": 1.510137875101379e-07, "loss": 0.6987, "step": 33320 }, { "epoch": 0.9728475081019532, "grad_norm": 0.7706652310519622, "learning_rate": 1.5085158150851583e-07, "loss": 0.7353, "step": 33321 }, { "epoch": 0.9728767043298006, "grad_norm": 0.710710377885365, "learning_rate": 1.5068937550689376e-07, "loss": 0.6415, "step": 33322 }, { "epoch": 0.9729059005576479, "grad_norm": 0.7301245097837036, "learning_rate": 1.505271695052717e-07, "loss": 0.6466, "step": 33323 }, { "epoch": 0.9729350967854953, "grad_norm": 0.7691466961320407, "learning_rate": 1.5036496350364966e-07, "loss": 0.6444, "step": 33324 }, { "epoch": 0.9729642930133426, "grad_norm": 0.6767669601351258, "learning_rate": 1.502027575020276e-07, "loss": 0.5571, "step": 33325 }, { "epoch": 0.97299348924119, "grad_norm": 0.6966348966863974, "learning_rate": 1.5004055150040552e-07, "loss": 0.6118, "step": 33326 }, { "epoch": 0.9730226854690374, "grad_norm": 0.7475673495722673, "learning_rate": 1.4987834549878347e-07, "loss": 0.7072, "step": 33327 }, { "epoch": 0.9730518816968847, "grad_norm": 0.7299884732198012, "learning_rate": 1.497161394971614e-07, "loss": 0.6354, "step": 33328 }, { "epoch": 0.9730810779247321, "grad_norm": 0.7632940512200981, "learning_rate": 1.4955393349553935e-07, "loss": 0.5898, "step": 33329 }, { "epoch": 0.9731102741525794, "grad_norm": 0.7121605343986379, "learning_rate": 1.4939172749391728e-07, "loss": 0.5841, "step": 33330 }, { "epoch": 0.9731394703804268, "grad_norm": 0.6778135062785773, "learning_rate": 1.492295214922952e-07, "loss": 0.5387, "step": 33331 }, { "epoch": 0.9731686666082742, "grad_norm": 0.7543624892853728, "learning_rate": 1.4906731549067316e-07, "loss": 0.6708, "step": 33332 }, { "epoch": 0.9731978628361215, "grad_norm": 0.7240062425048007, "learning_rate": 1.4890510948905112e-07, "loss": 0.6894, "step": 33333 }, { "epoch": 0.9732270590639689, "grad_norm": 0.7522635084329102, "learning_rate": 1.4874290348742904e-07, "loss": 0.6394, "step": 33334 }, { "epoch": 0.9732562552918163, "grad_norm": 0.7256923369891094, "learning_rate": 1.48580697485807e-07, "loss": 0.6335, "step": 33335 }, { "epoch": 0.9732854515196636, "grad_norm": 0.7285390854046423, "learning_rate": 1.4841849148418492e-07, "loss": 0.6317, "step": 33336 }, { "epoch": 0.973314647747511, "grad_norm": 0.6764121252998612, "learning_rate": 1.4825628548256288e-07, "loss": 0.5714, "step": 33337 }, { "epoch": 0.9733438439753583, "grad_norm": 0.7738881444277628, "learning_rate": 1.480940794809408e-07, "loss": 0.6518, "step": 33338 }, { "epoch": 0.9733730402032057, "grad_norm": 0.7574167312703306, "learning_rate": 1.4793187347931876e-07, "loss": 0.6509, "step": 33339 }, { "epoch": 0.9734022364310531, "grad_norm": 0.7292835466080874, "learning_rate": 1.4776966747769669e-07, "loss": 0.6112, "step": 33340 }, { "epoch": 0.9734314326589004, "grad_norm": 0.7047763810856442, "learning_rate": 1.4760746147607461e-07, "loss": 0.6329, "step": 33341 }, { "epoch": 0.9734606288867478, "grad_norm": 0.687737171340813, "learning_rate": 1.4744525547445257e-07, "loss": 0.5835, "step": 33342 }, { "epoch": 0.9734898251145951, "grad_norm": 0.7346399084669116, "learning_rate": 1.4728304947283052e-07, "loss": 0.6712, "step": 33343 }, { "epoch": 0.9735190213424425, "grad_norm": 0.7433556520050002, "learning_rate": 1.4712084347120845e-07, "loss": 0.6874, "step": 33344 }, { "epoch": 0.9735482175702899, "grad_norm": 0.7381652812283281, "learning_rate": 1.4695863746958638e-07, "loss": 0.6537, "step": 33345 }, { "epoch": 0.9735774137981372, "grad_norm": 0.7252390553040983, "learning_rate": 1.4679643146796433e-07, "loss": 0.6667, "step": 33346 }, { "epoch": 0.9736066100259847, "grad_norm": 0.6885113562983857, "learning_rate": 1.4663422546634226e-07, "loss": 0.5853, "step": 33347 }, { "epoch": 0.9736358062538321, "grad_norm": 0.6764533651093305, "learning_rate": 1.464720194647202e-07, "loss": 0.5991, "step": 33348 }, { "epoch": 0.9736650024816794, "grad_norm": 0.6732213275216712, "learning_rate": 1.4630981346309814e-07, "loss": 0.5495, "step": 33349 }, { "epoch": 0.9736941987095268, "grad_norm": 0.6787032407013799, "learning_rate": 1.461476074614761e-07, "loss": 0.5917, "step": 33350 }, { "epoch": 0.9737233949373741, "grad_norm": 0.6950732144728544, "learning_rate": 1.4598540145985402e-07, "loss": 0.5787, "step": 33351 }, { "epoch": 0.9737525911652215, "grad_norm": 0.7004873650198727, "learning_rate": 1.4582319545823197e-07, "loss": 0.6041, "step": 33352 }, { "epoch": 0.9737817873930689, "grad_norm": 0.7690062211662221, "learning_rate": 1.456609894566099e-07, "loss": 0.6776, "step": 33353 }, { "epoch": 0.9738109836209162, "grad_norm": 0.7004901993774966, "learning_rate": 1.4549878345498783e-07, "loss": 0.6133, "step": 33354 }, { "epoch": 0.9738401798487636, "grad_norm": 0.6516634060388098, "learning_rate": 1.4533657745336578e-07, "loss": 0.5436, "step": 33355 }, { "epoch": 0.973869376076611, "grad_norm": 0.7988602289522122, "learning_rate": 1.4517437145174374e-07, "loss": 0.5811, "step": 33356 }, { "epoch": 0.9738985723044583, "grad_norm": 0.6601480558531289, "learning_rate": 1.4501216545012166e-07, "loss": 0.5668, "step": 33357 }, { "epoch": 0.9739277685323057, "grad_norm": 0.7492016111790019, "learning_rate": 1.448499594484996e-07, "loss": 0.6747, "step": 33358 }, { "epoch": 0.973956964760153, "grad_norm": 0.7230180532690984, "learning_rate": 1.4468775344687755e-07, "loss": 0.6835, "step": 33359 }, { "epoch": 0.9739861609880004, "grad_norm": 0.7316383892556806, "learning_rate": 1.445255474452555e-07, "loss": 0.6092, "step": 33360 }, { "epoch": 0.9740153572158478, "grad_norm": 0.7118969670121991, "learning_rate": 1.4436334144363343e-07, "loss": 0.5834, "step": 33361 }, { "epoch": 0.9740445534436951, "grad_norm": 0.6955232402742477, "learning_rate": 1.4420113544201138e-07, "loss": 0.5956, "step": 33362 }, { "epoch": 0.9740737496715425, "grad_norm": 0.6874274530887489, "learning_rate": 1.440389294403893e-07, "loss": 0.5552, "step": 33363 }, { "epoch": 0.9741029458993898, "grad_norm": 0.6795888592622742, "learning_rate": 1.4387672343876724e-07, "loss": 0.5532, "step": 33364 }, { "epoch": 0.9741321421272372, "grad_norm": 0.7130137895637831, "learning_rate": 1.437145174371452e-07, "loss": 0.662, "step": 33365 }, { "epoch": 0.9741613383550846, "grad_norm": 0.7232116165871562, "learning_rate": 1.4355231143552314e-07, "loss": 0.618, "step": 33366 }, { "epoch": 0.9741905345829319, "grad_norm": 0.7064777050319883, "learning_rate": 1.4339010543390107e-07, "loss": 0.5928, "step": 33367 }, { "epoch": 0.9742197308107793, "grad_norm": 0.7133979328699657, "learning_rate": 1.43227899432279e-07, "loss": 0.6289, "step": 33368 }, { "epoch": 0.9742489270386266, "grad_norm": 0.662825937267502, "learning_rate": 1.4306569343065695e-07, "loss": 0.575, "step": 33369 }, { "epoch": 0.974278123266474, "grad_norm": 0.7197887058651454, "learning_rate": 1.4290348742903488e-07, "loss": 0.5925, "step": 33370 }, { "epoch": 0.9743073194943214, "grad_norm": 0.6921369999620205, "learning_rate": 1.4274128142741283e-07, "loss": 0.5773, "step": 33371 }, { "epoch": 0.9743365157221687, "grad_norm": 0.7267092438781836, "learning_rate": 1.4257907542579076e-07, "loss": 0.6678, "step": 33372 }, { "epoch": 0.9743657119500161, "grad_norm": 0.6677426877056787, "learning_rate": 1.4241686942416871e-07, "loss": 0.574, "step": 33373 }, { "epoch": 0.9743949081778635, "grad_norm": 0.6830830961851639, "learning_rate": 1.4225466342254664e-07, "loss": 0.5703, "step": 33374 }, { "epoch": 0.9744241044057108, "grad_norm": 0.7638138262313866, "learning_rate": 1.420924574209246e-07, "loss": 0.6647, "step": 33375 }, { "epoch": 0.9744533006335582, "grad_norm": 0.7230551667724917, "learning_rate": 1.4193025141930252e-07, "loss": 0.6436, "step": 33376 }, { "epoch": 0.9744824968614055, "grad_norm": 0.7512122965542528, "learning_rate": 1.4176804541768045e-07, "loss": 0.6803, "step": 33377 }, { "epoch": 0.9745116930892529, "grad_norm": 0.7582838746308245, "learning_rate": 1.416058394160584e-07, "loss": 0.6716, "step": 33378 }, { "epoch": 0.9745408893171003, "grad_norm": 0.6993966481552336, "learning_rate": 1.4144363341443636e-07, "loss": 0.6316, "step": 33379 }, { "epoch": 0.9745700855449476, "grad_norm": 0.7731369204284104, "learning_rate": 1.4128142741281429e-07, "loss": 0.6107, "step": 33380 }, { "epoch": 0.974599281772795, "grad_norm": 0.7004404374453912, "learning_rate": 1.411192214111922e-07, "loss": 0.6218, "step": 33381 }, { "epoch": 0.9746284780006423, "grad_norm": 0.6938398188894499, "learning_rate": 1.4095701540957017e-07, "loss": 0.607, "step": 33382 }, { "epoch": 0.9746576742284897, "grad_norm": 0.7332136882951731, "learning_rate": 1.4079480940794812e-07, "loss": 0.6744, "step": 33383 }, { "epoch": 0.9746868704563371, "grad_norm": 0.6907160641220266, "learning_rate": 1.4063260340632605e-07, "loss": 0.633, "step": 33384 }, { "epoch": 0.9747160666841844, "grad_norm": 0.7400022645013948, "learning_rate": 1.4047039740470398e-07, "loss": 0.6789, "step": 33385 }, { "epoch": 0.9747452629120318, "grad_norm": 0.7015722038171127, "learning_rate": 1.4030819140308193e-07, "loss": 0.6451, "step": 33386 }, { "epoch": 0.9747744591398791, "grad_norm": 0.7437523440487325, "learning_rate": 1.4014598540145986e-07, "loss": 0.6275, "step": 33387 }, { "epoch": 0.9748036553677265, "grad_norm": 0.7177025519913363, "learning_rate": 1.399837793998378e-07, "loss": 0.6354, "step": 33388 }, { "epoch": 0.9748328515955739, "grad_norm": 0.7691101838560962, "learning_rate": 1.3982157339821574e-07, "loss": 0.7357, "step": 33389 }, { "epoch": 0.9748620478234212, "grad_norm": 0.8309221395140546, "learning_rate": 1.396593673965937e-07, "loss": 0.6901, "step": 33390 }, { "epoch": 0.9748912440512686, "grad_norm": 0.7167773104592066, "learning_rate": 1.3949716139497162e-07, "loss": 0.6357, "step": 33391 }, { "epoch": 0.974920440279116, "grad_norm": 0.7361570453923947, "learning_rate": 1.3933495539334957e-07, "loss": 0.7023, "step": 33392 }, { "epoch": 0.9749496365069633, "grad_norm": 0.731777674590111, "learning_rate": 1.391727493917275e-07, "loss": 0.6493, "step": 33393 }, { "epoch": 0.9749788327348107, "grad_norm": 0.7191259473601392, "learning_rate": 1.3901054339010545e-07, "loss": 0.6493, "step": 33394 }, { "epoch": 0.975008028962658, "grad_norm": 0.6695244642299141, "learning_rate": 1.3884833738848338e-07, "loss": 0.589, "step": 33395 }, { "epoch": 0.9750372251905054, "grad_norm": 0.7241922689186783, "learning_rate": 1.3868613138686134e-07, "loss": 0.6485, "step": 33396 }, { "epoch": 0.9750664214183528, "grad_norm": 0.7310889688394564, "learning_rate": 1.3852392538523926e-07, "loss": 0.693, "step": 33397 }, { "epoch": 0.9750956176462001, "grad_norm": 0.7221077655104036, "learning_rate": 1.3836171938361722e-07, "loss": 0.6323, "step": 33398 }, { "epoch": 0.9751248138740475, "grad_norm": 0.7480072489948099, "learning_rate": 1.3819951338199514e-07, "loss": 0.6516, "step": 33399 }, { "epoch": 0.9751540101018948, "grad_norm": 0.7727253278807733, "learning_rate": 1.3803730738037307e-07, "loss": 0.7398, "step": 33400 }, { "epoch": 0.9751832063297422, "grad_norm": 0.7529670085254684, "learning_rate": 1.3787510137875103e-07, "loss": 0.6049, "step": 33401 }, { "epoch": 0.9752124025575896, "grad_norm": 0.7272719714652096, "learning_rate": 1.3771289537712898e-07, "loss": 0.6596, "step": 33402 }, { "epoch": 0.9752415987854369, "grad_norm": 0.6920966587826822, "learning_rate": 1.375506893755069e-07, "loss": 0.553, "step": 33403 }, { "epoch": 0.9752707950132843, "grad_norm": 0.6460282391590403, "learning_rate": 1.3738848337388483e-07, "loss": 0.5589, "step": 33404 }, { "epoch": 0.9752999912411316, "grad_norm": 0.7005099780594689, "learning_rate": 1.372262773722628e-07, "loss": 0.5914, "step": 33405 }, { "epoch": 0.975329187468979, "grad_norm": 0.7761094050583146, "learning_rate": 1.3706407137064074e-07, "loss": 0.707, "step": 33406 }, { "epoch": 0.9753583836968264, "grad_norm": 0.7347871832162214, "learning_rate": 1.3690186536901867e-07, "loss": 0.7025, "step": 33407 }, { "epoch": 0.9753875799246737, "grad_norm": 0.7239632605481588, "learning_rate": 1.367396593673966e-07, "loss": 0.6457, "step": 33408 }, { "epoch": 0.9754167761525211, "grad_norm": 0.7315785378307549, "learning_rate": 1.3657745336577455e-07, "loss": 0.6546, "step": 33409 }, { "epoch": 0.9754459723803685, "grad_norm": 0.7557901968271169, "learning_rate": 1.3641524736415248e-07, "loss": 0.6456, "step": 33410 }, { "epoch": 0.9754751686082158, "grad_norm": 0.7361081881174067, "learning_rate": 1.3625304136253043e-07, "loss": 0.6435, "step": 33411 }, { "epoch": 0.9755043648360632, "grad_norm": 0.775965051296659, "learning_rate": 1.3609083536090836e-07, "loss": 0.71, "step": 33412 }, { "epoch": 0.9755335610639105, "grad_norm": 0.6976458845741147, "learning_rate": 1.3592862935928629e-07, "loss": 0.6114, "step": 33413 }, { "epoch": 0.9755627572917579, "grad_norm": 0.698028335653792, "learning_rate": 1.3576642335766424e-07, "loss": 0.6398, "step": 33414 }, { "epoch": 0.9755919535196053, "grad_norm": 0.652182925281131, "learning_rate": 1.356042173560422e-07, "loss": 0.5315, "step": 33415 }, { "epoch": 0.9756211497474526, "grad_norm": 0.7022918487427735, "learning_rate": 1.3544201135442012e-07, "loss": 0.6418, "step": 33416 }, { "epoch": 0.9756503459753, "grad_norm": 0.7313281665282161, "learning_rate": 1.3527980535279805e-07, "loss": 0.6232, "step": 33417 }, { "epoch": 0.9756795422031473, "grad_norm": 0.7273753220690679, "learning_rate": 1.35117599351176e-07, "loss": 0.6699, "step": 33418 }, { "epoch": 0.9757087384309947, "grad_norm": 0.7033041494195048, "learning_rate": 1.3495539334955396e-07, "loss": 0.5842, "step": 33419 }, { "epoch": 0.9757379346588421, "grad_norm": 0.8046779164151598, "learning_rate": 1.3479318734793188e-07, "loss": 0.7057, "step": 33420 }, { "epoch": 0.9757671308866894, "grad_norm": 0.8955071696198086, "learning_rate": 1.3463098134630984e-07, "loss": 0.6656, "step": 33421 }, { "epoch": 0.9757963271145368, "grad_norm": 0.7507324060204313, "learning_rate": 1.3446877534468776e-07, "loss": 0.7155, "step": 33422 }, { "epoch": 0.9758255233423841, "grad_norm": 0.7208028019872951, "learning_rate": 1.343065693430657e-07, "loss": 0.5784, "step": 33423 }, { "epoch": 0.9758547195702315, "grad_norm": 0.7164603405811014, "learning_rate": 1.3414436334144365e-07, "loss": 0.6208, "step": 33424 }, { "epoch": 0.9758839157980789, "grad_norm": 0.70601341439434, "learning_rate": 1.339821573398216e-07, "loss": 0.6397, "step": 33425 }, { "epoch": 0.9759131120259262, "grad_norm": 0.7797989481839183, "learning_rate": 1.3381995133819953e-07, "loss": 0.7228, "step": 33426 }, { "epoch": 0.9759423082537736, "grad_norm": 0.746926570056338, "learning_rate": 1.3365774533657745e-07, "loss": 0.6576, "step": 33427 }, { "epoch": 0.975971504481621, "grad_norm": 0.6904849414124645, "learning_rate": 1.334955393349554e-07, "loss": 0.592, "step": 33428 }, { "epoch": 0.9760007007094683, "grad_norm": 0.7028936682661178, "learning_rate": 1.3333333333333336e-07, "loss": 0.5851, "step": 33429 }, { "epoch": 0.9760298969373157, "grad_norm": 0.716784169163335, "learning_rate": 1.331711273317113e-07, "loss": 0.6644, "step": 33430 }, { "epoch": 0.976059093165163, "grad_norm": 1.0922773521973603, "learning_rate": 1.3300892133008922e-07, "loss": 0.6667, "step": 33431 }, { "epoch": 0.9760882893930104, "grad_norm": 0.7360755217536155, "learning_rate": 1.3284671532846717e-07, "loss": 0.6614, "step": 33432 }, { "epoch": 0.9761174856208578, "grad_norm": 0.7085327791382302, "learning_rate": 1.326845093268451e-07, "loss": 0.5965, "step": 33433 }, { "epoch": 0.9761466818487051, "grad_norm": 0.7261052745035864, "learning_rate": 1.3252230332522305e-07, "loss": 0.672, "step": 33434 }, { "epoch": 0.9761758780765525, "grad_norm": 0.7505256739949503, "learning_rate": 1.3236009732360098e-07, "loss": 0.6828, "step": 33435 }, { "epoch": 0.9762050743043998, "grad_norm": 0.7614125234095512, "learning_rate": 1.321978913219789e-07, "loss": 0.6177, "step": 33436 }, { "epoch": 0.9762342705322472, "grad_norm": 0.6838548321503622, "learning_rate": 1.3203568532035686e-07, "loss": 0.5658, "step": 33437 }, { "epoch": 0.9762634667600946, "grad_norm": 0.7589125238514084, "learning_rate": 1.3187347931873481e-07, "loss": 0.6726, "step": 33438 }, { "epoch": 0.9762926629879419, "grad_norm": 0.7103432407382134, "learning_rate": 1.3171127331711274e-07, "loss": 0.604, "step": 33439 }, { "epoch": 0.9763218592157893, "grad_norm": 0.6888344446107496, "learning_rate": 1.3154906731549067e-07, "loss": 0.5491, "step": 33440 }, { "epoch": 0.9763510554436367, "grad_norm": 0.732767331016125, "learning_rate": 1.3138686131386862e-07, "loss": 0.6529, "step": 33441 }, { "epoch": 0.976380251671484, "grad_norm": 0.6792170263870217, "learning_rate": 1.3122465531224658e-07, "loss": 0.5804, "step": 33442 }, { "epoch": 0.9764094478993314, "grad_norm": 0.7388931483986014, "learning_rate": 1.310624493106245e-07, "loss": 0.6711, "step": 33443 }, { "epoch": 0.9764386441271787, "grad_norm": 0.7346695135336081, "learning_rate": 1.3090024330900243e-07, "loss": 0.5298, "step": 33444 }, { "epoch": 0.9764678403550261, "grad_norm": 0.7237484855595688, "learning_rate": 1.3073803730738039e-07, "loss": 0.6064, "step": 33445 }, { "epoch": 0.9764970365828735, "grad_norm": 0.7206564106646032, "learning_rate": 1.3057583130575831e-07, "loss": 0.6192, "step": 33446 }, { "epoch": 0.9765262328107208, "grad_norm": 0.725950521718696, "learning_rate": 1.3041362530413627e-07, "loss": 0.645, "step": 33447 }, { "epoch": 0.9765554290385682, "grad_norm": 0.7081692227297175, "learning_rate": 1.3025141930251422e-07, "loss": 0.6274, "step": 33448 }, { "epoch": 0.9765846252664155, "grad_norm": 0.7452951828760772, "learning_rate": 1.3008921330089215e-07, "loss": 0.6951, "step": 33449 }, { "epoch": 0.9766138214942629, "grad_norm": 0.7285580427517896, "learning_rate": 1.2992700729927008e-07, "loss": 0.6851, "step": 33450 }, { "epoch": 0.9766430177221103, "grad_norm": 0.7297889156289746, "learning_rate": 1.2976480129764803e-07, "loss": 0.6824, "step": 33451 }, { "epoch": 0.9766722139499576, "grad_norm": 0.670224732025111, "learning_rate": 1.2960259529602596e-07, "loss": 0.5598, "step": 33452 }, { "epoch": 0.976701410177805, "grad_norm": 0.6785211487122341, "learning_rate": 1.294403892944039e-07, "loss": 0.5722, "step": 33453 }, { "epoch": 0.9767306064056523, "grad_norm": 0.9108430155361484, "learning_rate": 1.2927818329278184e-07, "loss": 0.5984, "step": 33454 }, { "epoch": 0.9767598026334997, "grad_norm": 0.7331158932527091, "learning_rate": 1.291159772911598e-07, "loss": 0.6532, "step": 33455 }, { "epoch": 0.9767889988613471, "grad_norm": 0.7922452421280066, "learning_rate": 1.2895377128953772e-07, "loss": 0.7281, "step": 33456 }, { "epoch": 0.9768181950891944, "grad_norm": 0.749353487785035, "learning_rate": 1.2879156528791567e-07, "loss": 0.6806, "step": 33457 }, { "epoch": 0.9768473913170418, "grad_norm": 0.7419718931183752, "learning_rate": 1.286293592862936e-07, "loss": 0.7189, "step": 33458 }, { "epoch": 0.9768765875448892, "grad_norm": 0.668242847880253, "learning_rate": 1.2846715328467153e-07, "loss": 0.5768, "step": 33459 }, { "epoch": 0.9769057837727365, "grad_norm": 0.7149744117548565, "learning_rate": 1.2830494728304948e-07, "loss": 0.5522, "step": 33460 }, { "epoch": 0.9769349800005839, "grad_norm": 0.727460556068767, "learning_rate": 1.2814274128142744e-07, "loss": 0.6541, "step": 33461 }, { "epoch": 0.9769641762284312, "grad_norm": 0.7493790666257345, "learning_rate": 1.2798053527980536e-07, "loss": 0.6151, "step": 33462 }, { "epoch": 0.9769933724562786, "grad_norm": 0.6872611852337459, "learning_rate": 1.278183292781833e-07, "loss": 0.54, "step": 33463 }, { "epoch": 0.977022568684126, "grad_norm": 0.6615304081215287, "learning_rate": 1.2765612327656124e-07, "loss": 0.5507, "step": 33464 }, { "epoch": 0.9770517649119733, "grad_norm": 0.6616406912110303, "learning_rate": 1.274939172749392e-07, "loss": 0.564, "step": 33465 }, { "epoch": 0.9770809611398207, "grad_norm": 0.7235019855474522, "learning_rate": 1.2733171127331713e-07, "loss": 0.6677, "step": 33466 }, { "epoch": 0.977110157367668, "grad_norm": 0.7346648954648408, "learning_rate": 1.2716950527169505e-07, "loss": 0.6565, "step": 33467 }, { "epoch": 0.9771393535955155, "grad_norm": 0.7718139601282205, "learning_rate": 1.27007299270073e-07, "loss": 0.6525, "step": 33468 }, { "epoch": 0.9771685498233629, "grad_norm": 0.7177538337539049, "learning_rate": 1.2684509326845093e-07, "loss": 0.6409, "step": 33469 }, { "epoch": 0.9771977460512102, "grad_norm": 0.8280602953467889, "learning_rate": 1.266828872668289e-07, "loss": 0.6339, "step": 33470 }, { "epoch": 0.9772269422790576, "grad_norm": 0.7698088157601254, "learning_rate": 1.2652068126520682e-07, "loss": 0.7263, "step": 33471 }, { "epoch": 0.977256138506905, "grad_norm": 0.7310497758996558, "learning_rate": 1.2635847526358474e-07, "loss": 0.6511, "step": 33472 }, { "epoch": 0.9772853347347523, "grad_norm": 0.6552260102228238, "learning_rate": 1.261962692619627e-07, "loss": 0.5558, "step": 33473 }, { "epoch": 0.9773145309625997, "grad_norm": 0.7199452405941452, "learning_rate": 1.2603406326034065e-07, "loss": 0.6575, "step": 33474 }, { "epoch": 0.977343727190447, "grad_norm": 0.6614278092307787, "learning_rate": 1.2587185725871858e-07, "loss": 0.5352, "step": 33475 }, { "epoch": 0.9773729234182944, "grad_norm": 0.6803946457160306, "learning_rate": 1.2570965125709653e-07, "loss": 0.5673, "step": 33476 }, { "epoch": 0.9774021196461418, "grad_norm": 0.6820756852692178, "learning_rate": 1.2554744525547446e-07, "loss": 0.6081, "step": 33477 }, { "epoch": 0.9774313158739891, "grad_norm": 0.7330122124483726, "learning_rate": 1.2538523925385241e-07, "loss": 0.6708, "step": 33478 }, { "epoch": 0.9774605121018365, "grad_norm": 0.6895171228226057, "learning_rate": 1.2522303325223034e-07, "loss": 0.5663, "step": 33479 }, { "epoch": 0.9774897083296838, "grad_norm": 0.7680082775693637, "learning_rate": 1.250608272506083e-07, "loss": 0.7022, "step": 33480 }, { "epoch": 0.9775189045575312, "grad_norm": 0.7129251727740604, "learning_rate": 1.2489862124898622e-07, "loss": 0.6156, "step": 33481 }, { "epoch": 0.9775481007853786, "grad_norm": 0.7946685578934768, "learning_rate": 1.2473641524736415e-07, "loss": 0.7082, "step": 33482 }, { "epoch": 0.9775772970132259, "grad_norm": 0.7141718983991777, "learning_rate": 1.245742092457421e-07, "loss": 0.607, "step": 33483 }, { "epoch": 0.9776064932410733, "grad_norm": 0.706586612997781, "learning_rate": 1.2441200324412006e-07, "loss": 0.597, "step": 33484 }, { "epoch": 0.9776356894689207, "grad_norm": 0.7810695736296925, "learning_rate": 1.2424979724249798e-07, "loss": 0.7205, "step": 33485 }, { "epoch": 0.977664885696768, "grad_norm": 0.7597309752594197, "learning_rate": 1.240875912408759e-07, "loss": 0.6937, "step": 33486 }, { "epoch": 0.9776940819246154, "grad_norm": 0.7023496589595585, "learning_rate": 1.2392538523925387e-07, "loss": 0.6632, "step": 33487 }, { "epoch": 0.9777232781524627, "grad_norm": 0.7304708887952073, "learning_rate": 1.2376317923763182e-07, "loss": 0.6279, "step": 33488 }, { "epoch": 0.9777524743803101, "grad_norm": 0.7171561217158994, "learning_rate": 1.2360097323600975e-07, "loss": 0.5849, "step": 33489 }, { "epoch": 0.9777816706081575, "grad_norm": 0.6734641146661822, "learning_rate": 1.2343876723438767e-07, "loss": 0.5551, "step": 33490 }, { "epoch": 0.9778108668360048, "grad_norm": 0.7627423594146499, "learning_rate": 1.2327656123276563e-07, "loss": 0.6707, "step": 33491 }, { "epoch": 0.9778400630638522, "grad_norm": 0.7710278302121314, "learning_rate": 1.2311435523114356e-07, "loss": 0.7182, "step": 33492 }, { "epoch": 0.9778692592916995, "grad_norm": 0.7331649197161546, "learning_rate": 1.229521492295215e-07, "loss": 0.6072, "step": 33493 }, { "epoch": 0.9778984555195469, "grad_norm": 0.6716861824801975, "learning_rate": 1.2278994322789944e-07, "loss": 0.5945, "step": 33494 }, { "epoch": 0.9779276517473943, "grad_norm": 0.7785631773291296, "learning_rate": 1.2262773722627736e-07, "loss": 0.64, "step": 33495 }, { "epoch": 0.9779568479752416, "grad_norm": 0.7493792243306042, "learning_rate": 1.2246553122465532e-07, "loss": 0.6746, "step": 33496 }, { "epoch": 0.977986044203089, "grad_norm": 0.7349848212402466, "learning_rate": 1.2230332522303327e-07, "loss": 0.6363, "step": 33497 }, { "epoch": 0.9780152404309364, "grad_norm": 0.7458968318765521, "learning_rate": 1.221411192214112e-07, "loss": 0.6719, "step": 33498 }, { "epoch": 0.9780444366587837, "grad_norm": 0.6854466404407666, "learning_rate": 1.2197891321978913e-07, "loss": 0.5987, "step": 33499 }, { "epoch": 0.9780736328866311, "grad_norm": 0.7375915514195283, "learning_rate": 1.2181670721816708e-07, "loss": 0.6555, "step": 33500 }, { "epoch": 0.9781028291144784, "grad_norm": 0.6671564103170343, "learning_rate": 1.2165450121654503e-07, "loss": 0.563, "step": 33501 }, { "epoch": 0.9781320253423258, "grad_norm": 0.7516688668209702, "learning_rate": 1.2149229521492296e-07, "loss": 0.7146, "step": 33502 }, { "epoch": 0.9781612215701732, "grad_norm": 0.7533483396573918, "learning_rate": 1.2133008921330092e-07, "loss": 0.6686, "step": 33503 }, { "epoch": 0.9781904177980205, "grad_norm": 0.6867431593770194, "learning_rate": 1.2116788321167884e-07, "loss": 0.6247, "step": 33504 }, { "epoch": 0.9782196140258679, "grad_norm": 0.7414347789375828, "learning_rate": 1.2100567721005677e-07, "loss": 0.7053, "step": 33505 }, { "epoch": 0.9782488102537152, "grad_norm": 0.6693869421178125, "learning_rate": 1.2084347120843472e-07, "loss": 0.527, "step": 33506 }, { "epoch": 0.9782780064815626, "grad_norm": 0.7244347729323873, "learning_rate": 1.2068126520681268e-07, "loss": 0.661, "step": 33507 }, { "epoch": 0.97830720270941, "grad_norm": 0.7478675138779326, "learning_rate": 1.205190592051906e-07, "loss": 0.6618, "step": 33508 }, { "epoch": 0.9783363989372573, "grad_norm": 0.7388844020672627, "learning_rate": 1.2035685320356853e-07, "loss": 0.6454, "step": 33509 }, { "epoch": 0.9783655951651047, "grad_norm": 0.7247428054589787, "learning_rate": 1.201946472019465e-07, "loss": 0.6428, "step": 33510 }, { "epoch": 0.978394791392952, "grad_norm": 0.7552274532170624, "learning_rate": 1.2003244120032444e-07, "loss": 0.7121, "step": 33511 }, { "epoch": 0.9784239876207994, "grad_norm": 0.7125311470570109, "learning_rate": 1.1987023519870237e-07, "loss": 0.607, "step": 33512 }, { "epoch": 0.9784531838486468, "grad_norm": 0.7086870929871258, "learning_rate": 1.197080291970803e-07, "loss": 0.6042, "step": 33513 }, { "epoch": 0.9784823800764941, "grad_norm": 0.7513202242113509, "learning_rate": 1.1954582319545825e-07, "loss": 0.6899, "step": 33514 }, { "epoch": 0.9785115763043415, "grad_norm": 0.7027618832486938, "learning_rate": 1.1938361719383618e-07, "loss": 0.5964, "step": 33515 }, { "epoch": 0.9785407725321889, "grad_norm": 0.69704957987968, "learning_rate": 1.1922141119221413e-07, "loss": 0.5985, "step": 33516 }, { "epoch": 0.9785699687600362, "grad_norm": 0.7239425755810431, "learning_rate": 1.1905920519059207e-07, "loss": 0.6488, "step": 33517 }, { "epoch": 0.9785991649878836, "grad_norm": 0.7629556546607053, "learning_rate": 1.1889699918897e-07, "loss": 0.7163, "step": 33518 }, { "epoch": 0.9786283612157309, "grad_norm": 0.6286480239591073, "learning_rate": 1.1873479318734794e-07, "loss": 0.5205, "step": 33519 }, { "epoch": 0.9786575574435783, "grad_norm": 0.7101052451287898, "learning_rate": 1.1857258718572588e-07, "loss": 0.5759, "step": 33520 }, { "epoch": 0.9786867536714257, "grad_norm": 0.6877823606650194, "learning_rate": 1.1841038118410381e-07, "loss": 0.5688, "step": 33521 }, { "epoch": 0.978715949899273, "grad_norm": 0.6953106496280496, "learning_rate": 1.1824817518248176e-07, "loss": 0.6339, "step": 33522 }, { "epoch": 0.9787451461271204, "grad_norm": 0.7166228600697478, "learning_rate": 1.180859691808597e-07, "loss": 0.6857, "step": 33523 }, { "epoch": 0.9787743423549677, "grad_norm": 0.8438791849545927, "learning_rate": 1.1792376317923766e-07, "loss": 0.6564, "step": 33524 }, { "epoch": 0.9788035385828151, "grad_norm": 0.7422992923053272, "learning_rate": 1.1776155717761558e-07, "loss": 0.6664, "step": 33525 }, { "epoch": 0.9788327348106625, "grad_norm": 0.7167142385679713, "learning_rate": 1.1759935117599352e-07, "loss": 0.6283, "step": 33526 }, { "epoch": 0.9788619310385098, "grad_norm": 0.7464431607691807, "learning_rate": 1.1743714517437146e-07, "loss": 0.7193, "step": 33527 }, { "epoch": 0.9788911272663572, "grad_norm": 0.6821414283898216, "learning_rate": 1.1727493917274939e-07, "loss": 0.562, "step": 33528 }, { "epoch": 0.9789203234942045, "grad_norm": 0.6866836294296551, "learning_rate": 1.1711273317112735e-07, "loss": 0.593, "step": 33529 }, { "epoch": 0.9789495197220519, "grad_norm": 0.6957703546274902, "learning_rate": 1.1695052716950529e-07, "loss": 0.5976, "step": 33530 }, { "epoch": 0.9789787159498993, "grad_norm": 0.8276778386035153, "learning_rate": 1.1678832116788321e-07, "loss": 0.79, "step": 33531 }, { "epoch": 0.9790079121777466, "grad_norm": 0.7125071919987298, "learning_rate": 1.1662611516626115e-07, "loss": 0.6552, "step": 33532 }, { "epoch": 0.979037108405594, "grad_norm": 0.7231756724146083, "learning_rate": 1.1646390916463911e-07, "loss": 0.6285, "step": 33533 }, { "epoch": 0.9790663046334414, "grad_norm": 0.7770481206276201, "learning_rate": 1.1630170316301705e-07, "loss": 0.6989, "step": 33534 }, { "epoch": 0.9790955008612887, "grad_norm": 0.6960630049494887, "learning_rate": 1.1613949716139498e-07, "loss": 0.5802, "step": 33535 }, { "epoch": 0.9791246970891361, "grad_norm": 0.6870704875008814, "learning_rate": 1.1597729115977292e-07, "loss": 0.5124, "step": 33536 }, { "epoch": 0.9791538933169834, "grad_norm": 0.6944813797787923, "learning_rate": 1.1581508515815087e-07, "loss": 0.5524, "step": 33537 }, { "epoch": 0.9791830895448308, "grad_norm": 0.7397145882702167, "learning_rate": 1.156528791565288e-07, "loss": 0.6941, "step": 33538 }, { "epoch": 0.9792122857726782, "grad_norm": 0.7311410473373695, "learning_rate": 1.1549067315490674e-07, "loss": 0.7432, "step": 33539 }, { "epoch": 0.9792414820005255, "grad_norm": 0.7227217093831171, "learning_rate": 1.1532846715328469e-07, "loss": 0.6374, "step": 33540 }, { "epoch": 0.9792706782283729, "grad_norm": 0.7011783003083611, "learning_rate": 1.1516626115166262e-07, "loss": 0.641, "step": 33541 }, { "epoch": 0.9792998744562202, "grad_norm": 0.700826626749552, "learning_rate": 1.1500405515004056e-07, "loss": 0.6526, "step": 33542 }, { "epoch": 0.9793290706840676, "grad_norm": 0.7326413388238096, "learning_rate": 1.148418491484185e-07, "loss": 0.6614, "step": 33543 }, { "epoch": 0.979358266911915, "grad_norm": 0.6828415899049992, "learning_rate": 1.1467964314679643e-07, "loss": 0.5738, "step": 33544 }, { "epoch": 0.9793874631397623, "grad_norm": 0.7272343609917671, "learning_rate": 1.1451743714517438e-07, "loss": 0.6228, "step": 33545 }, { "epoch": 0.9794166593676097, "grad_norm": 0.7461640523991883, "learning_rate": 1.1435523114355232e-07, "loss": 0.6348, "step": 33546 }, { "epoch": 0.979445855595457, "grad_norm": 0.7012966502483441, "learning_rate": 1.1419302514193026e-07, "loss": 0.6235, "step": 33547 }, { "epoch": 0.9794750518233044, "grad_norm": 0.7584651392909604, "learning_rate": 1.1403081914030819e-07, "loss": 0.6773, "step": 33548 }, { "epoch": 0.9795042480511518, "grad_norm": 0.8452404792105928, "learning_rate": 1.1386861313868614e-07, "loss": 0.7721, "step": 33549 }, { "epoch": 0.9795334442789991, "grad_norm": 0.7495989208490299, "learning_rate": 1.1370640713706409e-07, "loss": 0.6757, "step": 33550 }, { "epoch": 0.9795626405068465, "grad_norm": 0.6999236887480466, "learning_rate": 1.1354420113544201e-07, "loss": 0.6112, "step": 33551 }, { "epoch": 0.9795918367346939, "grad_norm": 0.769290853821975, "learning_rate": 1.1338199513381997e-07, "loss": 0.6333, "step": 33552 }, { "epoch": 0.9796210329625412, "grad_norm": 0.7234461354621782, "learning_rate": 1.1321978913219791e-07, "loss": 0.6549, "step": 33553 }, { "epoch": 0.9796502291903886, "grad_norm": 0.7185027849027055, "learning_rate": 1.1305758313057583e-07, "loss": 0.6206, "step": 33554 }, { "epoch": 0.9796794254182359, "grad_norm": 0.7004936030246253, "learning_rate": 1.1289537712895378e-07, "loss": 0.6256, "step": 33555 }, { "epoch": 0.9797086216460833, "grad_norm": 0.6736524335406211, "learning_rate": 1.1273317112733173e-07, "loss": 0.5635, "step": 33556 }, { "epoch": 0.9797378178739307, "grad_norm": 0.7227039077828628, "learning_rate": 1.1257096512570966e-07, "loss": 0.6377, "step": 33557 }, { "epoch": 0.979767014101778, "grad_norm": 0.6697967326721562, "learning_rate": 1.124087591240876e-07, "loss": 0.5024, "step": 33558 }, { "epoch": 0.9797962103296254, "grad_norm": 0.7280522049122229, "learning_rate": 1.1224655312246554e-07, "loss": 0.6949, "step": 33559 }, { "epoch": 0.9798254065574727, "grad_norm": 0.7213355467469694, "learning_rate": 1.1208434712084349e-07, "loss": 0.6222, "step": 33560 }, { "epoch": 0.9798546027853201, "grad_norm": 0.7202195170859669, "learning_rate": 1.1192214111922142e-07, "loss": 0.6237, "step": 33561 }, { "epoch": 0.9798837990131675, "grad_norm": 0.7200908520746627, "learning_rate": 1.1175993511759936e-07, "loss": 0.638, "step": 33562 }, { "epoch": 0.9799129952410148, "grad_norm": 0.7389842419921894, "learning_rate": 1.115977291159773e-07, "loss": 0.6108, "step": 33563 }, { "epoch": 0.9799421914688622, "grad_norm": 0.7381583661801788, "learning_rate": 1.1143552311435523e-07, "loss": 0.6346, "step": 33564 }, { "epoch": 0.9799713876967096, "grad_norm": 0.717686695098037, "learning_rate": 1.1127331711273318e-07, "loss": 0.6594, "step": 33565 }, { "epoch": 0.9800005839245569, "grad_norm": 0.7337044014692691, "learning_rate": 1.1111111111111112e-07, "loss": 0.67, "step": 33566 }, { "epoch": 0.9800297801524043, "grad_norm": 0.7697272812037803, "learning_rate": 1.1094890510948905e-07, "loss": 0.7244, "step": 33567 }, { "epoch": 0.9800589763802516, "grad_norm": 0.7247356504882249, "learning_rate": 1.10786699107867e-07, "loss": 0.6182, "step": 33568 }, { "epoch": 0.980088172608099, "grad_norm": 0.6837625213845127, "learning_rate": 1.1062449310624494e-07, "loss": 0.5846, "step": 33569 }, { "epoch": 0.9801173688359464, "grad_norm": 0.7506599416797026, "learning_rate": 1.1046228710462288e-07, "loss": 0.68, "step": 33570 }, { "epoch": 0.9801465650637937, "grad_norm": 0.772533224835137, "learning_rate": 1.1030008110300081e-07, "loss": 0.7025, "step": 33571 }, { "epoch": 0.9801757612916411, "grad_norm": 0.6953655132620613, "learning_rate": 1.1013787510137877e-07, "loss": 0.5626, "step": 33572 }, { "epoch": 0.9802049575194884, "grad_norm": 0.6672244341259288, "learning_rate": 1.0997566909975671e-07, "loss": 0.5638, "step": 33573 }, { "epoch": 0.9802341537473358, "grad_norm": 0.7338459275142845, "learning_rate": 1.0981346309813463e-07, "loss": 0.6053, "step": 33574 }, { "epoch": 0.9802633499751832, "grad_norm": 0.6831725601669691, "learning_rate": 1.0965125709651257e-07, "loss": 0.5539, "step": 33575 }, { "epoch": 0.9802925462030305, "grad_norm": 0.7302272903995186, "learning_rate": 1.0948905109489053e-07, "loss": 0.6578, "step": 33576 }, { "epoch": 0.9803217424308779, "grad_norm": 0.7682351204174884, "learning_rate": 1.0932684509326846e-07, "loss": 0.7166, "step": 33577 }, { "epoch": 0.9803509386587252, "grad_norm": 0.6927613345202938, "learning_rate": 1.091646390916464e-07, "loss": 0.5497, "step": 33578 }, { "epoch": 0.9803801348865726, "grad_norm": 0.6984641337151135, "learning_rate": 1.0900243309002434e-07, "loss": 0.5815, "step": 33579 }, { "epoch": 0.98040933111442, "grad_norm": 0.8681252870732229, "learning_rate": 1.0884022708840228e-07, "loss": 0.6568, "step": 33580 }, { "epoch": 0.9804385273422673, "grad_norm": 0.6974982652998926, "learning_rate": 1.0867802108678022e-07, "loss": 0.622, "step": 33581 }, { "epoch": 0.9804677235701147, "grad_norm": 0.7011900312874829, "learning_rate": 1.0851581508515816e-07, "loss": 0.6099, "step": 33582 }, { "epoch": 0.980496919797962, "grad_norm": 0.7522293927690489, "learning_rate": 1.0835360908353611e-07, "loss": 0.713, "step": 33583 }, { "epoch": 0.9805261160258094, "grad_norm": 0.7384930618220469, "learning_rate": 1.0819140308191404e-07, "loss": 0.6645, "step": 33584 }, { "epoch": 0.9805553122536568, "grad_norm": 0.6993187329376651, "learning_rate": 1.0802919708029198e-07, "loss": 0.6259, "step": 33585 }, { "epoch": 0.9805845084815041, "grad_norm": 0.6908005348722542, "learning_rate": 1.0786699107866992e-07, "loss": 0.6345, "step": 33586 }, { "epoch": 0.9806137047093515, "grad_norm": 0.6994332329529137, "learning_rate": 1.0770478507704785e-07, "loss": 0.5923, "step": 33587 }, { "epoch": 0.980642900937199, "grad_norm": 0.7054397296593934, "learning_rate": 1.075425790754258e-07, "loss": 0.6172, "step": 33588 }, { "epoch": 0.9806720971650463, "grad_norm": 0.7462385892219286, "learning_rate": 1.0738037307380374e-07, "loss": 0.6205, "step": 33589 }, { "epoch": 0.9807012933928937, "grad_norm": 0.7039274519037032, "learning_rate": 1.0721816707218167e-07, "loss": 0.5697, "step": 33590 }, { "epoch": 0.980730489620741, "grad_norm": 0.6960455973123608, "learning_rate": 1.0705596107055961e-07, "loss": 0.5937, "step": 33591 }, { "epoch": 0.9807596858485884, "grad_norm": 0.7484352299507511, "learning_rate": 1.0689375506893757e-07, "loss": 0.6441, "step": 33592 }, { "epoch": 0.9807888820764358, "grad_norm": 0.7808474314687355, "learning_rate": 1.067315490673155e-07, "loss": 0.6905, "step": 33593 }, { "epoch": 0.9808180783042831, "grad_norm": 0.93441043478498, "learning_rate": 1.0656934306569343e-07, "loss": 0.6201, "step": 33594 }, { "epoch": 0.9808472745321305, "grad_norm": 0.7129452041077524, "learning_rate": 1.0640713706407139e-07, "loss": 0.6396, "step": 33595 }, { "epoch": 0.9808764707599779, "grad_norm": 0.6989665593929404, "learning_rate": 1.0624493106244933e-07, "loss": 0.6151, "step": 33596 }, { "epoch": 0.9809056669878252, "grad_norm": 0.713649600290961, "learning_rate": 1.0608272506082726e-07, "loss": 0.6963, "step": 33597 }, { "epoch": 0.9809348632156726, "grad_norm": 0.6670997515268423, "learning_rate": 1.059205190592052e-07, "loss": 0.6178, "step": 33598 }, { "epoch": 0.9809640594435199, "grad_norm": 0.7146031380520773, "learning_rate": 1.0575831305758315e-07, "loss": 0.6794, "step": 33599 }, { "epoch": 0.9809932556713673, "grad_norm": 0.8566431014673173, "learning_rate": 1.0559610705596108e-07, "loss": 0.6806, "step": 33600 }, { "epoch": 0.9810224518992147, "grad_norm": 0.7194794927139193, "learning_rate": 1.0543390105433902e-07, "loss": 0.6189, "step": 33601 }, { "epoch": 0.981051648127062, "grad_norm": 0.6833412162063721, "learning_rate": 1.0527169505271696e-07, "loss": 0.5897, "step": 33602 }, { "epoch": 0.9810808443549094, "grad_norm": 0.7176138895077917, "learning_rate": 1.0510948905109489e-07, "loss": 0.653, "step": 33603 }, { "epoch": 0.9811100405827567, "grad_norm": 0.7087739896180125, "learning_rate": 1.0494728304947284e-07, "loss": 0.629, "step": 33604 }, { "epoch": 0.9811392368106041, "grad_norm": 0.8026513185210526, "learning_rate": 1.0478507704785078e-07, "loss": 0.6807, "step": 33605 }, { "epoch": 0.9811684330384515, "grad_norm": 0.8046477685559347, "learning_rate": 1.0462287104622872e-07, "loss": 0.7331, "step": 33606 }, { "epoch": 0.9811976292662988, "grad_norm": 0.7088185906770688, "learning_rate": 1.0446066504460665e-07, "loss": 0.6176, "step": 33607 }, { "epoch": 0.9812268254941462, "grad_norm": 0.7008444910358348, "learning_rate": 1.042984590429846e-07, "loss": 0.6293, "step": 33608 }, { "epoch": 0.9812560217219936, "grad_norm": 0.685168250013813, "learning_rate": 1.0413625304136254e-07, "loss": 0.5364, "step": 33609 }, { "epoch": 0.9812852179498409, "grad_norm": 0.7137012492462524, "learning_rate": 1.0397404703974047e-07, "loss": 0.6231, "step": 33610 }, { "epoch": 0.9813144141776883, "grad_norm": 0.7163261939745127, "learning_rate": 1.0381184103811842e-07, "loss": 0.6286, "step": 33611 }, { "epoch": 0.9813436104055356, "grad_norm": 0.7477695409467447, "learning_rate": 1.0364963503649636e-07, "loss": 0.5788, "step": 33612 }, { "epoch": 0.981372806633383, "grad_norm": 0.7586202637797089, "learning_rate": 1.0348742903487429e-07, "loss": 0.63, "step": 33613 }, { "epoch": 0.9814020028612304, "grad_norm": 0.6830030870652407, "learning_rate": 1.0332522303325223e-07, "loss": 0.5605, "step": 33614 }, { "epoch": 0.9814311990890777, "grad_norm": 0.689786767601945, "learning_rate": 1.0316301703163019e-07, "loss": 0.5954, "step": 33615 }, { "epoch": 0.9814603953169251, "grad_norm": 0.6703927394047442, "learning_rate": 1.0300081103000813e-07, "loss": 0.5705, "step": 33616 }, { "epoch": 0.9814895915447724, "grad_norm": 0.6960794668046336, "learning_rate": 1.0283860502838605e-07, "loss": 0.6134, "step": 33617 }, { "epoch": 0.9815187877726198, "grad_norm": 0.9585767735787637, "learning_rate": 1.02676399026764e-07, "loss": 0.7279, "step": 33618 }, { "epoch": 0.9815479840004672, "grad_norm": 0.7352752461214807, "learning_rate": 1.0251419302514195e-07, "loss": 0.631, "step": 33619 }, { "epoch": 0.9815771802283145, "grad_norm": 0.7025368510393593, "learning_rate": 1.0235198702351988e-07, "loss": 0.5762, "step": 33620 }, { "epoch": 0.9816063764561619, "grad_norm": 0.700417102507626, "learning_rate": 1.0218978102189782e-07, "loss": 0.5825, "step": 33621 }, { "epoch": 0.9816355726840092, "grad_norm": 0.6943005210738682, "learning_rate": 1.0202757502027576e-07, "loss": 0.6035, "step": 33622 }, { "epoch": 0.9816647689118566, "grad_norm": 0.6866083877243712, "learning_rate": 1.018653690186537e-07, "loss": 0.6095, "step": 33623 }, { "epoch": 0.981693965139704, "grad_norm": 0.7111444722376117, "learning_rate": 1.0170316301703164e-07, "loss": 0.6449, "step": 33624 }, { "epoch": 0.9817231613675513, "grad_norm": 0.752235554908161, "learning_rate": 1.0154095701540958e-07, "loss": 0.6902, "step": 33625 }, { "epoch": 0.9817523575953987, "grad_norm": 0.7303138652841851, "learning_rate": 1.0137875101378751e-07, "loss": 0.6314, "step": 33626 }, { "epoch": 0.9817815538232461, "grad_norm": 0.742841820048057, "learning_rate": 1.0121654501216546e-07, "loss": 0.7004, "step": 33627 }, { "epoch": 0.9818107500510934, "grad_norm": 0.7050579537209883, "learning_rate": 1.010543390105434e-07, "loss": 0.6216, "step": 33628 }, { "epoch": 0.9818399462789408, "grad_norm": 0.67754592969554, "learning_rate": 1.0089213300892134e-07, "loss": 0.6258, "step": 33629 }, { "epoch": 0.9818691425067881, "grad_norm": 0.7482253126889441, "learning_rate": 1.0072992700729927e-07, "loss": 0.6546, "step": 33630 }, { "epoch": 0.9818983387346355, "grad_norm": 0.6922724029783167, "learning_rate": 1.0056772100567722e-07, "loss": 0.606, "step": 33631 }, { "epoch": 0.9819275349624829, "grad_norm": 0.7496410737950908, "learning_rate": 1.0040551500405516e-07, "loss": 0.67, "step": 33632 }, { "epoch": 0.9819567311903302, "grad_norm": 0.7468718539124463, "learning_rate": 1.0024330900243309e-07, "loss": 0.6345, "step": 33633 }, { "epoch": 0.9819859274181776, "grad_norm": 0.7338821210243696, "learning_rate": 1.0008110300081103e-07, "loss": 0.6518, "step": 33634 }, { "epoch": 0.982015123646025, "grad_norm": 0.7478932712442423, "learning_rate": 9.991889699918899e-08, "loss": 0.6843, "step": 33635 }, { "epoch": 0.9820443198738723, "grad_norm": 0.6926795279660937, "learning_rate": 9.975669099756691e-08, "loss": 0.5933, "step": 33636 }, { "epoch": 0.9820735161017197, "grad_norm": 0.6503385736635255, "learning_rate": 9.959448499594485e-08, "loss": 0.5411, "step": 33637 }, { "epoch": 0.982102712329567, "grad_norm": 0.6845681822357306, "learning_rate": 9.943227899432281e-08, "loss": 0.5772, "step": 33638 }, { "epoch": 0.9821319085574144, "grad_norm": 0.7374752360392527, "learning_rate": 9.927007299270075e-08, "loss": 0.6731, "step": 33639 }, { "epoch": 0.9821611047852618, "grad_norm": 0.7003864776775213, "learning_rate": 9.910786699107868e-08, "loss": 0.5787, "step": 33640 }, { "epoch": 0.9821903010131091, "grad_norm": 0.765130728940771, "learning_rate": 9.894566098945662e-08, "loss": 0.6754, "step": 33641 }, { "epoch": 0.9822194972409565, "grad_norm": 0.7433193489875958, "learning_rate": 9.878345498783457e-08, "loss": 0.613, "step": 33642 }, { "epoch": 0.9822486934688038, "grad_norm": 0.7680443368281037, "learning_rate": 9.86212489862125e-08, "loss": 0.688, "step": 33643 }, { "epoch": 0.9822778896966512, "grad_norm": 0.8008109896813911, "learning_rate": 9.845904298459044e-08, "loss": 0.7486, "step": 33644 }, { "epoch": 0.9823070859244986, "grad_norm": 0.7594593510424543, "learning_rate": 9.829683698296838e-08, "loss": 0.6974, "step": 33645 }, { "epoch": 0.9823362821523459, "grad_norm": 0.7702802516210209, "learning_rate": 9.81346309813463e-08, "loss": 0.75, "step": 33646 }, { "epoch": 0.9823654783801933, "grad_norm": 0.7265418268141709, "learning_rate": 9.797242497972426e-08, "loss": 0.5914, "step": 33647 }, { "epoch": 0.9823946746080406, "grad_norm": 0.7500599950164873, "learning_rate": 9.78102189781022e-08, "loss": 0.6464, "step": 33648 }, { "epoch": 0.982423870835888, "grad_norm": 0.6776609113856973, "learning_rate": 9.764801297648013e-08, "loss": 0.5886, "step": 33649 }, { "epoch": 0.9824530670637354, "grad_norm": 0.7940219366767929, "learning_rate": 9.748580697485807e-08, "loss": 0.724, "step": 33650 }, { "epoch": 0.9824822632915827, "grad_norm": 0.712249264336647, "learning_rate": 9.732360097323602e-08, "loss": 0.637, "step": 33651 }, { "epoch": 0.9825114595194301, "grad_norm": 0.7010608078420443, "learning_rate": 9.716139497161396e-08, "loss": 0.6141, "step": 33652 }, { "epoch": 0.9825406557472774, "grad_norm": 0.7233722845085412, "learning_rate": 9.699918896999189e-08, "loss": 0.6441, "step": 33653 }, { "epoch": 0.9825698519751248, "grad_norm": 0.8503311683566649, "learning_rate": 9.683698296836984e-08, "loss": 0.7002, "step": 33654 }, { "epoch": 0.9825990482029722, "grad_norm": 0.6983596236905834, "learning_rate": 9.667477696674778e-08, "loss": 0.6243, "step": 33655 }, { "epoch": 0.9826282444308195, "grad_norm": 0.7726572363896287, "learning_rate": 9.651257096512571e-08, "loss": 0.7333, "step": 33656 }, { "epoch": 0.9826574406586669, "grad_norm": 0.7251228165499756, "learning_rate": 9.635036496350365e-08, "loss": 0.6102, "step": 33657 }, { "epoch": 0.9826866368865143, "grad_norm": 0.7696723907523684, "learning_rate": 9.618815896188161e-08, "loss": 0.6646, "step": 33658 }, { "epoch": 0.9827158331143616, "grad_norm": 0.7194275896407808, "learning_rate": 9.602595296025953e-08, "loss": 0.6226, "step": 33659 }, { "epoch": 0.982745029342209, "grad_norm": 0.7884306828731745, "learning_rate": 9.586374695863747e-08, "loss": 0.7193, "step": 33660 }, { "epoch": 0.9827742255700563, "grad_norm": 0.7207865311657627, "learning_rate": 9.570154095701542e-08, "loss": 0.5826, "step": 33661 }, { "epoch": 0.9828034217979037, "grad_norm": 0.7686193665919302, "learning_rate": 9.553933495539334e-08, "loss": 0.7439, "step": 33662 }, { "epoch": 0.9828326180257511, "grad_norm": 0.709077663051794, "learning_rate": 9.53771289537713e-08, "loss": 0.632, "step": 33663 }, { "epoch": 0.9828618142535984, "grad_norm": 0.68119484909098, "learning_rate": 9.521492295214924e-08, "loss": 0.5319, "step": 33664 }, { "epoch": 0.9828910104814458, "grad_norm": 0.7393519926610955, "learning_rate": 9.505271695052718e-08, "loss": 0.6323, "step": 33665 }, { "epoch": 0.9829202067092931, "grad_norm": 0.7536848516445405, "learning_rate": 9.489051094890512e-08, "loss": 0.6813, "step": 33666 }, { "epoch": 0.9829494029371405, "grad_norm": 0.7036733863167502, "learning_rate": 9.472830494728306e-08, "loss": 0.594, "step": 33667 }, { "epoch": 0.9829785991649879, "grad_norm": 0.7248768633097489, "learning_rate": 9.4566098945661e-08, "loss": 0.6339, "step": 33668 }, { "epoch": 0.9830077953928352, "grad_norm": 0.7069676654247238, "learning_rate": 9.440389294403893e-08, "loss": 0.628, "step": 33669 }, { "epoch": 0.9830369916206826, "grad_norm": 0.7056712097074724, "learning_rate": 9.424168694241688e-08, "loss": 0.5834, "step": 33670 }, { "epoch": 0.98306618784853, "grad_norm": 0.7259107547342661, "learning_rate": 9.407948094079482e-08, "loss": 0.6455, "step": 33671 }, { "epoch": 0.9830953840763773, "grad_norm": 0.7116772852418161, "learning_rate": 9.391727493917275e-08, "loss": 0.632, "step": 33672 }, { "epoch": 0.9831245803042247, "grad_norm": 0.7372628061957959, "learning_rate": 9.375506893755069e-08, "loss": 0.6482, "step": 33673 }, { "epoch": 0.983153776532072, "grad_norm": 0.7009691324200066, "learning_rate": 9.359286293592864e-08, "loss": 0.5856, "step": 33674 }, { "epoch": 0.9831829727599194, "grad_norm": 0.7018118595052577, "learning_rate": 9.343065693430658e-08, "loss": 0.5993, "step": 33675 }, { "epoch": 0.9832121689877668, "grad_norm": 0.7038727157006175, "learning_rate": 9.326845093268451e-08, "loss": 0.5882, "step": 33676 }, { "epoch": 0.9832413652156141, "grad_norm": 0.7337364350374672, "learning_rate": 9.310624493106245e-08, "loss": 0.6676, "step": 33677 }, { "epoch": 0.9832705614434615, "grad_norm": 0.7104057356832908, "learning_rate": 9.29440389294404e-08, "loss": 0.6134, "step": 33678 }, { "epoch": 0.9832997576713088, "grad_norm": 0.6589772908853281, "learning_rate": 9.278183292781833e-08, "loss": 0.5548, "step": 33679 }, { "epoch": 0.9833289538991562, "grad_norm": 0.7837971386269279, "learning_rate": 9.261962692619627e-08, "loss": 0.6762, "step": 33680 }, { "epoch": 0.9833581501270036, "grad_norm": 0.7442223645960959, "learning_rate": 9.245742092457423e-08, "loss": 0.6623, "step": 33681 }, { "epoch": 0.9833873463548509, "grad_norm": 0.7703729612607834, "learning_rate": 9.229521492295216e-08, "loss": 0.619, "step": 33682 }, { "epoch": 0.9834165425826983, "grad_norm": 0.631100080422192, "learning_rate": 9.21330089213301e-08, "loss": 0.4892, "step": 33683 }, { "epoch": 0.9834457388105456, "grad_norm": 0.7067505111432355, "learning_rate": 9.197080291970804e-08, "loss": 0.6046, "step": 33684 }, { "epoch": 0.983474935038393, "grad_norm": 0.7274163804548353, "learning_rate": 9.180859691808596e-08, "loss": 0.6218, "step": 33685 }, { "epoch": 0.9835041312662404, "grad_norm": 0.7302251254029364, "learning_rate": 9.164639091646392e-08, "loss": 0.6423, "step": 33686 }, { "epoch": 0.9835333274940877, "grad_norm": 0.6867622708004719, "learning_rate": 9.148418491484186e-08, "loss": 0.6084, "step": 33687 }, { "epoch": 0.9835625237219351, "grad_norm": 0.7877328151569607, "learning_rate": 9.13219789132198e-08, "loss": 0.7487, "step": 33688 }, { "epoch": 0.9835917199497825, "grad_norm": 0.7700683798122268, "learning_rate": 9.115977291159773e-08, "loss": 0.7333, "step": 33689 }, { "epoch": 0.9836209161776298, "grad_norm": 0.7310301707539748, "learning_rate": 9.099756690997568e-08, "loss": 0.6016, "step": 33690 }, { "epoch": 0.9836501124054772, "grad_norm": 0.7100703257678426, "learning_rate": 9.083536090835362e-08, "loss": 0.6102, "step": 33691 }, { "epoch": 0.9836793086333245, "grad_norm": 0.6388247590979629, "learning_rate": 9.067315490673155e-08, "loss": 0.5198, "step": 33692 }, { "epoch": 0.9837085048611719, "grad_norm": 0.7360176938190996, "learning_rate": 9.05109489051095e-08, "loss": 0.6999, "step": 33693 }, { "epoch": 0.9837377010890193, "grad_norm": 0.726798495231955, "learning_rate": 9.034874290348744e-08, "loss": 0.6279, "step": 33694 }, { "epoch": 0.9837668973168666, "grad_norm": 0.6497678148328866, "learning_rate": 9.018653690186537e-08, "loss": 0.5227, "step": 33695 }, { "epoch": 0.983796093544714, "grad_norm": 0.6920330728459374, "learning_rate": 9.002433090024331e-08, "loss": 0.6076, "step": 33696 }, { "epoch": 0.9838252897725613, "grad_norm": 0.7215602844752401, "learning_rate": 8.986212489862126e-08, "loss": 0.4793, "step": 33697 }, { "epoch": 0.9838544860004087, "grad_norm": 0.7058573490741717, "learning_rate": 8.96999188969992e-08, "loss": 0.6364, "step": 33698 }, { "epoch": 0.9838836822282561, "grad_norm": 0.7010989359073957, "learning_rate": 8.953771289537713e-08, "loss": 0.6202, "step": 33699 }, { "epoch": 0.9839128784561034, "grad_norm": 0.7165020483590105, "learning_rate": 8.937550689375507e-08, "loss": 0.6383, "step": 33700 }, { "epoch": 0.9839420746839508, "grad_norm": 0.6992051814107186, "learning_rate": 8.921330089213303e-08, "loss": 0.6018, "step": 33701 }, { "epoch": 0.9839712709117981, "grad_norm": 0.770743041794534, "learning_rate": 8.905109489051095e-08, "loss": 0.5899, "step": 33702 }, { "epoch": 0.9840004671396455, "grad_norm": 0.680747926930063, "learning_rate": 8.88888888888889e-08, "loss": 0.5496, "step": 33703 }, { "epoch": 0.9840296633674929, "grad_norm": 0.7905263943625319, "learning_rate": 8.872668288726684e-08, "loss": 0.7756, "step": 33704 }, { "epoch": 0.9840588595953402, "grad_norm": 0.6845639432033371, "learning_rate": 8.856447688564476e-08, "loss": 0.5505, "step": 33705 }, { "epoch": 0.9840880558231876, "grad_norm": 0.7143751810687651, "learning_rate": 8.840227088402272e-08, "loss": 0.6538, "step": 33706 }, { "epoch": 0.984117252051035, "grad_norm": 0.7274849832386078, "learning_rate": 8.824006488240066e-08, "loss": 0.6392, "step": 33707 }, { "epoch": 0.9841464482788823, "grad_norm": 0.6817915125422965, "learning_rate": 8.807785888077859e-08, "loss": 0.5885, "step": 33708 }, { "epoch": 0.9841756445067298, "grad_norm": 0.7929748404958729, "learning_rate": 8.791565287915654e-08, "loss": 0.6971, "step": 33709 }, { "epoch": 0.9842048407345771, "grad_norm": 0.6669872652709891, "learning_rate": 8.775344687753448e-08, "loss": 0.5621, "step": 33710 }, { "epoch": 0.9842340369624245, "grad_norm": 0.726349317302348, "learning_rate": 8.759124087591242e-08, "loss": 0.6576, "step": 33711 }, { "epoch": 0.9842632331902719, "grad_norm": 0.7478825605338685, "learning_rate": 8.742903487429035e-08, "loss": 0.6368, "step": 33712 }, { "epoch": 0.9842924294181192, "grad_norm": 0.7581417583633503, "learning_rate": 8.72668288726683e-08, "loss": 0.6887, "step": 33713 }, { "epoch": 0.9843216256459666, "grad_norm": 0.7706309070240076, "learning_rate": 8.710462287104624e-08, "loss": 0.6386, "step": 33714 }, { "epoch": 0.984350821873814, "grad_norm": 0.6807049087575443, "learning_rate": 8.694241686942417e-08, "loss": 0.5962, "step": 33715 }, { "epoch": 0.9843800181016613, "grad_norm": 0.7655330717517703, "learning_rate": 8.678021086780211e-08, "loss": 0.7076, "step": 33716 }, { "epoch": 0.9844092143295087, "grad_norm": 0.7377428502108171, "learning_rate": 8.661800486618006e-08, "loss": 0.6791, "step": 33717 }, { "epoch": 0.984438410557356, "grad_norm": 0.760282877293215, "learning_rate": 8.645579886455799e-08, "loss": 0.6501, "step": 33718 }, { "epoch": 0.9844676067852034, "grad_norm": 0.6857921085581078, "learning_rate": 8.629359286293593e-08, "loss": 0.6353, "step": 33719 }, { "epoch": 0.9844968030130508, "grad_norm": 0.7158267204988069, "learning_rate": 8.613138686131387e-08, "loss": 0.648, "step": 33720 }, { "epoch": 0.9845259992408981, "grad_norm": 0.7199798279567794, "learning_rate": 8.596918085969183e-08, "loss": 0.6687, "step": 33721 }, { "epoch": 0.9845551954687455, "grad_norm": 0.8137963057712799, "learning_rate": 8.580697485806975e-08, "loss": 0.6165, "step": 33722 }, { "epoch": 0.9845843916965928, "grad_norm": 0.7237112906488593, "learning_rate": 8.56447688564477e-08, "loss": 0.65, "step": 33723 }, { "epoch": 0.9846135879244402, "grad_norm": 0.754085961055933, "learning_rate": 8.548256285482565e-08, "loss": 0.6269, "step": 33724 }, { "epoch": 0.9846427841522876, "grad_norm": 0.7201919167809676, "learning_rate": 8.532035685320358e-08, "loss": 0.6162, "step": 33725 }, { "epoch": 0.9846719803801349, "grad_norm": 0.7124121552901922, "learning_rate": 8.515815085158152e-08, "loss": 0.6414, "step": 33726 }, { "epoch": 0.9847011766079823, "grad_norm": 0.6951558983802846, "learning_rate": 8.499594484995946e-08, "loss": 0.6029, "step": 33727 }, { "epoch": 0.9847303728358296, "grad_norm": 0.7246634803614219, "learning_rate": 8.483373884833738e-08, "loss": 0.6629, "step": 33728 }, { "epoch": 0.984759569063677, "grad_norm": 0.7007851829038454, "learning_rate": 8.467153284671534e-08, "loss": 0.5992, "step": 33729 }, { "epoch": 0.9847887652915244, "grad_norm": 0.7262182260295513, "learning_rate": 8.450932684509328e-08, "loss": 0.6957, "step": 33730 }, { "epoch": 0.9848179615193717, "grad_norm": 0.7723019601135646, "learning_rate": 8.43471208434712e-08, "loss": 0.6983, "step": 33731 }, { "epoch": 0.9848471577472191, "grad_norm": 0.6692045454731016, "learning_rate": 8.418491484184915e-08, "loss": 0.6108, "step": 33732 }, { "epoch": 0.9848763539750665, "grad_norm": 0.7695927049519691, "learning_rate": 8.40227088402271e-08, "loss": 0.697, "step": 33733 }, { "epoch": 0.9849055502029138, "grad_norm": 0.6655746594834949, "learning_rate": 8.386050283860504e-08, "loss": 0.5424, "step": 33734 }, { "epoch": 0.9849347464307612, "grad_norm": 0.6993719796349699, "learning_rate": 8.369829683698297e-08, "loss": 0.5921, "step": 33735 }, { "epoch": 0.9849639426586085, "grad_norm": 0.6746526122751727, "learning_rate": 8.353609083536092e-08, "loss": 0.5842, "step": 33736 }, { "epoch": 0.9849931388864559, "grad_norm": 0.7197998092764105, "learning_rate": 8.337388483373886e-08, "loss": 0.5978, "step": 33737 }, { "epoch": 0.9850223351143033, "grad_norm": 0.7658433571715408, "learning_rate": 8.321167883211679e-08, "loss": 0.655, "step": 33738 }, { "epoch": 0.9850515313421506, "grad_norm": 0.7420828987858747, "learning_rate": 8.304947283049473e-08, "loss": 0.6809, "step": 33739 }, { "epoch": 0.985080727569998, "grad_norm": 0.7101534289977581, "learning_rate": 8.288726682887269e-08, "loss": 0.6549, "step": 33740 }, { "epoch": 0.9851099237978453, "grad_norm": 0.7079318784037774, "learning_rate": 8.272506082725061e-08, "loss": 0.616, "step": 33741 }, { "epoch": 0.9851391200256927, "grad_norm": 0.7109044629549409, "learning_rate": 8.256285482562855e-08, "loss": 0.6606, "step": 33742 }, { "epoch": 0.9851683162535401, "grad_norm": 0.7072460975466616, "learning_rate": 8.24006488240065e-08, "loss": 0.6227, "step": 33743 }, { "epoch": 0.9851975124813874, "grad_norm": 0.7322980812336024, "learning_rate": 8.223844282238445e-08, "loss": 0.6819, "step": 33744 }, { "epoch": 0.9852267087092348, "grad_norm": 0.7072012033932215, "learning_rate": 8.207623682076237e-08, "loss": 0.6036, "step": 33745 }, { "epoch": 0.9852559049370821, "grad_norm": 0.7405290391247978, "learning_rate": 8.191403081914032e-08, "loss": 0.6336, "step": 33746 }, { "epoch": 0.9852851011649295, "grad_norm": 0.8269420187492271, "learning_rate": 8.175182481751826e-08, "loss": 0.6321, "step": 33747 }, { "epoch": 0.9853142973927769, "grad_norm": 0.6849608841611424, "learning_rate": 8.158961881589618e-08, "loss": 0.5891, "step": 33748 }, { "epoch": 0.9853434936206242, "grad_norm": 0.7570767946057142, "learning_rate": 8.142741281427414e-08, "loss": 0.6319, "step": 33749 }, { "epoch": 0.9853726898484716, "grad_norm": 0.695820454511862, "learning_rate": 8.126520681265208e-08, "loss": 0.5938, "step": 33750 }, { "epoch": 0.985401886076319, "grad_norm": 0.7109045408446856, "learning_rate": 8.110300081103e-08, "loss": 0.6276, "step": 33751 }, { "epoch": 0.9854310823041663, "grad_norm": 0.699487801357283, "learning_rate": 8.094079480940796e-08, "loss": 0.6296, "step": 33752 }, { "epoch": 0.9854602785320137, "grad_norm": 0.7994685882948934, "learning_rate": 8.07785888077859e-08, "loss": 0.6934, "step": 33753 }, { "epoch": 0.985489474759861, "grad_norm": 0.7378752528904965, "learning_rate": 8.061638280616383e-08, "loss": 0.602, "step": 33754 }, { "epoch": 0.9855186709877084, "grad_norm": 0.68320994831499, "learning_rate": 8.045417680454177e-08, "loss": 0.5846, "step": 33755 }, { "epoch": 0.9855478672155558, "grad_norm": 0.7203274194547601, "learning_rate": 8.029197080291972e-08, "loss": 0.6525, "step": 33756 }, { "epoch": 0.9855770634434031, "grad_norm": 0.7173398396762759, "learning_rate": 8.012976480129766e-08, "loss": 0.5918, "step": 33757 }, { "epoch": 0.9856062596712505, "grad_norm": 0.7305158204748461, "learning_rate": 7.996755879967559e-08, "loss": 0.5872, "step": 33758 }, { "epoch": 0.9856354558990978, "grad_norm": 0.6979146948141745, "learning_rate": 7.980535279805353e-08, "loss": 0.561, "step": 33759 }, { "epoch": 0.9856646521269452, "grad_norm": 0.7134543746705176, "learning_rate": 7.964314679643148e-08, "loss": 0.6151, "step": 33760 }, { "epoch": 0.9856938483547926, "grad_norm": 0.6965625489782573, "learning_rate": 7.948094079480941e-08, "loss": 0.6338, "step": 33761 }, { "epoch": 0.9857230445826399, "grad_norm": 0.7324627540184337, "learning_rate": 7.931873479318735e-08, "loss": 0.6299, "step": 33762 }, { "epoch": 0.9857522408104873, "grad_norm": 0.7303754271168145, "learning_rate": 7.915652879156529e-08, "loss": 0.6432, "step": 33763 }, { "epoch": 0.9857814370383347, "grad_norm": 0.7437705613236734, "learning_rate": 7.899432278994323e-08, "loss": 0.6581, "step": 33764 }, { "epoch": 0.985810633266182, "grad_norm": 0.7336840718909974, "learning_rate": 7.883211678832117e-08, "loss": 0.6519, "step": 33765 }, { "epoch": 0.9858398294940294, "grad_norm": 0.7322763594345228, "learning_rate": 7.866991078669911e-08, "loss": 0.621, "step": 33766 }, { "epoch": 0.9858690257218767, "grad_norm": 0.7267716756925724, "learning_rate": 7.850770478507707e-08, "loss": 0.6661, "step": 33767 }, { "epoch": 0.9858982219497241, "grad_norm": 0.7215083403839582, "learning_rate": 7.8345498783455e-08, "loss": 0.6062, "step": 33768 }, { "epoch": 0.9859274181775715, "grad_norm": 0.7063556364019815, "learning_rate": 7.818329278183294e-08, "loss": 0.6132, "step": 33769 }, { "epoch": 0.9859566144054188, "grad_norm": 0.7360445260414097, "learning_rate": 7.802108678021088e-08, "loss": 0.6425, "step": 33770 }, { "epoch": 0.9859858106332662, "grad_norm": 0.6893172477177647, "learning_rate": 7.785888077858882e-08, "loss": 0.6012, "step": 33771 }, { "epoch": 0.9860150068611135, "grad_norm": 0.663546902970871, "learning_rate": 7.769667477696676e-08, "loss": 0.56, "step": 33772 }, { "epoch": 0.9860442030889609, "grad_norm": 0.7742622764593993, "learning_rate": 7.753446877534469e-08, "loss": 0.6461, "step": 33773 }, { "epoch": 0.9860733993168083, "grad_norm": 0.7440105720071009, "learning_rate": 7.737226277372264e-08, "loss": 0.6728, "step": 33774 }, { "epoch": 0.9861025955446556, "grad_norm": 0.7780074415070779, "learning_rate": 7.721005677210057e-08, "loss": 0.7139, "step": 33775 }, { "epoch": 0.986131791772503, "grad_norm": 0.8034019018029139, "learning_rate": 7.704785077047852e-08, "loss": 0.6409, "step": 33776 }, { "epoch": 0.9861609880003503, "grad_norm": 0.7259841448624013, "learning_rate": 7.688564476885645e-08, "loss": 0.646, "step": 33777 }, { "epoch": 0.9861901842281977, "grad_norm": 0.7389067904066833, "learning_rate": 7.672343876723439e-08, "loss": 0.6602, "step": 33778 }, { "epoch": 0.9862193804560451, "grad_norm": 0.7492064628238625, "learning_rate": 7.656123276561234e-08, "loss": 0.5816, "step": 33779 }, { "epoch": 0.9862485766838924, "grad_norm": 0.6833511100999694, "learning_rate": 7.639902676399027e-08, "loss": 0.6055, "step": 33780 }, { "epoch": 0.9862777729117398, "grad_norm": 0.7053364918318967, "learning_rate": 7.623682076236821e-08, "loss": 0.6707, "step": 33781 }, { "epoch": 0.9863069691395872, "grad_norm": 0.6990323729005714, "learning_rate": 7.607461476074615e-08, "loss": 0.5555, "step": 33782 }, { "epoch": 0.9863361653674345, "grad_norm": 0.7092112262146122, "learning_rate": 7.591240875912409e-08, "loss": 0.6313, "step": 33783 }, { "epoch": 0.9863653615952819, "grad_norm": 0.7228244563916554, "learning_rate": 7.575020275750203e-08, "loss": 0.6517, "step": 33784 }, { "epoch": 0.9863945578231292, "grad_norm": 0.6694909992684385, "learning_rate": 7.558799675587997e-08, "loss": 0.5858, "step": 33785 }, { "epoch": 0.9864237540509766, "grad_norm": 0.7808200879387684, "learning_rate": 7.542579075425791e-08, "loss": 0.6749, "step": 33786 }, { "epoch": 0.986452950278824, "grad_norm": 0.6892670618095731, "learning_rate": 7.526358475263585e-08, "loss": 0.5844, "step": 33787 }, { "epoch": 0.9864821465066713, "grad_norm": 0.7940165414956197, "learning_rate": 7.51013787510138e-08, "loss": 0.7736, "step": 33788 }, { "epoch": 0.9865113427345187, "grad_norm": 0.7361567506150046, "learning_rate": 7.493917274939174e-08, "loss": 0.6436, "step": 33789 }, { "epoch": 0.986540538962366, "grad_norm": 0.6894655734532965, "learning_rate": 7.477696674776968e-08, "loss": 0.6249, "step": 33790 }, { "epoch": 0.9865697351902134, "grad_norm": 0.735666462038299, "learning_rate": 7.46147607461476e-08, "loss": 0.6674, "step": 33791 }, { "epoch": 0.9865989314180608, "grad_norm": 0.718587745737425, "learning_rate": 7.445255474452556e-08, "loss": 0.657, "step": 33792 }, { "epoch": 0.9866281276459081, "grad_norm": 0.6812814724549062, "learning_rate": 7.42903487429035e-08, "loss": 0.5592, "step": 33793 }, { "epoch": 0.9866573238737555, "grad_norm": 0.7265270333239543, "learning_rate": 7.412814274128144e-08, "loss": 0.6826, "step": 33794 }, { "epoch": 0.9866865201016028, "grad_norm": 0.7355665416937203, "learning_rate": 7.396593673965938e-08, "loss": 0.6553, "step": 33795 }, { "epoch": 0.9867157163294502, "grad_norm": 0.6804312691743967, "learning_rate": 7.380373073803731e-08, "loss": 0.6104, "step": 33796 }, { "epoch": 0.9867449125572976, "grad_norm": 0.6768790129271152, "learning_rate": 7.364152473641526e-08, "loss": 0.5738, "step": 33797 }, { "epoch": 0.9867741087851449, "grad_norm": 0.7296156412598992, "learning_rate": 7.347931873479319e-08, "loss": 0.6576, "step": 33798 }, { "epoch": 0.9868033050129923, "grad_norm": 0.7035515819974097, "learning_rate": 7.331711273317113e-08, "loss": 0.5962, "step": 33799 }, { "epoch": 0.9868325012408397, "grad_norm": 0.7383679124776653, "learning_rate": 7.315490673154907e-08, "loss": 0.6557, "step": 33800 }, { "epoch": 0.986861697468687, "grad_norm": 0.7471446321491354, "learning_rate": 7.299270072992701e-08, "loss": 0.6185, "step": 33801 }, { "epoch": 0.9868908936965344, "grad_norm": 0.7661421271944109, "learning_rate": 7.283049472830495e-08, "loss": 0.6456, "step": 33802 }, { "epoch": 0.9869200899243817, "grad_norm": 0.722507304099359, "learning_rate": 7.266828872668289e-08, "loss": 0.6297, "step": 33803 }, { "epoch": 0.9869492861522291, "grad_norm": 0.7212142470520513, "learning_rate": 7.250608272506083e-08, "loss": 0.6793, "step": 33804 }, { "epoch": 0.9869784823800765, "grad_norm": 0.7083122003809984, "learning_rate": 7.234387672343877e-08, "loss": 0.6214, "step": 33805 }, { "epoch": 0.9870076786079238, "grad_norm": 0.7363272475661543, "learning_rate": 7.218167072181671e-08, "loss": 0.6791, "step": 33806 }, { "epoch": 0.9870368748357712, "grad_norm": 0.7038509632353332, "learning_rate": 7.201946472019465e-08, "loss": 0.5839, "step": 33807 }, { "epoch": 0.9870660710636185, "grad_norm": 0.7114329486813612, "learning_rate": 7.18572587185726e-08, "loss": 0.6182, "step": 33808 }, { "epoch": 0.9870952672914659, "grad_norm": 0.6648764893876368, "learning_rate": 7.169505271695054e-08, "loss": 0.5572, "step": 33809 }, { "epoch": 0.9871244635193133, "grad_norm": 0.7000689800556873, "learning_rate": 7.153284671532848e-08, "loss": 0.587, "step": 33810 }, { "epoch": 0.9871536597471606, "grad_norm": 0.7965657615498722, "learning_rate": 7.137064071370642e-08, "loss": 0.6909, "step": 33811 }, { "epoch": 0.987182855975008, "grad_norm": 0.7160627275156721, "learning_rate": 7.120843471208436e-08, "loss": 0.6345, "step": 33812 }, { "epoch": 0.9872120522028554, "grad_norm": 0.6873921112700677, "learning_rate": 7.10462287104623e-08, "loss": 0.5698, "step": 33813 }, { "epoch": 0.9872412484307027, "grad_norm": 0.7520835253999067, "learning_rate": 7.088402270884023e-08, "loss": 0.5885, "step": 33814 }, { "epoch": 0.9872704446585501, "grad_norm": 0.7215740222151259, "learning_rate": 7.072181670721818e-08, "loss": 0.6446, "step": 33815 }, { "epoch": 0.9872996408863974, "grad_norm": 0.6940239457675075, "learning_rate": 7.05596107055961e-08, "loss": 0.5959, "step": 33816 }, { "epoch": 0.9873288371142448, "grad_norm": 0.7165544096803639, "learning_rate": 7.039740470397406e-08, "loss": 0.5916, "step": 33817 }, { "epoch": 0.9873580333420922, "grad_norm": 0.7208085290518258, "learning_rate": 7.023519870235199e-08, "loss": 0.6322, "step": 33818 }, { "epoch": 0.9873872295699395, "grad_norm": 0.6964259901771492, "learning_rate": 7.007299270072993e-08, "loss": 0.628, "step": 33819 }, { "epoch": 0.9874164257977869, "grad_norm": 0.7117306470086927, "learning_rate": 6.991078669910787e-08, "loss": 0.6581, "step": 33820 }, { "epoch": 0.9874456220256342, "grad_norm": 0.7243391612902382, "learning_rate": 6.974858069748581e-08, "loss": 0.6022, "step": 33821 }, { "epoch": 0.9874748182534816, "grad_norm": 0.712058891081837, "learning_rate": 6.958637469586375e-08, "loss": 0.5947, "step": 33822 }, { "epoch": 0.987504014481329, "grad_norm": 0.7200831493244799, "learning_rate": 6.942416869424169e-08, "loss": 0.6363, "step": 33823 }, { "epoch": 0.9875332107091763, "grad_norm": 0.6642792116866078, "learning_rate": 6.926196269261963e-08, "loss": 0.5529, "step": 33824 }, { "epoch": 0.9875624069370237, "grad_norm": 0.6827303709391767, "learning_rate": 6.909975669099757e-08, "loss": 0.5668, "step": 33825 }, { "epoch": 0.987591603164871, "grad_norm": 0.6824708956130657, "learning_rate": 6.893755068937551e-08, "loss": 0.5856, "step": 33826 }, { "epoch": 0.9876207993927184, "grad_norm": 0.7148453733922676, "learning_rate": 6.877534468775345e-08, "loss": 0.6076, "step": 33827 }, { "epoch": 0.9876499956205658, "grad_norm": 0.7029728963922308, "learning_rate": 6.86131386861314e-08, "loss": 0.6049, "step": 33828 }, { "epoch": 0.9876791918484132, "grad_norm": 0.6913109614998832, "learning_rate": 6.845093268450933e-08, "loss": 0.6165, "step": 33829 }, { "epoch": 0.9877083880762606, "grad_norm": 0.7186068957789147, "learning_rate": 6.828872668288728e-08, "loss": 0.6517, "step": 33830 }, { "epoch": 0.987737584304108, "grad_norm": 0.7310955171138893, "learning_rate": 6.812652068126522e-08, "loss": 0.6929, "step": 33831 }, { "epoch": 0.9877667805319553, "grad_norm": 0.7343212455517963, "learning_rate": 6.796431467964314e-08, "loss": 0.6544, "step": 33832 }, { "epoch": 0.9877959767598027, "grad_norm": 0.7007836582758303, "learning_rate": 6.78021086780211e-08, "loss": 0.5869, "step": 33833 }, { "epoch": 0.98782517298765, "grad_norm": 0.7263341937910311, "learning_rate": 6.763990267639902e-08, "loss": 0.6563, "step": 33834 }, { "epoch": 0.9878543692154974, "grad_norm": 0.7798131770965183, "learning_rate": 6.747769667477698e-08, "loss": 0.7483, "step": 33835 }, { "epoch": 0.9878835654433448, "grad_norm": 0.7519290477211167, "learning_rate": 6.731549067315492e-08, "loss": 0.636, "step": 33836 }, { "epoch": 0.9879127616711921, "grad_norm": 0.6814399145715513, "learning_rate": 6.715328467153285e-08, "loss": 0.5901, "step": 33837 }, { "epoch": 0.9879419578990395, "grad_norm": 0.746543704948418, "learning_rate": 6.69910786699108e-08, "loss": 0.6543, "step": 33838 }, { "epoch": 0.9879711541268869, "grad_norm": 0.7323612965734928, "learning_rate": 6.682887266828873e-08, "loss": 0.6202, "step": 33839 }, { "epoch": 0.9880003503547342, "grad_norm": 0.6771961821909426, "learning_rate": 6.666666666666668e-08, "loss": 0.567, "step": 33840 }, { "epoch": 0.9880295465825816, "grad_norm": 0.7504112162847214, "learning_rate": 6.650446066504461e-08, "loss": 0.6951, "step": 33841 }, { "epoch": 0.9880587428104289, "grad_norm": 0.7049980236566034, "learning_rate": 6.634225466342255e-08, "loss": 0.6296, "step": 33842 }, { "epoch": 0.9880879390382763, "grad_norm": 0.7588459622642594, "learning_rate": 6.618004866180049e-08, "loss": 0.6881, "step": 33843 }, { "epoch": 0.9881171352661237, "grad_norm": 0.777874783667544, "learning_rate": 6.601784266017843e-08, "loss": 0.612, "step": 33844 }, { "epoch": 0.988146331493971, "grad_norm": 0.7890250311643315, "learning_rate": 6.585563665855637e-08, "loss": 0.7141, "step": 33845 }, { "epoch": 0.9881755277218184, "grad_norm": 0.7712868134360863, "learning_rate": 6.569343065693431e-08, "loss": 0.5854, "step": 33846 }, { "epoch": 0.9882047239496657, "grad_norm": 0.6938874230893282, "learning_rate": 6.553122465531225e-08, "loss": 0.6165, "step": 33847 }, { "epoch": 0.9882339201775131, "grad_norm": 0.7173052017484499, "learning_rate": 6.536901865369019e-08, "loss": 0.5956, "step": 33848 }, { "epoch": 0.9882631164053605, "grad_norm": 0.7828664146026283, "learning_rate": 6.520681265206813e-08, "loss": 0.6053, "step": 33849 }, { "epoch": 0.9882923126332078, "grad_norm": 0.7285251930706853, "learning_rate": 6.504460665044607e-08, "loss": 0.6749, "step": 33850 }, { "epoch": 0.9883215088610552, "grad_norm": 0.6936023272128912, "learning_rate": 6.488240064882401e-08, "loss": 0.6297, "step": 33851 }, { "epoch": 0.9883507050889025, "grad_norm": 0.6549679719837596, "learning_rate": 6.472019464720196e-08, "loss": 0.5287, "step": 33852 }, { "epoch": 0.9883799013167499, "grad_norm": 0.7598345019226321, "learning_rate": 6.45579886455799e-08, "loss": 0.6539, "step": 33853 }, { "epoch": 0.9884090975445973, "grad_norm": 0.6875666542949781, "learning_rate": 6.439578264395784e-08, "loss": 0.6199, "step": 33854 }, { "epoch": 0.9884382937724446, "grad_norm": 0.7091249368154618, "learning_rate": 6.423357664233576e-08, "loss": 0.61, "step": 33855 }, { "epoch": 0.988467490000292, "grad_norm": 0.7126889963147578, "learning_rate": 6.407137064071372e-08, "loss": 0.6552, "step": 33856 }, { "epoch": 0.9884966862281394, "grad_norm": 0.749267551211066, "learning_rate": 6.390916463909165e-08, "loss": 0.6695, "step": 33857 }, { "epoch": 0.9885258824559867, "grad_norm": 0.7003523043281865, "learning_rate": 6.37469586374696e-08, "loss": 0.5989, "step": 33858 }, { "epoch": 0.9885550786838341, "grad_norm": 0.6964807806200889, "learning_rate": 6.358475263584753e-08, "loss": 0.536, "step": 33859 }, { "epoch": 0.9885842749116814, "grad_norm": 0.6788426679037793, "learning_rate": 6.342254663422547e-08, "loss": 0.5853, "step": 33860 }, { "epoch": 0.9886134711395288, "grad_norm": 0.7172052263414005, "learning_rate": 6.326034063260341e-08, "loss": 0.6137, "step": 33861 }, { "epoch": 0.9886426673673762, "grad_norm": 0.6982242481979181, "learning_rate": 6.309813463098135e-08, "loss": 0.5879, "step": 33862 }, { "epoch": 0.9886718635952235, "grad_norm": 0.7614605471119485, "learning_rate": 6.293592862935929e-08, "loss": 0.668, "step": 33863 }, { "epoch": 0.9887010598230709, "grad_norm": 0.7325536843676324, "learning_rate": 6.277372262773723e-08, "loss": 0.6874, "step": 33864 }, { "epoch": 0.9887302560509182, "grad_norm": 0.7331225323395562, "learning_rate": 6.261151662611517e-08, "loss": 0.6209, "step": 33865 }, { "epoch": 0.9887594522787656, "grad_norm": 0.7135782324187633, "learning_rate": 6.244931062449311e-08, "loss": 0.6328, "step": 33866 }, { "epoch": 0.988788648506613, "grad_norm": 0.7057424421890568, "learning_rate": 6.228710462287105e-08, "loss": 0.5928, "step": 33867 }, { "epoch": 0.9888178447344603, "grad_norm": 0.7729035526209835, "learning_rate": 6.212489862124899e-08, "loss": 0.6872, "step": 33868 }, { "epoch": 0.9888470409623077, "grad_norm": 0.668776467306115, "learning_rate": 6.196269261962693e-08, "loss": 0.5491, "step": 33869 }, { "epoch": 0.988876237190155, "grad_norm": 0.7130718312137987, "learning_rate": 6.180048661800487e-08, "loss": 0.6314, "step": 33870 }, { "epoch": 0.9889054334180024, "grad_norm": 0.7146220047433323, "learning_rate": 6.163828061638281e-08, "loss": 0.6369, "step": 33871 }, { "epoch": 0.9889346296458498, "grad_norm": 0.7404332133544244, "learning_rate": 6.147607461476075e-08, "loss": 0.6541, "step": 33872 }, { "epoch": 0.9889638258736971, "grad_norm": 0.7455375923128696, "learning_rate": 6.131386861313868e-08, "loss": 0.6861, "step": 33873 }, { "epoch": 0.9889930221015445, "grad_norm": 0.7299371210205672, "learning_rate": 6.115166261151664e-08, "loss": 0.6842, "step": 33874 }, { "epoch": 0.9890222183293919, "grad_norm": 0.6863484295227945, "learning_rate": 6.098945660989456e-08, "loss": 0.599, "step": 33875 }, { "epoch": 0.9890514145572392, "grad_norm": 0.7258805210329234, "learning_rate": 6.082725060827252e-08, "loss": 0.5543, "step": 33876 }, { "epoch": 0.9890806107850866, "grad_norm": 0.7673648765851184, "learning_rate": 6.066504460665046e-08, "loss": 0.6996, "step": 33877 }, { "epoch": 0.9891098070129339, "grad_norm": 0.7065295887663043, "learning_rate": 6.050283860502839e-08, "loss": 0.6063, "step": 33878 }, { "epoch": 0.9891390032407813, "grad_norm": 0.7237712462404253, "learning_rate": 6.034063260340634e-08, "loss": 0.6533, "step": 33879 }, { "epoch": 0.9891681994686287, "grad_norm": 0.7534730732677115, "learning_rate": 6.017842660178427e-08, "loss": 0.6547, "step": 33880 }, { "epoch": 0.989197395696476, "grad_norm": 0.773593106412159, "learning_rate": 6.001622060016222e-08, "loss": 0.7457, "step": 33881 }, { "epoch": 0.9892265919243234, "grad_norm": 0.7097222762280122, "learning_rate": 5.985401459854015e-08, "loss": 0.5723, "step": 33882 }, { "epoch": 0.9892557881521707, "grad_norm": 0.6759970925073839, "learning_rate": 5.969180859691809e-08, "loss": 0.5455, "step": 33883 }, { "epoch": 0.9892849843800181, "grad_norm": 0.7337734920088412, "learning_rate": 5.9529602595296036e-08, "loss": 0.6622, "step": 33884 }, { "epoch": 0.9893141806078655, "grad_norm": 0.6885652089653869, "learning_rate": 5.936739659367397e-08, "loss": 0.5813, "step": 33885 }, { "epoch": 0.9893433768357128, "grad_norm": 0.7276092096635667, "learning_rate": 5.9205190592051904e-08, "loss": 0.6616, "step": 33886 }, { "epoch": 0.9893725730635602, "grad_norm": 0.7298172383429137, "learning_rate": 5.904298459042985e-08, "loss": 0.6348, "step": 33887 }, { "epoch": 0.9894017692914076, "grad_norm": 0.685893313942312, "learning_rate": 5.888077858880779e-08, "loss": 0.59, "step": 33888 }, { "epoch": 0.9894309655192549, "grad_norm": 0.7536160937717908, "learning_rate": 5.871857258718573e-08, "loss": 0.7321, "step": 33889 }, { "epoch": 0.9894601617471023, "grad_norm": 0.717401476891478, "learning_rate": 5.855636658556367e-08, "loss": 0.6414, "step": 33890 }, { "epoch": 0.9894893579749496, "grad_norm": 0.7425420831620237, "learning_rate": 5.839416058394161e-08, "loss": 0.6656, "step": 33891 }, { "epoch": 0.989518554202797, "grad_norm": 0.732105777480064, "learning_rate": 5.8231954582319554e-08, "loss": 0.6474, "step": 33892 }, { "epoch": 0.9895477504306444, "grad_norm": 0.717011167706833, "learning_rate": 5.806974858069749e-08, "loss": 0.6217, "step": 33893 }, { "epoch": 0.9895769466584917, "grad_norm": 0.8045944162658802, "learning_rate": 5.7907542579075435e-08, "loss": 0.8093, "step": 33894 }, { "epoch": 0.9896061428863391, "grad_norm": 0.8248283631732763, "learning_rate": 5.774533657745337e-08, "loss": 0.7477, "step": 33895 }, { "epoch": 0.9896353391141864, "grad_norm": 0.7230386463155094, "learning_rate": 5.758313057583131e-08, "loss": 0.6756, "step": 33896 }, { "epoch": 0.9896645353420338, "grad_norm": 0.714643854036862, "learning_rate": 5.742092457420925e-08, "loss": 0.6178, "step": 33897 }, { "epoch": 0.9896937315698812, "grad_norm": 0.7350297729397235, "learning_rate": 5.725871857258719e-08, "loss": 0.5896, "step": 33898 }, { "epoch": 0.9897229277977285, "grad_norm": 0.6781160691052579, "learning_rate": 5.709651257096513e-08, "loss": 0.5835, "step": 33899 }, { "epoch": 0.9897521240255759, "grad_norm": 0.6979737686352746, "learning_rate": 5.693430656934307e-08, "loss": 0.5902, "step": 33900 }, { "epoch": 0.9897813202534232, "grad_norm": 1.7362400016124682, "learning_rate": 5.6772100567721006e-08, "loss": 0.8031, "step": 33901 }, { "epoch": 0.9898105164812706, "grad_norm": 0.7176957531689064, "learning_rate": 5.6609894566098954e-08, "loss": 0.5933, "step": 33902 }, { "epoch": 0.989839712709118, "grad_norm": 0.7266913100168928, "learning_rate": 5.644768856447689e-08, "loss": 0.6822, "step": 33903 }, { "epoch": 0.9898689089369653, "grad_norm": 0.7751390896078721, "learning_rate": 5.628548256285483e-08, "loss": 0.7009, "step": 33904 }, { "epoch": 0.9898981051648127, "grad_norm": 0.7546282383245646, "learning_rate": 5.612327656123277e-08, "loss": 0.6498, "step": 33905 }, { "epoch": 0.98992730139266, "grad_norm": 0.8062645793087523, "learning_rate": 5.596107055961071e-08, "loss": 0.6948, "step": 33906 }, { "epoch": 0.9899564976205074, "grad_norm": 0.7241587200416355, "learning_rate": 5.579886455798865e-08, "loss": 0.6255, "step": 33907 }, { "epoch": 0.9899856938483548, "grad_norm": 0.7364727584082889, "learning_rate": 5.563665855636659e-08, "loss": 0.713, "step": 33908 }, { "epoch": 0.9900148900762021, "grad_norm": 0.7639339682674902, "learning_rate": 5.5474452554744525e-08, "loss": 0.6416, "step": 33909 }, { "epoch": 0.9900440863040495, "grad_norm": 0.7772577635716724, "learning_rate": 5.531224655312247e-08, "loss": 0.7581, "step": 33910 }, { "epoch": 0.9900732825318969, "grad_norm": 0.7016998676642717, "learning_rate": 5.5150040551500406e-08, "loss": 0.6544, "step": 33911 }, { "epoch": 0.9901024787597442, "grad_norm": 0.7171549790823639, "learning_rate": 5.4987834549878353e-08, "loss": 0.6058, "step": 33912 }, { "epoch": 0.9901316749875916, "grad_norm": 0.7323129209438535, "learning_rate": 5.482562854825629e-08, "loss": 0.72, "step": 33913 }, { "epoch": 0.9901608712154389, "grad_norm": 0.6946832133583601, "learning_rate": 5.466342254663423e-08, "loss": 0.6135, "step": 33914 }, { "epoch": 0.9901900674432863, "grad_norm": 0.7458530573882218, "learning_rate": 5.450121654501217e-08, "loss": 0.6074, "step": 33915 }, { "epoch": 0.9902192636711337, "grad_norm": 0.7087465787640872, "learning_rate": 5.433901054339011e-08, "loss": 0.5845, "step": 33916 }, { "epoch": 0.990248459898981, "grad_norm": 0.7200794955797873, "learning_rate": 5.4176804541768056e-08, "loss": 0.6505, "step": 33917 }, { "epoch": 0.9902776561268284, "grad_norm": 0.7291341425705167, "learning_rate": 5.401459854014599e-08, "loss": 0.6522, "step": 33918 }, { "epoch": 0.9903068523546757, "grad_norm": 0.7112897631414519, "learning_rate": 5.3852392538523924e-08, "loss": 0.601, "step": 33919 }, { "epoch": 0.9903360485825231, "grad_norm": 0.692107383187249, "learning_rate": 5.369018653690187e-08, "loss": 0.578, "step": 33920 }, { "epoch": 0.9903652448103705, "grad_norm": 0.7540472124548583, "learning_rate": 5.3527980535279806e-08, "loss": 0.6, "step": 33921 }, { "epoch": 0.9903944410382178, "grad_norm": 0.7538314812900053, "learning_rate": 5.336577453365775e-08, "loss": 0.7048, "step": 33922 }, { "epoch": 0.9904236372660652, "grad_norm": 0.6736391945871578, "learning_rate": 5.3203568532035694e-08, "loss": 0.5773, "step": 33923 }, { "epoch": 0.9904528334939126, "grad_norm": 0.654648932323894, "learning_rate": 5.304136253041363e-08, "loss": 0.5688, "step": 33924 }, { "epoch": 0.9904820297217599, "grad_norm": 0.6791311053421656, "learning_rate": 5.2879156528791575e-08, "loss": 0.5567, "step": 33925 }, { "epoch": 0.9905112259496073, "grad_norm": 0.7209069344446608, "learning_rate": 5.271695052716951e-08, "loss": 0.6618, "step": 33926 }, { "epoch": 0.9905404221774546, "grad_norm": 0.7811543513349005, "learning_rate": 5.255474452554744e-08, "loss": 0.6931, "step": 33927 }, { "epoch": 0.990569618405302, "grad_norm": 0.7849028060440594, "learning_rate": 5.239253852392539e-08, "loss": 0.697, "step": 33928 }, { "epoch": 0.9905988146331494, "grad_norm": 0.698922838119105, "learning_rate": 5.2230332522303324e-08, "loss": 0.6071, "step": 33929 }, { "epoch": 0.9906280108609967, "grad_norm": 0.7815769483453384, "learning_rate": 5.206812652068127e-08, "loss": 0.6835, "step": 33930 }, { "epoch": 0.9906572070888441, "grad_norm": 0.690217718853629, "learning_rate": 5.190592051905921e-08, "loss": 0.6208, "step": 33931 }, { "epoch": 0.9906864033166914, "grad_norm": 0.7455903431549961, "learning_rate": 5.1743714517437146e-08, "loss": 0.6073, "step": 33932 }, { "epoch": 0.9907155995445388, "grad_norm": 0.7202521019846753, "learning_rate": 5.158150851581509e-08, "loss": 0.6851, "step": 33933 }, { "epoch": 0.9907447957723862, "grad_norm": 0.6749371493616048, "learning_rate": 5.141930251419303e-08, "loss": 0.5771, "step": 33934 }, { "epoch": 0.9907739920002335, "grad_norm": 0.7157025591534604, "learning_rate": 5.1257096512570974e-08, "loss": 0.6684, "step": 33935 }, { "epoch": 0.9908031882280809, "grad_norm": 0.7506073525906299, "learning_rate": 5.109489051094891e-08, "loss": 0.6261, "step": 33936 }, { "epoch": 0.9908323844559283, "grad_norm": 0.7061706410138212, "learning_rate": 5.093268450932685e-08, "loss": 0.5876, "step": 33937 }, { "epoch": 0.9908615806837756, "grad_norm": 0.7287676301117733, "learning_rate": 5.077047850770479e-08, "loss": 0.6191, "step": 33938 }, { "epoch": 0.990890776911623, "grad_norm": 0.6866768009894835, "learning_rate": 5.060827250608273e-08, "loss": 0.5954, "step": 33939 }, { "epoch": 0.9909199731394703, "grad_norm": 0.6860722260405038, "learning_rate": 5.044606650446067e-08, "loss": 0.6088, "step": 33940 }, { "epoch": 0.9909491693673177, "grad_norm": 0.7817104279104429, "learning_rate": 5.028386050283861e-08, "loss": 0.6356, "step": 33941 }, { "epoch": 0.9909783655951651, "grad_norm": 0.772653873152646, "learning_rate": 5.0121654501216546e-08, "loss": 0.6345, "step": 33942 }, { "epoch": 0.9910075618230124, "grad_norm": 0.7204121633748992, "learning_rate": 4.995944849959449e-08, "loss": 0.6294, "step": 33943 }, { "epoch": 0.9910367580508598, "grad_norm": 0.6625632149284725, "learning_rate": 4.979724249797243e-08, "loss": 0.5476, "step": 33944 }, { "epoch": 0.9910659542787071, "grad_norm": 0.7382104136885163, "learning_rate": 4.9635036496350374e-08, "loss": 0.6617, "step": 33945 }, { "epoch": 0.9910951505065545, "grad_norm": 0.790160430176862, "learning_rate": 4.947283049472831e-08, "loss": 0.7339, "step": 33946 }, { "epoch": 0.9911243467344019, "grad_norm": 0.7537963053158689, "learning_rate": 4.931062449310625e-08, "loss": 0.7091, "step": 33947 }, { "epoch": 0.9911535429622492, "grad_norm": 0.8228864194044605, "learning_rate": 4.914841849148419e-08, "loss": 0.7017, "step": 33948 }, { "epoch": 0.9911827391900966, "grad_norm": 0.7327831330311911, "learning_rate": 4.898621248986213e-08, "loss": 0.6311, "step": 33949 }, { "epoch": 0.9912119354179441, "grad_norm": 0.717018909723493, "learning_rate": 4.8824006488240064e-08, "loss": 0.6837, "step": 33950 }, { "epoch": 0.9912411316457914, "grad_norm": 0.7025643596686354, "learning_rate": 4.866180048661801e-08, "loss": 0.5456, "step": 33951 }, { "epoch": 0.9912703278736388, "grad_norm": 0.7406720805742978, "learning_rate": 4.8499594484995945e-08, "loss": 0.7034, "step": 33952 }, { "epoch": 0.9912995241014861, "grad_norm": 0.6756583366938262, "learning_rate": 4.833738848337389e-08, "loss": 0.5758, "step": 33953 }, { "epoch": 0.9913287203293335, "grad_norm": 0.725602239500666, "learning_rate": 4.8175182481751826e-08, "loss": 0.6165, "step": 33954 }, { "epoch": 0.9913579165571809, "grad_norm": 0.7687537739864059, "learning_rate": 4.801297648012977e-08, "loss": 0.6891, "step": 33955 }, { "epoch": 0.9913871127850282, "grad_norm": 0.707311261582304, "learning_rate": 4.785077047850771e-08, "loss": 0.6267, "step": 33956 }, { "epoch": 0.9914163090128756, "grad_norm": 0.7140935220013722, "learning_rate": 4.768856447688565e-08, "loss": 0.6465, "step": 33957 }, { "epoch": 0.991445505240723, "grad_norm": 0.7354669681447584, "learning_rate": 4.752635847526359e-08, "loss": 0.6336, "step": 33958 }, { "epoch": 0.9914747014685703, "grad_norm": 0.8069276383504294, "learning_rate": 4.736415247364153e-08, "loss": 0.6697, "step": 33959 }, { "epoch": 0.9915038976964177, "grad_norm": 0.7626809143124514, "learning_rate": 4.7201946472019464e-08, "loss": 0.6854, "step": 33960 }, { "epoch": 0.991533093924265, "grad_norm": 0.6808014056657766, "learning_rate": 4.703974047039741e-08, "loss": 0.5781, "step": 33961 }, { "epoch": 0.9915622901521124, "grad_norm": 0.6876240706871043, "learning_rate": 4.6877534468775345e-08, "loss": 0.5717, "step": 33962 }, { "epoch": 0.9915914863799598, "grad_norm": 0.6890967483578808, "learning_rate": 4.671532846715329e-08, "loss": 0.5971, "step": 33963 }, { "epoch": 0.9916206826078071, "grad_norm": 0.7346821032556065, "learning_rate": 4.6553122465531226e-08, "loss": 0.6998, "step": 33964 }, { "epoch": 0.9916498788356545, "grad_norm": 0.7455178980798531, "learning_rate": 4.639091646390917e-08, "loss": 0.6479, "step": 33965 }, { "epoch": 0.9916790750635018, "grad_norm": 0.6724664959380025, "learning_rate": 4.6228710462287114e-08, "loss": 0.6036, "step": 33966 }, { "epoch": 0.9917082712913492, "grad_norm": 0.8158872829468188, "learning_rate": 4.606650446066505e-08, "loss": 0.7053, "step": 33967 }, { "epoch": 0.9917374675191966, "grad_norm": 0.7449361348731406, "learning_rate": 4.590429845904298e-08, "loss": 0.5814, "step": 33968 }, { "epoch": 0.9917666637470439, "grad_norm": 0.7498158172384533, "learning_rate": 4.574209245742093e-08, "loss": 0.6337, "step": 33969 }, { "epoch": 0.9917958599748913, "grad_norm": 0.7063906747294922, "learning_rate": 4.557988645579886e-08, "loss": 0.6572, "step": 33970 }, { "epoch": 0.9918250562027386, "grad_norm": 0.7650003264516749, "learning_rate": 4.541768045417681e-08, "loss": 0.6837, "step": 33971 }, { "epoch": 0.991854252430586, "grad_norm": 0.6824616753117035, "learning_rate": 4.525547445255475e-08, "loss": 0.5995, "step": 33972 }, { "epoch": 0.9918834486584334, "grad_norm": 0.6745494208338166, "learning_rate": 4.5093268450932685e-08, "loss": 0.5579, "step": 33973 }, { "epoch": 0.9919126448862807, "grad_norm": 0.7163883133493871, "learning_rate": 4.493106244931063e-08, "loss": 0.6317, "step": 33974 }, { "epoch": 0.9919418411141281, "grad_norm": 0.9548589897180153, "learning_rate": 4.4768856447688566e-08, "loss": 0.6363, "step": 33975 }, { "epoch": 0.9919710373419754, "grad_norm": 0.743798499187072, "learning_rate": 4.4606650446066514e-08, "loss": 0.6359, "step": 33976 }, { "epoch": 0.9920002335698228, "grad_norm": 0.6710451940774774, "learning_rate": 4.444444444444445e-08, "loss": 0.5513, "step": 33977 }, { "epoch": 0.9920294297976702, "grad_norm": 0.7430182125431238, "learning_rate": 4.428223844282238e-08, "loss": 0.604, "step": 33978 }, { "epoch": 0.9920586260255175, "grad_norm": 0.7037204667700493, "learning_rate": 4.412003244120033e-08, "loss": 0.5727, "step": 33979 }, { "epoch": 0.9920878222533649, "grad_norm": 0.7271582524980644, "learning_rate": 4.395782643957827e-08, "loss": 0.6656, "step": 33980 }, { "epoch": 0.9921170184812123, "grad_norm": 0.7220956329935959, "learning_rate": 4.379562043795621e-08, "loss": 0.5786, "step": 33981 }, { "epoch": 0.9921462147090596, "grad_norm": 0.757851988112267, "learning_rate": 4.363341443633415e-08, "loss": 0.6655, "step": 33982 }, { "epoch": 0.992175410936907, "grad_norm": 0.7007969731985824, "learning_rate": 4.3471208434712085e-08, "loss": 0.5827, "step": 33983 }, { "epoch": 0.9922046071647543, "grad_norm": 0.7017943047934464, "learning_rate": 4.330900243309003e-08, "loss": 0.6204, "step": 33984 }, { "epoch": 0.9922338033926017, "grad_norm": 0.702018349992088, "learning_rate": 4.3146796431467966e-08, "loss": 0.5972, "step": 33985 }, { "epoch": 0.9922629996204491, "grad_norm": 0.7179021271617482, "learning_rate": 4.298459042984591e-08, "loss": 0.6611, "step": 33986 }, { "epoch": 0.9922921958482964, "grad_norm": 0.6968879784624773, "learning_rate": 4.282238442822385e-08, "loss": 0.5883, "step": 33987 }, { "epoch": 0.9923213920761438, "grad_norm": 0.6444771276094481, "learning_rate": 4.266017842660179e-08, "loss": 0.5177, "step": 33988 }, { "epoch": 0.9923505883039911, "grad_norm": 0.7355413602926587, "learning_rate": 4.249797242497973e-08, "loss": 0.6076, "step": 33989 }, { "epoch": 0.9923797845318385, "grad_norm": 0.7401723835336598, "learning_rate": 4.233576642335767e-08, "loss": 0.6648, "step": 33990 }, { "epoch": 0.9924089807596859, "grad_norm": 0.7024824583044774, "learning_rate": 4.21735604217356e-08, "loss": 0.6437, "step": 33991 }, { "epoch": 0.9924381769875332, "grad_norm": 0.6768767488701865, "learning_rate": 4.201135442011355e-08, "loss": 0.5386, "step": 33992 }, { "epoch": 0.9924673732153806, "grad_norm": 0.699180217522112, "learning_rate": 4.1849148418491484e-08, "loss": 0.6006, "step": 33993 }, { "epoch": 0.992496569443228, "grad_norm": 0.6921800269061651, "learning_rate": 4.168694241686943e-08, "loss": 0.6166, "step": 33994 }, { "epoch": 0.9925257656710753, "grad_norm": 0.7828756662382681, "learning_rate": 4.1524736415247366e-08, "loss": 0.6912, "step": 33995 }, { "epoch": 0.9925549618989227, "grad_norm": 0.6944351256609053, "learning_rate": 4.1362530413625306e-08, "loss": 0.6073, "step": 33996 }, { "epoch": 0.99258415812677, "grad_norm": 0.7379063147746108, "learning_rate": 4.120032441200325e-08, "loss": 0.5806, "step": 33997 }, { "epoch": 0.9926133543546174, "grad_norm": 0.7108677567765418, "learning_rate": 4.103811841038119e-08, "loss": 0.6125, "step": 33998 }, { "epoch": 0.9926425505824648, "grad_norm": 0.7542724296077801, "learning_rate": 4.087591240875913e-08, "loss": 0.6353, "step": 33999 }, { "epoch": 0.9926717468103121, "grad_norm": 0.7759161928549365, "learning_rate": 4.071370640713707e-08, "loss": 0.6749, "step": 34000 }, { "epoch": 0.9927009430381595, "grad_norm": 0.7143041721742323, "learning_rate": 4.0551500405515e-08, "loss": 0.6392, "step": 34001 }, { "epoch": 0.9927301392660068, "grad_norm": 0.6921796345502627, "learning_rate": 4.038929440389295e-08, "loss": 0.5553, "step": 34002 }, { "epoch": 0.9927593354938542, "grad_norm": 0.7199594227781084, "learning_rate": 4.0227088402270884e-08, "loss": 0.6421, "step": 34003 }, { "epoch": 0.9927885317217016, "grad_norm": 0.7817366373720762, "learning_rate": 4.006488240064883e-08, "loss": 0.6699, "step": 34004 }, { "epoch": 0.9928177279495489, "grad_norm": 0.7909500848384409, "learning_rate": 3.9902676399026765e-08, "loss": 0.6685, "step": 34005 }, { "epoch": 0.9928469241773963, "grad_norm": 0.7724571920024333, "learning_rate": 3.9740470397404706e-08, "loss": 0.6608, "step": 34006 }, { "epoch": 0.9928761204052436, "grad_norm": 0.7103331437518233, "learning_rate": 3.9578264395782646e-08, "loss": 0.6219, "step": 34007 }, { "epoch": 0.992905316633091, "grad_norm": 0.7187281236036882, "learning_rate": 3.941605839416059e-08, "loss": 0.6194, "step": 34008 }, { "epoch": 0.9929345128609384, "grad_norm": 0.7278166988314033, "learning_rate": 3.9253852392538534e-08, "loss": 0.6203, "step": 34009 }, { "epoch": 0.9929637090887857, "grad_norm": 0.6989297674519448, "learning_rate": 3.909164639091647e-08, "loss": 0.6622, "step": 34010 }, { "epoch": 0.9929929053166331, "grad_norm": 0.6944454438922137, "learning_rate": 3.892944038929441e-08, "loss": 0.5781, "step": 34011 }, { "epoch": 0.9930221015444805, "grad_norm": 0.7460505991501568, "learning_rate": 3.876723438767234e-08, "loss": 0.7053, "step": 34012 }, { "epoch": 0.9930512977723278, "grad_norm": 0.7192767244736172, "learning_rate": 3.8605028386050284e-08, "loss": 0.6079, "step": 34013 }, { "epoch": 0.9930804940001752, "grad_norm": 0.7015041475272021, "learning_rate": 3.8442822384428224e-08, "loss": 0.6087, "step": 34014 }, { "epoch": 0.9931096902280225, "grad_norm": 0.712574686723394, "learning_rate": 3.828061638280617e-08, "loss": 0.6404, "step": 34015 }, { "epoch": 0.9931388864558699, "grad_norm": 0.7790987974043296, "learning_rate": 3.8118410381184105e-08, "loss": 0.6865, "step": 34016 }, { "epoch": 0.9931680826837173, "grad_norm": 0.7286028153609397, "learning_rate": 3.7956204379562046e-08, "loss": 0.6483, "step": 34017 }, { "epoch": 0.9931972789115646, "grad_norm": 0.6827965480671156, "learning_rate": 3.779399837793999e-08, "loss": 0.6016, "step": 34018 }, { "epoch": 0.993226475139412, "grad_norm": 0.729826013513752, "learning_rate": 3.763179237631793e-08, "loss": 0.6033, "step": 34019 }, { "epoch": 0.9932556713672593, "grad_norm": 0.8166483489216582, "learning_rate": 3.746958637469587e-08, "loss": 0.7818, "step": 34020 }, { "epoch": 0.9932848675951067, "grad_norm": 0.7138827279602833, "learning_rate": 3.73073803730738e-08, "loss": 0.5916, "step": 34021 }, { "epoch": 0.9933140638229541, "grad_norm": 0.6656919533561418, "learning_rate": 3.714517437145175e-08, "loss": 0.575, "step": 34022 }, { "epoch": 0.9933432600508014, "grad_norm": 0.70233445257523, "learning_rate": 3.698296836982969e-08, "loss": 0.612, "step": 34023 }, { "epoch": 0.9933724562786488, "grad_norm": 0.7467142088746842, "learning_rate": 3.682076236820763e-08, "loss": 0.7069, "step": 34024 }, { "epoch": 0.9934016525064961, "grad_norm": 0.7431594314647044, "learning_rate": 3.6658556366585564e-08, "loss": 0.6583, "step": 34025 }, { "epoch": 0.9934308487343435, "grad_norm": 0.6993208999692062, "learning_rate": 3.6496350364963505e-08, "loss": 0.5935, "step": 34026 }, { "epoch": 0.9934600449621909, "grad_norm": 0.7937904616625748, "learning_rate": 3.6334144363341446e-08, "loss": 0.7207, "step": 34027 }, { "epoch": 0.9934892411900382, "grad_norm": 0.7473129404277294, "learning_rate": 3.6171938361719386e-08, "loss": 0.6549, "step": 34028 }, { "epoch": 0.9935184374178856, "grad_norm": 0.7522112997310141, "learning_rate": 3.600973236009733e-08, "loss": 0.6576, "step": 34029 }, { "epoch": 0.993547633645733, "grad_norm": 0.7229137837777407, "learning_rate": 3.584752635847527e-08, "loss": 0.6355, "step": 34030 }, { "epoch": 0.9935768298735803, "grad_norm": 0.7811038632107353, "learning_rate": 3.568532035685321e-08, "loss": 0.6464, "step": 34031 }, { "epoch": 0.9936060261014277, "grad_norm": 0.6952744332221736, "learning_rate": 3.552311435523115e-08, "loss": 0.6134, "step": 34032 }, { "epoch": 0.993635222329275, "grad_norm": 0.7245432055867639, "learning_rate": 3.536090835360909e-08, "loss": 0.624, "step": 34033 }, { "epoch": 0.9936644185571224, "grad_norm": 0.6938568609368208, "learning_rate": 3.519870235198703e-08, "loss": 0.5876, "step": 34034 }, { "epoch": 0.9936936147849698, "grad_norm": 0.7140958622262676, "learning_rate": 3.5036496350364964e-08, "loss": 0.6287, "step": 34035 }, { "epoch": 0.9937228110128171, "grad_norm": 0.6839140608128114, "learning_rate": 3.4874290348742905e-08, "loss": 0.5881, "step": 34036 }, { "epoch": 0.9937520072406645, "grad_norm": 0.703718943554584, "learning_rate": 3.4712084347120845e-08, "loss": 0.5821, "step": 34037 }, { "epoch": 0.9937812034685118, "grad_norm": 0.7641992262163023, "learning_rate": 3.4549878345498786e-08, "loss": 0.7042, "step": 34038 }, { "epoch": 0.9938103996963592, "grad_norm": 0.7747456200446824, "learning_rate": 3.4387672343876727e-08, "loss": 0.685, "step": 34039 }, { "epoch": 0.9938395959242066, "grad_norm": 0.7473842326059469, "learning_rate": 3.422546634225467e-08, "loss": 0.6762, "step": 34040 }, { "epoch": 0.9938687921520539, "grad_norm": 0.7286135980688548, "learning_rate": 3.406326034063261e-08, "loss": 0.6444, "step": 34041 }, { "epoch": 0.9938979883799013, "grad_norm": 0.7468120834776535, "learning_rate": 3.390105433901055e-08, "loss": 0.6601, "step": 34042 }, { "epoch": 0.9939271846077486, "grad_norm": 0.8052413953925707, "learning_rate": 3.373884833738849e-08, "loss": 0.7463, "step": 34043 }, { "epoch": 0.993956380835596, "grad_norm": 0.7122022925573737, "learning_rate": 3.357664233576642e-08, "loss": 0.6517, "step": 34044 }, { "epoch": 0.9939855770634434, "grad_norm": 0.6505184099076184, "learning_rate": 3.3414436334144364e-08, "loss": 0.5423, "step": 34045 }, { "epoch": 0.9940147732912907, "grad_norm": 0.682400445061946, "learning_rate": 3.3252230332522304e-08, "loss": 0.5875, "step": 34046 }, { "epoch": 0.9940439695191381, "grad_norm": 0.6690881730507569, "learning_rate": 3.3090024330900245e-08, "loss": 0.5722, "step": 34047 }, { "epoch": 0.9940731657469855, "grad_norm": 0.7042587681463734, "learning_rate": 3.2927818329278186e-08, "loss": 0.5916, "step": 34048 }, { "epoch": 0.9941023619748328, "grad_norm": 0.6738704565848792, "learning_rate": 3.2765612327656126e-08, "loss": 0.5804, "step": 34049 }, { "epoch": 0.9941315582026802, "grad_norm": 0.6829413319892396, "learning_rate": 3.260340632603407e-08, "loss": 0.598, "step": 34050 }, { "epoch": 0.9941607544305275, "grad_norm": 0.7545268267846956, "learning_rate": 3.244120032441201e-08, "loss": 0.7003, "step": 34051 }, { "epoch": 0.9941899506583749, "grad_norm": 0.7712678384437514, "learning_rate": 3.227899432278995e-08, "loss": 0.6423, "step": 34052 }, { "epoch": 0.9942191468862223, "grad_norm": 0.6631759351843454, "learning_rate": 3.211678832116788e-08, "loss": 0.5192, "step": 34053 }, { "epoch": 0.9942483431140696, "grad_norm": 0.6987201855027839, "learning_rate": 3.195458231954582e-08, "loss": 0.6148, "step": 34054 }, { "epoch": 0.994277539341917, "grad_norm": 0.7131014470845044, "learning_rate": 3.1792376317923763e-08, "loss": 0.5968, "step": 34055 }, { "epoch": 0.9943067355697643, "grad_norm": 0.712750810402235, "learning_rate": 3.1630170316301704e-08, "loss": 0.6344, "step": 34056 }, { "epoch": 0.9943359317976117, "grad_norm": 0.7344967242012065, "learning_rate": 3.1467964314679645e-08, "loss": 0.6312, "step": 34057 }, { "epoch": 0.9943651280254591, "grad_norm": 0.7442390630913531, "learning_rate": 3.1305758313057585e-08, "loss": 0.6746, "step": 34058 }, { "epoch": 0.9943943242533064, "grad_norm": 0.7260743132523665, "learning_rate": 3.1143552311435526e-08, "loss": 0.6712, "step": 34059 }, { "epoch": 0.9944235204811538, "grad_norm": 0.6765326039182189, "learning_rate": 3.0981346309813466e-08, "loss": 0.5798, "step": 34060 }, { "epoch": 0.9944527167090011, "grad_norm": 0.7045772889572439, "learning_rate": 3.081914030819141e-08, "loss": 0.6309, "step": 34061 }, { "epoch": 0.9944819129368485, "grad_norm": 0.7880199778141386, "learning_rate": 3.065693430656934e-08, "loss": 0.7233, "step": 34062 }, { "epoch": 0.9945111091646959, "grad_norm": 0.6881521943126079, "learning_rate": 3.049472830494728e-08, "loss": 0.5579, "step": 34063 }, { "epoch": 0.9945403053925432, "grad_norm": 0.7496451249371326, "learning_rate": 3.033252230332523e-08, "loss": 0.6625, "step": 34064 }, { "epoch": 0.9945695016203906, "grad_norm": 0.7073131701311136, "learning_rate": 3.017031630170317e-08, "loss": 0.6053, "step": 34065 }, { "epoch": 0.994598697848238, "grad_norm": 0.7325952416713604, "learning_rate": 3.000811030008111e-08, "loss": 0.6628, "step": 34066 }, { "epoch": 0.9946278940760853, "grad_norm": 0.6854203604570417, "learning_rate": 2.9845904298459044e-08, "loss": 0.5665, "step": 34067 }, { "epoch": 0.9946570903039327, "grad_norm": 0.7205829208840602, "learning_rate": 2.9683698296836985e-08, "loss": 0.6052, "step": 34068 }, { "epoch": 0.99468628653178, "grad_norm": 0.7045128589792081, "learning_rate": 2.9521492295214925e-08, "loss": 0.5895, "step": 34069 }, { "epoch": 0.9947154827596274, "grad_norm": 0.7317250113698192, "learning_rate": 2.9359286293592866e-08, "loss": 0.633, "step": 34070 }, { "epoch": 0.9947446789874749, "grad_norm": 0.7529637162674914, "learning_rate": 2.9197080291970803e-08, "loss": 0.6523, "step": 34071 }, { "epoch": 0.9947738752153222, "grad_norm": 0.7138426309754601, "learning_rate": 2.9034874290348744e-08, "loss": 0.579, "step": 34072 }, { "epoch": 0.9948030714431696, "grad_norm": 0.7004201701450372, "learning_rate": 2.8872668288726685e-08, "loss": 0.6227, "step": 34073 }, { "epoch": 0.994832267671017, "grad_norm": 0.685253970215837, "learning_rate": 2.8710462287104625e-08, "loss": 0.6015, "step": 34074 }, { "epoch": 0.9948614638988643, "grad_norm": 0.7433377867778121, "learning_rate": 2.8548256285482566e-08, "loss": 0.6725, "step": 34075 }, { "epoch": 0.9948906601267117, "grad_norm": 0.7403456461610619, "learning_rate": 2.8386050283860503e-08, "loss": 0.7273, "step": 34076 }, { "epoch": 0.994919856354559, "grad_norm": 0.7117846263138343, "learning_rate": 2.8223844282238444e-08, "loss": 0.6399, "step": 34077 }, { "epoch": 0.9949490525824064, "grad_norm": 0.6864041714454521, "learning_rate": 2.8061638280616384e-08, "loss": 0.568, "step": 34078 }, { "epoch": 0.9949782488102538, "grad_norm": 0.753123790764174, "learning_rate": 2.7899432278994325e-08, "loss": 0.6917, "step": 34079 }, { "epoch": 0.9950074450381011, "grad_norm": 0.7127554648181154, "learning_rate": 2.7737226277372262e-08, "loss": 0.6247, "step": 34080 }, { "epoch": 0.9950366412659485, "grad_norm": 0.7403436393117646, "learning_rate": 2.7575020275750203e-08, "loss": 0.6845, "step": 34081 }, { "epoch": 0.9950658374937958, "grad_norm": 0.736279532258226, "learning_rate": 2.7412814274128144e-08, "loss": 0.6713, "step": 34082 }, { "epoch": 0.9950950337216432, "grad_norm": 0.7035479847995691, "learning_rate": 2.7250608272506084e-08, "loss": 0.5892, "step": 34083 }, { "epoch": 0.9951242299494906, "grad_norm": 0.7347348441026216, "learning_rate": 2.7088402270884028e-08, "loss": 0.7353, "step": 34084 }, { "epoch": 0.9951534261773379, "grad_norm": 0.7665258007235377, "learning_rate": 2.6926196269261962e-08, "loss": 0.6529, "step": 34085 }, { "epoch": 0.9951826224051853, "grad_norm": 0.7277512157867387, "learning_rate": 2.6763990267639903e-08, "loss": 0.6464, "step": 34086 }, { "epoch": 0.9952118186330327, "grad_norm": 0.7144682086413547, "learning_rate": 2.6601784266017847e-08, "loss": 0.637, "step": 34087 }, { "epoch": 0.99524101486088, "grad_norm": 0.7254840345841402, "learning_rate": 2.6439578264395787e-08, "loss": 0.6207, "step": 34088 }, { "epoch": 0.9952702110887274, "grad_norm": 1.0060671635146732, "learning_rate": 2.627737226277372e-08, "loss": 0.6353, "step": 34089 }, { "epoch": 0.9952994073165747, "grad_norm": 0.7328274097678743, "learning_rate": 2.6115166261151662e-08, "loss": 0.6545, "step": 34090 }, { "epoch": 0.9953286035444221, "grad_norm": 0.6722468800368365, "learning_rate": 2.5952960259529606e-08, "loss": 0.5706, "step": 34091 }, { "epoch": 0.9953577997722695, "grad_norm": 0.7416811390827002, "learning_rate": 2.5790754257907547e-08, "loss": 0.6834, "step": 34092 }, { "epoch": 0.9953869960001168, "grad_norm": 0.7650120702225878, "learning_rate": 2.5628548256285487e-08, "loss": 0.7192, "step": 34093 }, { "epoch": 0.9954161922279642, "grad_norm": 0.7489060516093478, "learning_rate": 2.5466342254663425e-08, "loss": 0.6811, "step": 34094 }, { "epoch": 0.9954453884558115, "grad_norm": 0.7459616189329923, "learning_rate": 2.5304136253041365e-08, "loss": 0.6654, "step": 34095 }, { "epoch": 0.9954745846836589, "grad_norm": 0.7232585812009834, "learning_rate": 2.5141930251419306e-08, "loss": 0.6445, "step": 34096 }, { "epoch": 0.9955037809115063, "grad_norm": 0.7049594203597711, "learning_rate": 2.4979724249797246e-08, "loss": 0.6515, "step": 34097 }, { "epoch": 0.9955329771393536, "grad_norm": 0.7403874710401628, "learning_rate": 2.4817518248175187e-08, "loss": 0.5939, "step": 34098 }, { "epoch": 0.995562173367201, "grad_norm": 0.7180942988544022, "learning_rate": 2.4655312246553124e-08, "loss": 0.6072, "step": 34099 }, { "epoch": 0.9955913695950483, "grad_norm": 0.6918011287317997, "learning_rate": 2.4493106244931065e-08, "loss": 0.5965, "step": 34100 }, { "epoch": 0.9956205658228957, "grad_norm": 0.690015738480249, "learning_rate": 2.4330900243309006e-08, "loss": 0.6155, "step": 34101 }, { "epoch": 0.9956497620507431, "grad_norm": 0.7517958036955006, "learning_rate": 2.4168694241686946e-08, "loss": 0.6825, "step": 34102 }, { "epoch": 0.9956789582785904, "grad_norm": 0.6836062280259158, "learning_rate": 2.4006488240064884e-08, "loss": 0.5941, "step": 34103 }, { "epoch": 0.9957081545064378, "grad_norm": 0.674103415763433, "learning_rate": 2.3844282238442824e-08, "loss": 0.5698, "step": 34104 }, { "epoch": 0.9957373507342852, "grad_norm": 0.7154717332658591, "learning_rate": 2.3682076236820765e-08, "loss": 0.6412, "step": 34105 }, { "epoch": 0.9957665469621325, "grad_norm": 0.6760109376607308, "learning_rate": 2.3519870235198705e-08, "loss": 0.615, "step": 34106 }, { "epoch": 0.9957957431899799, "grad_norm": 0.7856197187733509, "learning_rate": 2.3357664233576646e-08, "loss": 0.7162, "step": 34107 }, { "epoch": 0.9958249394178272, "grad_norm": 0.7774441922977077, "learning_rate": 2.3195458231954583e-08, "loss": 0.7111, "step": 34108 }, { "epoch": 0.9958541356456746, "grad_norm": 0.6615690551213329, "learning_rate": 2.3033252230332524e-08, "loss": 0.5277, "step": 34109 }, { "epoch": 0.995883331873522, "grad_norm": 0.6824611544155326, "learning_rate": 2.2871046228710465e-08, "loss": 0.5743, "step": 34110 }, { "epoch": 0.9959125281013693, "grad_norm": 0.743817381025015, "learning_rate": 2.2708840227088405e-08, "loss": 0.6577, "step": 34111 }, { "epoch": 0.9959417243292167, "grad_norm": 0.7371602883604904, "learning_rate": 2.2546634225466343e-08, "loss": 0.6209, "step": 34112 }, { "epoch": 0.995970920557064, "grad_norm": 0.6687698807725989, "learning_rate": 2.2384428223844283e-08, "loss": 0.5272, "step": 34113 }, { "epoch": 0.9960001167849114, "grad_norm": 0.6933246207707745, "learning_rate": 2.2222222222222224e-08, "loss": 0.6322, "step": 34114 }, { "epoch": 0.9960293130127588, "grad_norm": 0.7281215164842064, "learning_rate": 2.2060016220600164e-08, "loss": 0.6874, "step": 34115 }, { "epoch": 0.9960585092406061, "grad_norm": 0.68706404802935, "learning_rate": 2.1897810218978105e-08, "loss": 0.58, "step": 34116 }, { "epoch": 0.9960877054684535, "grad_norm": 0.7384132511911289, "learning_rate": 2.1735604217356042e-08, "loss": 0.6554, "step": 34117 }, { "epoch": 0.9961169016963008, "grad_norm": 0.6536296409746296, "learning_rate": 2.1573398215733983e-08, "loss": 0.5428, "step": 34118 }, { "epoch": 0.9961460979241482, "grad_norm": 0.717776099823968, "learning_rate": 2.1411192214111924e-08, "loss": 0.6115, "step": 34119 }, { "epoch": 0.9961752941519956, "grad_norm": 0.705948403778588, "learning_rate": 2.1248986212489864e-08, "loss": 0.5881, "step": 34120 }, { "epoch": 0.9962044903798429, "grad_norm": 0.6958487026899072, "learning_rate": 2.10867802108678e-08, "loss": 0.5983, "step": 34121 }, { "epoch": 0.9962336866076903, "grad_norm": 0.7542104783805432, "learning_rate": 2.0924574209245742e-08, "loss": 0.712, "step": 34122 }, { "epoch": 0.9962628828355377, "grad_norm": 0.7490902990914082, "learning_rate": 2.0762368207623683e-08, "loss": 0.6984, "step": 34123 }, { "epoch": 0.996292079063385, "grad_norm": 0.7549332976653995, "learning_rate": 2.0600162206001623e-08, "loss": 0.6619, "step": 34124 }, { "epoch": 0.9963212752912324, "grad_norm": 0.7145616446700286, "learning_rate": 2.0437956204379564e-08, "loss": 0.6539, "step": 34125 }, { "epoch": 0.9963504715190797, "grad_norm": 0.7478211449971939, "learning_rate": 2.02757502027575e-08, "loss": 0.7213, "step": 34126 }, { "epoch": 0.9963796677469271, "grad_norm": 0.6991010546516608, "learning_rate": 2.0113544201135442e-08, "loss": 0.6396, "step": 34127 }, { "epoch": 0.9964088639747745, "grad_norm": 0.7394667112909934, "learning_rate": 1.9951338199513383e-08, "loss": 0.6763, "step": 34128 }, { "epoch": 0.9964380602026218, "grad_norm": 0.7213589060352892, "learning_rate": 1.9789132197891323e-08, "loss": 0.599, "step": 34129 }, { "epoch": 0.9964672564304692, "grad_norm": 0.7243661318487135, "learning_rate": 1.9626926196269267e-08, "loss": 0.6369, "step": 34130 }, { "epoch": 0.9964964526583165, "grad_norm": 0.6853871449177448, "learning_rate": 1.9464720194647204e-08, "loss": 0.5808, "step": 34131 }, { "epoch": 0.9965256488861639, "grad_norm": 0.6822449972130021, "learning_rate": 1.9302514193025142e-08, "loss": 0.5922, "step": 34132 }, { "epoch": 0.9965548451140113, "grad_norm": 0.6965828810176841, "learning_rate": 1.9140308191403086e-08, "loss": 0.596, "step": 34133 }, { "epoch": 0.9965840413418586, "grad_norm": 0.7161220277941459, "learning_rate": 1.8978102189781023e-08, "loss": 0.6522, "step": 34134 }, { "epoch": 0.996613237569706, "grad_norm": 0.7526186895092063, "learning_rate": 1.8815896188158964e-08, "loss": 0.664, "step": 34135 }, { "epoch": 0.9966424337975534, "grad_norm": 0.8079202363570279, "learning_rate": 1.86536901865369e-08, "loss": 0.6561, "step": 34136 }, { "epoch": 0.9966716300254007, "grad_norm": 0.7079696594665127, "learning_rate": 1.8491484184914845e-08, "loss": 0.6258, "step": 34137 }, { "epoch": 0.9967008262532481, "grad_norm": 0.7063188320543999, "learning_rate": 1.8329278183292782e-08, "loss": 0.6333, "step": 34138 }, { "epoch": 0.9967300224810954, "grad_norm": 0.7266270062435686, "learning_rate": 1.8167072181670723e-08, "loss": 0.6247, "step": 34139 }, { "epoch": 0.9967592187089428, "grad_norm": 0.7003766728282925, "learning_rate": 1.8004866180048663e-08, "loss": 0.6333, "step": 34140 }, { "epoch": 0.9967884149367902, "grad_norm": 0.6771478414894785, "learning_rate": 1.7842660178426604e-08, "loss": 0.5729, "step": 34141 }, { "epoch": 0.9968176111646375, "grad_norm": 0.7208935807808915, "learning_rate": 1.7680454176804545e-08, "loss": 0.6349, "step": 34142 }, { "epoch": 0.9968468073924849, "grad_norm": 0.71364752118482, "learning_rate": 1.7518248175182482e-08, "loss": 0.584, "step": 34143 }, { "epoch": 0.9968760036203322, "grad_norm": 0.7758161752562314, "learning_rate": 1.7356042173560423e-08, "loss": 0.657, "step": 34144 }, { "epoch": 0.9969051998481796, "grad_norm": 0.7340264320341736, "learning_rate": 1.7193836171938363e-08, "loss": 0.6676, "step": 34145 }, { "epoch": 0.996934396076027, "grad_norm": 0.6982229739039297, "learning_rate": 1.7031630170316304e-08, "loss": 0.6035, "step": 34146 }, { "epoch": 0.9969635923038743, "grad_norm": 0.7060128662425681, "learning_rate": 1.6869424168694245e-08, "loss": 0.6318, "step": 34147 }, { "epoch": 0.9969927885317217, "grad_norm": 0.7469644289345584, "learning_rate": 1.6707218167072182e-08, "loss": 0.6771, "step": 34148 }, { "epoch": 0.997021984759569, "grad_norm": 0.6961269980006785, "learning_rate": 1.6545012165450122e-08, "loss": 0.5942, "step": 34149 }, { "epoch": 0.9970511809874164, "grad_norm": 0.7028596421956828, "learning_rate": 1.6382806163828063e-08, "loss": 0.6029, "step": 34150 }, { "epoch": 0.9970803772152638, "grad_norm": 0.9416646633469365, "learning_rate": 1.6220600162206004e-08, "loss": 0.7563, "step": 34151 }, { "epoch": 0.9971095734431111, "grad_norm": 0.7138727413195459, "learning_rate": 1.605839416058394e-08, "loss": 0.637, "step": 34152 }, { "epoch": 0.9971387696709585, "grad_norm": 0.7122182830045608, "learning_rate": 1.5896188158961882e-08, "loss": 0.6138, "step": 34153 }, { "epoch": 0.9971679658988059, "grad_norm": 0.6845374774176961, "learning_rate": 1.5733982157339822e-08, "loss": 0.5761, "step": 34154 }, { "epoch": 0.9971971621266532, "grad_norm": 0.7443440623749203, "learning_rate": 1.5571776155717763e-08, "loss": 0.6562, "step": 34155 }, { "epoch": 0.9972263583545006, "grad_norm": 0.7176903986516954, "learning_rate": 1.5409570154095704e-08, "loss": 0.6577, "step": 34156 }, { "epoch": 0.9972555545823479, "grad_norm": 0.6872204370965485, "learning_rate": 1.524736415247364e-08, "loss": 0.6076, "step": 34157 }, { "epoch": 0.9972847508101953, "grad_norm": 0.761910745513072, "learning_rate": 1.5085158150851585e-08, "loss": 0.7065, "step": 34158 }, { "epoch": 0.9973139470380427, "grad_norm": 0.717714031094387, "learning_rate": 1.4922952149229522e-08, "loss": 0.6692, "step": 34159 }, { "epoch": 0.99734314326589, "grad_norm": 0.7019864999993519, "learning_rate": 1.4760746147607463e-08, "loss": 0.6474, "step": 34160 }, { "epoch": 0.9973723394937374, "grad_norm": 0.7717919136155629, "learning_rate": 1.4598540145985402e-08, "loss": 0.6701, "step": 34161 }, { "epoch": 0.9974015357215847, "grad_norm": 0.7852551774497681, "learning_rate": 1.4436334144363342e-08, "loss": 0.691, "step": 34162 }, { "epoch": 0.9974307319494321, "grad_norm": 0.7244496144813394, "learning_rate": 1.4274128142741283e-08, "loss": 0.6363, "step": 34163 }, { "epoch": 0.9974599281772795, "grad_norm": 0.7281447772443803, "learning_rate": 1.4111922141119222e-08, "loss": 0.6469, "step": 34164 }, { "epoch": 0.9974891244051268, "grad_norm": 0.816481755649884, "learning_rate": 1.3949716139497163e-08, "loss": 0.7478, "step": 34165 }, { "epoch": 0.9975183206329742, "grad_norm": 0.7425235857304576, "learning_rate": 1.3787510137875102e-08, "loss": 0.7283, "step": 34166 }, { "epoch": 0.9975475168608215, "grad_norm": 0.7098877796100856, "learning_rate": 1.3625304136253042e-08, "loss": 0.6183, "step": 34167 }, { "epoch": 0.9975767130886689, "grad_norm": 0.7235494990818552, "learning_rate": 1.3463098134630981e-08, "loss": 0.6728, "step": 34168 }, { "epoch": 0.9976059093165163, "grad_norm": 0.6699188865939726, "learning_rate": 1.3300892133008923e-08, "loss": 0.5045, "step": 34169 }, { "epoch": 0.9976351055443636, "grad_norm": 0.7559369923268514, "learning_rate": 1.313868613138686e-08, "loss": 0.6378, "step": 34170 }, { "epoch": 0.997664301772211, "grad_norm": 0.7745934215688617, "learning_rate": 1.2976480129764803e-08, "loss": 0.6749, "step": 34171 }, { "epoch": 0.9976934980000584, "grad_norm": 0.7823711849665165, "learning_rate": 1.2814274128142744e-08, "loss": 0.7337, "step": 34172 }, { "epoch": 0.9977226942279057, "grad_norm": 0.7319478077984529, "learning_rate": 1.2652068126520683e-08, "loss": 0.686, "step": 34173 }, { "epoch": 0.9977518904557531, "grad_norm": 0.7561942559483519, "learning_rate": 1.2489862124898623e-08, "loss": 0.6814, "step": 34174 }, { "epoch": 0.9977810866836004, "grad_norm": 0.7279591391520492, "learning_rate": 1.2327656123276562e-08, "loss": 0.6376, "step": 34175 }, { "epoch": 0.9978102829114478, "grad_norm": 0.7069968949577451, "learning_rate": 1.2165450121654503e-08, "loss": 0.5949, "step": 34176 }, { "epoch": 0.9978394791392952, "grad_norm": 0.7422209275494294, "learning_rate": 1.2003244120032442e-08, "loss": 0.6832, "step": 34177 }, { "epoch": 0.9978686753671425, "grad_norm": 0.7135835162990841, "learning_rate": 1.1841038118410382e-08, "loss": 0.6709, "step": 34178 }, { "epoch": 0.9978978715949899, "grad_norm": 0.7318997561836896, "learning_rate": 1.1678832116788323e-08, "loss": 0.6377, "step": 34179 }, { "epoch": 0.9979270678228372, "grad_norm": 0.6827991765321267, "learning_rate": 1.1516626115166262e-08, "loss": 0.6135, "step": 34180 }, { "epoch": 0.9979562640506846, "grad_norm": 0.7039902284128251, "learning_rate": 1.1354420113544203e-08, "loss": 0.6386, "step": 34181 }, { "epoch": 0.997985460278532, "grad_norm": 0.7254730812255769, "learning_rate": 1.1192214111922142e-08, "loss": 0.5581, "step": 34182 }, { "epoch": 0.9980146565063793, "grad_norm": 0.7220111808825938, "learning_rate": 1.1030008110300082e-08, "loss": 0.6157, "step": 34183 }, { "epoch": 0.9980438527342267, "grad_norm": 0.7479168644537102, "learning_rate": 1.0867802108678021e-08, "loss": 0.6622, "step": 34184 }, { "epoch": 0.998073048962074, "grad_norm": 0.7273203656204351, "learning_rate": 1.0705596107055962e-08, "loss": 0.62, "step": 34185 }, { "epoch": 0.9981022451899214, "grad_norm": 0.6923593822452486, "learning_rate": 1.05433901054339e-08, "loss": 0.6084, "step": 34186 }, { "epoch": 0.9981314414177688, "grad_norm": 0.7302678373790336, "learning_rate": 1.0381184103811841e-08, "loss": 0.6509, "step": 34187 }, { "epoch": 0.9981606376456161, "grad_norm": 0.6512350316944939, "learning_rate": 1.0218978102189782e-08, "loss": 0.5251, "step": 34188 }, { "epoch": 0.9981898338734635, "grad_norm": 0.7035645054450334, "learning_rate": 1.0056772100567721e-08, "loss": 0.616, "step": 34189 }, { "epoch": 0.9982190301013109, "grad_norm": 0.6911147127144289, "learning_rate": 9.894566098945662e-09, "loss": 0.5752, "step": 34190 }, { "epoch": 0.9982482263291583, "grad_norm": 0.7126451722771335, "learning_rate": 9.732360097323602e-09, "loss": 0.6404, "step": 34191 }, { "epoch": 0.9982774225570057, "grad_norm": 0.7433809807964699, "learning_rate": 9.570154095701543e-09, "loss": 0.6816, "step": 34192 }, { "epoch": 0.998306618784853, "grad_norm": 0.7506303036727171, "learning_rate": 9.407948094079482e-09, "loss": 0.6698, "step": 34193 }, { "epoch": 0.9983358150127004, "grad_norm": 0.7059678736979675, "learning_rate": 9.245742092457422e-09, "loss": 0.5957, "step": 34194 }, { "epoch": 0.9983650112405478, "grad_norm": 0.6983387457316068, "learning_rate": 9.083536090835361e-09, "loss": 0.6344, "step": 34195 }, { "epoch": 0.9983942074683951, "grad_norm": 0.7207204834242424, "learning_rate": 8.921330089213302e-09, "loss": 0.605, "step": 34196 }, { "epoch": 0.9984234036962425, "grad_norm": 0.6949150554942011, "learning_rate": 8.759124087591241e-09, "loss": 0.5752, "step": 34197 }, { "epoch": 0.9984525999240899, "grad_norm": 0.7327715159672556, "learning_rate": 8.596918085969182e-09, "loss": 0.6607, "step": 34198 }, { "epoch": 0.9984817961519372, "grad_norm": 0.8024417127557583, "learning_rate": 8.434712084347122e-09, "loss": 0.7447, "step": 34199 }, { "epoch": 0.9985109923797846, "grad_norm": 0.7336943414787676, "learning_rate": 8.272506082725061e-09, "loss": 0.6763, "step": 34200 }, { "epoch": 0.9985401886076319, "grad_norm": 0.7255982292520039, "learning_rate": 8.110300081103002e-09, "loss": 0.6142, "step": 34201 }, { "epoch": 0.9985693848354793, "grad_norm": 0.701806522039842, "learning_rate": 7.948094079480941e-09, "loss": 0.6002, "step": 34202 }, { "epoch": 0.9985985810633267, "grad_norm": 0.6857784355196971, "learning_rate": 7.785888077858881e-09, "loss": 0.5957, "step": 34203 }, { "epoch": 0.998627777291174, "grad_norm": 0.7160378473669983, "learning_rate": 7.62368207623682e-09, "loss": 0.6604, "step": 34204 }, { "epoch": 0.9986569735190214, "grad_norm": 0.7592508078162891, "learning_rate": 7.461476074614761e-09, "loss": 0.6606, "step": 34205 }, { "epoch": 0.9986861697468687, "grad_norm": 0.7351435476784584, "learning_rate": 7.299270072992701e-09, "loss": 0.6277, "step": 34206 }, { "epoch": 0.9987153659747161, "grad_norm": 0.6873675728554177, "learning_rate": 7.1370640713706415e-09, "loss": 0.5831, "step": 34207 }, { "epoch": 0.9987445622025635, "grad_norm": 0.7388572476072999, "learning_rate": 6.974858069748581e-09, "loss": 0.6309, "step": 34208 }, { "epoch": 0.9987737584304108, "grad_norm": 0.721899059091826, "learning_rate": 6.812652068126521e-09, "loss": 0.5911, "step": 34209 }, { "epoch": 0.9988029546582582, "grad_norm": 0.7672420053217419, "learning_rate": 6.650446066504462e-09, "loss": 0.6283, "step": 34210 }, { "epoch": 0.9988321508861056, "grad_norm": 0.8703378227020531, "learning_rate": 6.4882400648824015e-09, "loss": 0.7213, "step": 34211 }, { "epoch": 0.9988613471139529, "grad_norm": 0.7975459358451994, "learning_rate": 6.326034063260341e-09, "loss": 0.7675, "step": 34212 }, { "epoch": 0.9988905433418003, "grad_norm": 0.6399782453584276, "learning_rate": 6.163828061638281e-09, "loss": 0.5508, "step": 34213 }, { "epoch": 0.9989197395696476, "grad_norm": 0.7272908987742678, "learning_rate": 6.001622060016221e-09, "loss": 0.6937, "step": 34214 }, { "epoch": 0.998948935797495, "grad_norm": 0.6782554442886177, "learning_rate": 5.8394160583941615e-09, "loss": 0.5684, "step": 34215 }, { "epoch": 0.9989781320253424, "grad_norm": 0.7740826840752328, "learning_rate": 5.677210056772101e-09, "loss": 0.6745, "step": 34216 }, { "epoch": 0.9990073282531897, "grad_norm": 0.7674311854435928, "learning_rate": 5.515004055150041e-09, "loss": 0.6726, "step": 34217 }, { "epoch": 0.9990365244810371, "grad_norm": 0.7933340161941161, "learning_rate": 5.352798053527981e-09, "loss": 0.7522, "step": 34218 }, { "epoch": 0.9990657207088844, "grad_norm": 0.7218795506101608, "learning_rate": 5.190592051905921e-09, "loss": 0.6561, "step": 34219 }, { "epoch": 0.9990949169367318, "grad_norm": 0.7590215157375719, "learning_rate": 5.0283860502838605e-09, "loss": 0.6953, "step": 34220 }, { "epoch": 0.9991241131645792, "grad_norm": 0.6761894587265558, "learning_rate": 4.866180048661801e-09, "loss": 0.5512, "step": 34221 }, { "epoch": 0.9991533093924265, "grad_norm": 0.6886013691908097, "learning_rate": 4.703974047039741e-09, "loss": 0.6154, "step": 34222 }, { "epoch": 0.9991825056202739, "grad_norm": 0.7074552715299596, "learning_rate": 4.541768045417681e-09, "loss": 0.6417, "step": 34223 }, { "epoch": 0.9992117018481212, "grad_norm": 0.7347427620959156, "learning_rate": 4.3795620437956205e-09, "loss": 0.6629, "step": 34224 }, { "epoch": 0.9992408980759686, "grad_norm": 0.6912164198284321, "learning_rate": 4.217356042173561e-09, "loss": 0.5816, "step": 34225 }, { "epoch": 0.999270094303816, "grad_norm": 0.7209870731244985, "learning_rate": 4.055150040551501e-09, "loss": 0.6596, "step": 34226 }, { "epoch": 0.9992992905316633, "grad_norm": 0.8251416046151732, "learning_rate": 3.892944038929441e-09, "loss": 0.6123, "step": 34227 }, { "epoch": 0.9993284867595107, "grad_norm": 0.7218197582154907, "learning_rate": 3.7307380373073805e-09, "loss": 0.6503, "step": 34228 }, { "epoch": 0.999357682987358, "grad_norm": 0.7325410993608322, "learning_rate": 3.5685320356853207e-09, "loss": 0.6326, "step": 34229 }, { "epoch": 0.9993868792152054, "grad_norm": 0.773058709768086, "learning_rate": 3.4063260340632605e-09, "loss": 0.715, "step": 34230 }, { "epoch": 0.9994160754430528, "grad_norm": 0.7031415414977319, "learning_rate": 3.2441200324412007e-09, "loss": 0.6401, "step": 34231 }, { "epoch": 0.9994452716709001, "grad_norm": 0.7643697859220143, "learning_rate": 3.0819140308191405e-09, "loss": 0.5467, "step": 34232 }, { "epoch": 0.9994744678987475, "grad_norm": 0.6640089381137656, "learning_rate": 2.9197080291970808e-09, "loss": 0.5477, "step": 34233 }, { "epoch": 0.9995036641265949, "grad_norm": 0.7476264419871284, "learning_rate": 2.7575020275750206e-09, "loss": 0.6834, "step": 34234 }, { "epoch": 0.9995328603544422, "grad_norm": 0.7231807509606918, "learning_rate": 2.5952960259529603e-09, "loss": 0.6583, "step": 34235 }, { "epoch": 0.9995620565822896, "grad_norm": 0.7027883539970735, "learning_rate": 2.4330900243309006e-09, "loss": 0.6397, "step": 34236 }, { "epoch": 0.9995912528101369, "grad_norm": 0.6720115038058015, "learning_rate": 2.2708840227088404e-09, "loss": 0.55, "step": 34237 }, { "epoch": 0.9996204490379843, "grad_norm": 0.7356972991349885, "learning_rate": 2.1086780210867806e-09, "loss": 0.6781, "step": 34238 }, { "epoch": 0.9996496452658317, "grad_norm": 0.7522250741470138, "learning_rate": 1.9464720194647204e-09, "loss": 0.7124, "step": 34239 }, { "epoch": 0.999678841493679, "grad_norm": 0.711656055874632, "learning_rate": 1.7842660178426604e-09, "loss": 0.5679, "step": 34240 }, { "epoch": 0.9997080377215264, "grad_norm": 0.7382809693326264, "learning_rate": 1.6220600162206004e-09, "loss": 0.6318, "step": 34241 }, { "epoch": 0.9997372339493737, "grad_norm": 0.7108533209328919, "learning_rate": 1.4598540145985404e-09, "loss": 0.6235, "step": 34242 }, { "epoch": 0.9997664301772211, "grad_norm": 0.7560341004874889, "learning_rate": 1.2976480129764802e-09, "loss": 0.6783, "step": 34243 }, { "epoch": 0.9997956264050685, "grad_norm": 0.6943686328839122, "learning_rate": 1.1354420113544202e-09, "loss": 0.6054, "step": 34244 }, { "epoch": 0.9998248226329158, "grad_norm": 0.766038199866277, "learning_rate": 9.732360097323602e-10, "loss": 0.6701, "step": 34245 }, { "epoch": 0.9998540188607632, "grad_norm": 0.7427100292469434, "learning_rate": 8.110300081103002e-10, "loss": 0.6548, "step": 34246 }, { "epoch": 0.9998832150886106, "grad_norm": 0.7818760767811955, "learning_rate": 6.488240064882401e-10, "loss": 0.7093, "step": 34247 }, { "epoch": 0.9999124113164579, "grad_norm": 0.7387511397340527, "learning_rate": 4.866180048661801e-10, "loss": 0.6217, "step": 34248 }, { "epoch": 0.9999416075443053, "grad_norm": 0.691899339276094, "learning_rate": 3.2441200324412004e-10, "loss": 0.6069, "step": 34249 }, { "epoch": 0.9999708037721526, "grad_norm": 0.7129831687828403, "learning_rate": 1.6220600162206002e-10, "loss": 0.6659, "step": 34250 }, { "epoch": 1.0, "grad_norm": 0.7129207459720134, "learning_rate": 0.0, "loss": 0.6223, "step": 34251 }, { "epoch": 1.0, "step": 34251, "total_flos": 487866325008384.0, "train_loss": 0.6708199132322719, "train_runtime": 52723.7529, "train_samples_per_second": 10.394, "train_steps_per_second": 0.65 } ], "logging_steps": 1, "max_steps": 34251, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 487866325008384.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }